dragon-ml-toolbox 1.4.3__py3-none-any.whl → 1.4.4__py3-none-any.whl

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 1.4.3
3
+ Version: 1.4.4
4
4
  Summary: A collection of tools for data science and machine learning projects
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1,19 @@
1
+ dragon_ml_toolbox-1.4.4.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
+ dragon_ml_toolbox-1.4.4.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=jDnniT0tgD0uw1NpjibsPF-qK3wmOKgTykLG2iNQU7E,1840
3
+ ml_tools/MICE_imputation.py,sha256=o1wPu3sYt6AxmUgtyDcfqbwL-82Hyep0upvC4tIWKyw,10136
4
+ ml_tools/VIF_factor.py,sha256=HEBsLJy_qSDaPw1Btha5B7omxN4wjJXg-sqoetCjCJw,10016
5
+ ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ ml_tools/data_exploration.py,sha256=nBhDuaUCgEVG0Xi8MQ0X_xWQadr-7j_wUqwt-t-w5uE,20172
7
+ ml_tools/datasetmaster.py,sha256=EFUEX-tqq94Ak1rXXYR-XaX85olrxvF2EuytdzUK7y0,29131
8
+ ml_tools/ensemble_learning.py,sha256=q8qOTbSAwdsgKhjojVm7wn_UApGOCsvFa0mvhLYUuyM,37239
9
+ ml_tools/handle_excel.py,sha256=ZJui5__0rc2T8UGHTheqZGhKmdVZ7Q2I54IoYCjAqJw,12612
10
+ ml_tools/logger.py,sha256=ZTtUB9HTkNs5zHTdYRKNbKADjUkuObsF7s8U5pNnVRA,4716
11
+ ml_tools/particle_swarm_optimization.py,sha256=zaDQTKz2fAixgAfZdkRttGpN2vgraT18ZlpllJlQesk,20034
12
+ ml_tools/pytorch_models.py,sha256=bpWZsrSwCvHJQkR6UfoPpElsMv9AvmiNErNHC8NYB_I,10132
13
+ ml_tools/trainer.py,sha256=WAZ4EdrZuTOAnGXRWV3XcLNce4s7EKGf2-qchLC08Ik,15702
14
+ ml_tools/utilities.py,sha256=r7uEmo38Imly56BP3-Jv6dFJvLsbGipeBlkiZx2fcNQ,10189
15
+ ml_tools/vision_helpers.py,sha256=idQ-Ugp1IdsvwXiYyhYa9G3rTRTm37YRpkQDLEpANHM,7701
16
+ dragon_ml_toolbox-1.4.4.dist-info/METADATA,sha256=8hdr2v8UE5uxieOUSRgjTa5OP1C2BzgjawgxT8ynbQ0,2516
17
+ dragon_ml_toolbox-1.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ dragon_ml_toolbox-1.4.4.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
19
+ dragon_ml_toolbox-1.4.4.dist-info/RECORD,,
ml_tools/MICE_imputation.py CHANGED
@@ -36,9 +36,9 @@ def apply_mice(df: pd.DataFrame, df_name: str, resulting_datasets: int=1, iterat
36
36
  raise ValueError("No imputed datasets were generated. Check the MICE process.")
37
37
 
38
38
  if resulting_datasets == 1:
39
- imputed_dataset_names = [f"{df_name}_imputed"]
39
+ imputed_dataset_names = [f"{df_name}_MICE"]
40
40
  else:
41
- imputed_dataset_names = [f"{df_name}_imputed_{i+1}" for i in range(resulting_datasets)]
41
+ imputed_dataset_names = [f"{df_name}_MICE_{i+1}" for i in range(resulting_datasets)]
42
42
 
43
43
  # Ensure indexes match
44
44
  for imputed_df, subname in zip(imputed_datasets, imputed_dataset_names):
ml_tools/VIF_factor.py CHANGED
@@ -26,6 +26,7 @@ def compute_vif(
26
26
  filename: Optional[str] = None,
27
27
  fontsize: int = 14,
28
28
  show_plot: bool = True,
29
+ verbose: bool = True
29
30
  ) -> pd.DataFrame:
30
31
  """
31
32
  Computes Variance Inflation Factors (VIF) for numeric columns in a DataFrame. Optionally, generates a bar plot of VIF values.
@@ -52,19 +53,20 @@ def compute_vif(
52
53
  if use_columns is None:
53
54
  sanitized_columns = df.select_dtypes(include='number').columns.tolist()
54
55
  missing_features = set(ground_truth_cols) - set(sanitized_columns)
55
- if missing_features:
56
+ if missing_features and verbose:
56
57
  print(f"⚠️ These columns are not Numeric:\n{missing_features}")
57
58
  else:
58
59
  sanitized_columns = list()
59
60
  for feature in use_columns:
60
61
  if feature not in ground_truth_cols:
61
- print(f"⚠️ The provided column '{feature}' is not in the DataFrame.")
62
+ if verbose:
63
+ print(f"⚠️ The provided column '{feature}' is not in the DataFrame.")
62
64
  else:
63
65
  sanitized_columns.append(feature)
64
66
 
65
67
  if ignore_columns is not None and use_columns is None:
66
68
  missing_ignore = set(ignore_columns) - set(ground_truth_cols)
67
- if missing_ignore:
69
+ if missing_ignore and verbose:
68
70
  print(f"⚠️ Warning: The following 'columns to ignore' are not in the Dataframe:\n{missing_ignore}")
69
71
  sanitized_columns = [f for f in sanitized_columns if f not in ignore_columns]
70
72
 
@@ -182,7 +184,7 @@ def compute_vif_multi(input_directory: str,
182
184
  max_features_to_plot: int = 20,
183
185
  fontsize: int = 14):
184
186
  """
185
- Computes Variance Inflation Factors (VIF) for numeric columns in a directory with CSV files (loaded as pandas DataFrames).
187
+ Computes Variance Inflation Factors (VIF) for numeric columns in a directory with CSV files (loaded as pandas DataFrames). No plots or warnings will be displayed inline.
186
188
  Generates a bar plot of VIF values. Optionally drops columns with VIF >= 10 and saves as a new CSV file.
187
189
 
188
190
  Args:
@@ -210,10 +212,11 @@ def compute_vif_multi(input_directory: str,
210
212
  fontsize=fontsize,
211
213
  save_dir=output_plot_directory,
212
214
  filename=df_name,
213
- show_plot=False)
215
+ show_plot=False,
216
+ verbose=False)
214
217
 
215
218
  if output_dataset_directory is not None:
216
- new_filename = 'VIF_' + df_name
219
+ new_filename = df_name + '_VIF'
217
220
  result_df, dropped_cols = drop_vif_based(df=df, vif_df=vif_dataframe)
218
221
 
219
222
  if len(dropped_cols) > 0:
ml_tools/data_exploration.py CHANGED
@@ -153,7 +153,7 @@ def drop_columns_with_missing_data(df: pd.DataFrame, threshold: float = 0.7, sho
153
153
 
154
154
  result_df = df.drop(columns=cols_to_drop)
155
155
  if show_nulls_after:
156
- show_null_columns(df=result_df).head(20)
156
+ print(show_null_columns(df=result_df))
157
157
 
158
158
  return result_df
159
159
  else:
@@ -259,7 +259,7 @@ def plot_correlation_heatmap(df: pd.DataFrame, save_dir: Union[str, None] = None
259
259
  os.makedirs(save_dir, exist_ok=True)
260
260
  full_path = os.path.join(save_dir, plot_title + ".svg")
261
261
  plt.savefig(full_path, bbox_inches="tight", format='svg')
262
- print(f"Saved correlation heatmap to: {full_path}")
262
+ print(f"Saved correlation heatmap: '{plot_title}.svg'")
263
263
 
264
264
  plt.show()
265
265
  plt.close()
@@ -521,7 +521,8 @@ def clip_outliers_multi(
521
521
 
522
522
  def distribute_datasets_by_target(
523
523
  df: pd.DataFrame,
524
- target_columns: list[str]
524
+ target_columns: list[str],
525
+ verbose: bool = False
525
526
  ) -> Iterator[Tuple[str, pd.DataFrame]]:
526
527
  """
527
528
  Yields cleaned DataFrames for each target column, where rows with missing
@@ -533,6 +534,8 @@ def distribute_datasets_by_target(
533
534
  Preprocessed dataframe with all feature and target columns ready to train.
534
535
  target_columns : List[str]
535
536
  List of target column names to generate per-target DataFrames.
537
+ verbose: bool
538
+ Whether to print info for each yielded dataset.
536
539
 
537
540
  Yields
538
541
  ------
@@ -540,10 +543,13 @@ def distribute_datasets_by_target(
540
543
  * First element is the target column name.
541
544
  * Second element is the corresponding cleaned DataFrame.
542
545
  """
543
- feature_columns = [col for col in df.columns if col not in target_columns]
546
+ valid_targets = [col for col in df.columns if col in target_columns]
547
+ feature_columns = [col for col in df.columns if col not in valid_targets]
544
548
 
545
- for target in target_columns:
549
+ for target in valid_targets:
546
550
  subset = df[feature_columns + [target]].dropna(subset=[target])
551
+ if verbose:
552
+ print(f"Target: '{target}' - Dataframe shape: {subset.shape}")
547
553
  yield target, subset
548
554
 
549
555
 
ml_tools/ensemble_learning.py CHANGED
@@ -157,9 +157,7 @@ class RegressionTreeModels:
157
157
  self.gamma = gamma
158
158
 
159
159
  # LightGBM specific
160
- if num_leaves >= (2**max_depth):
161
- num_leaves = (2**max_depth) - 1
162
- print(f"⚠️ Warning: 'num_leaves' should be set proportional to 'max_depth'. Value set as {num_leaves}.")
160
+ num_leaves = min(num_leaves, 2 ** (max_depth - 1))
163
161
  self.num_leaves = num_leaves
164
162
  self.min_data_in_leaf = min_data_in_leaf
165
163
 
@@ -202,7 +200,7 @@ class RegressionTreeModels:
202
200
  verbose=-1,
203
201
  reg_alpha=self.L1,
204
202
  reg_lambda=self.L2,
205
- boosting_type='dart',
203
+ boosting_type='gbdt',
206
204
  num_leaves=self.num_leaves,
207
205
  min_data_in_leaf=self.min_data_in_leaf
208
206
  )
@@ -321,9 +319,7 @@ class ClassificationTreeModels:
321
319
  self.gamma = gamma
322
320
 
323
321
  # LightGBM specific
324
- if num_leaves >= (2**max_depth):
325
- num_leaves = (2**max_depth) - 1
326
- print(f"⚠️ Warning: 'num_leaves' should be set proportional to 'max_depth'. Value set as {num_leaves}.")
322
+ num_leaves = min(num_leaves, 2 ** (max_depth - 1))
327
323
  self.num_leaves = num_leaves
328
324
  self.min_data_in_leaf = min_data_in_leaf
329
325
 
@@ -370,7 +366,7 @@ class ClassificationTreeModels:
370
366
  verbose=-1,
371
367
  reg_alpha=self.L1,
372
368
  reg_lambda=self.L2,
373
- boosting_type='dart' if self.use_model_balance else 'goss',
369
+ boosting_type='gbdt' if self.use_model_balance else 'goss',
374
370
  num_leaves=self.num_leaves,
375
371
  min_data_in_leaf=self.min_data_in_leaf,
376
372
  class_weight='balanced' if self.use_model_balance else None
ml_tools/logger.py CHANGED
@@ -55,7 +55,7 @@ def custom_logger(
55
55
  """
56
56
  try:
57
57
  os.makedirs(save_directory, exist_ok=True)
58
- timestamp = datetime.now().strftime(r"%Y%m%d_%H%M")
58
+ timestamp = datetime.now().strftime(r"%Y%m%d_%H%M%S")
59
59
  log_name = sanitize_filename(log_name)
60
60
  base_path = os.path.join(save_directory, f"{log_name}_{timestamp}")
61
61
 
@@ -80,7 +80,7 @@ def custom_logger(
80
80
  else:
81
81
  raise ValueError("Unsupported data type. Must be list, dict, DataFrame, str, or BaseException.")
82
82
 
83
- print(f"Log saved to: {base_path}")
83
+ print(f"Log saved to: '{base_path}'")
84
84
 
85
85
  except Exception as e:
86
86
  print(f"Error in custom_logger: {e}")
ml_tools/particle_swarm_optimization.py CHANGED
@@ -129,10 +129,10 @@ def run_pso(lower_boundaries: list[float],
129
129
  target_name: Union[str, None]=None,
130
130
  feature_names: Union[list[str], None]=None,
131
131
  swarm_size: int=200,
132
- max_iterations: int=1000,
132
+ max_iterations: int=1500,
133
133
  inequality_constrain_function=None,
134
- post_hoc_analysis: Optional[int]=3,
135
- workers: int=3) -> Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]:
134
+ post_hoc_analysis: Optional[int]=5,
135
+ workers: int=1) -> Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]:
136
136
  """
137
137
  Executes Particle Swarm Optimization (PSO) to optimize a given objective function and saves the results as a CSV file.
138
138
 
@@ -1,19 +0,0 @@
1
- dragon_ml_toolbox-1.4.3.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
- dragon_ml_toolbox-1.4.3.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=jDnniT0tgD0uw1NpjibsPF-qK3wmOKgTykLG2iNQU7E,1840
3
- ml_tools/MICE_imputation.py,sha256=3CN_Z5NnQnr9BQOBcccIV13BcV-zRSvWUpYXoMZpPt8,10142
4
- ml_tools/VIF_factor.py,sha256=LQWr1P8WYij07FX_3RZC6Rr22bfAMnrt0Lhvi7SbBpY,9846
5
- ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- ml_tools/data_exploration.py,sha256=iRMyn-H0ffjhLkL-B5zKSb1tlyT4bKm0H4vE_GMaXP0,19903
7
- ml_tools/datasetmaster.py,sha256=EFUEX-tqq94Ak1rXXYR-XaX85olrxvF2EuytdzUK7y0,29131
8
- ml_tools/ensemble_learning.py,sha256=5CCd8w0j-uDkf7ToN2ENT_KdZbB8ZQUFYlrKN-OHUxA,37533
9
- ml_tools/handle_excel.py,sha256=ZJui5__0rc2T8UGHTheqZGhKmdVZ7Q2I54IoYCjAqJw,12612
10
- ml_tools/logger.py,sha256=NOtL3YSuffAGmpTpXjY-uJjqFLdRG_jpL7MDyloBw9c,4712
11
- ml_tools/particle_swarm_optimization.py,sha256=g_KwPQL77HuVwceABP17RsF__qLmNAp2YVsXOxmFEOM,20034
12
- ml_tools/pytorch_models.py,sha256=bpWZsrSwCvHJQkR6UfoPpElsMv9AvmiNErNHC8NYB_I,10132
13
- ml_tools/trainer.py,sha256=WAZ4EdrZuTOAnGXRWV3XcLNce4s7EKGf2-qchLC08Ik,15702
14
- ml_tools/utilities.py,sha256=r7uEmo38Imly56BP3-Jv6dFJvLsbGipeBlkiZx2fcNQ,10189
15
- ml_tools/vision_helpers.py,sha256=idQ-Ugp1IdsvwXiYyhYa9G3rTRTm37YRpkQDLEpANHM,7701
16
- dragon_ml_toolbox-1.4.3.dist-info/METADATA,sha256=l0uOaYlimIH_YCT89C2mOkHaiKDEtq9XxhHpbcMCppU,2516
17
- dragon_ml_toolbox-1.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
- dragon_ml_toolbox-1.4.3.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
19
- dragon_ml_toolbox-1.4.3.dist-info/RECORD,,