dragon-ml-toolbox 8.1.0__py3-none-any.whl → 9.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (34)
  1. {dragon_ml_toolbox-8.1.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/METADATA +5 -1
  2. dragon_ml_toolbox-9.0.0.dist-info/RECORD +35 -0
  3. ml_tools/ETL_engineering.py +216 -81
  4. ml_tools/GUI_tools.py +5 -5
  5. ml_tools/MICE_imputation.py +12 -8
  6. ml_tools/ML_callbacks.py +6 -3
  7. ml_tools/ML_datasetmaster.py +37 -20
  8. ml_tools/ML_evaluation.py +4 -4
  9. ml_tools/ML_evaluation_multi.py +26 -17
  10. ml_tools/ML_inference.py +30 -23
  11. ml_tools/ML_models.py +14 -14
  12. ml_tools/ML_optimization.py +4 -3
  13. ml_tools/ML_scaler.py +7 -7
  14. ml_tools/ML_trainer.py +17 -15
  15. ml_tools/PSO_optimization.py +16 -8
  16. ml_tools/RNN_forecast.py +1 -1
  17. ml_tools/SQL.py +22 -13
  18. ml_tools/VIF_factor.py +7 -6
  19. ml_tools/_logger.py +105 -7
  20. ml_tools/custom_logger.py +12 -8
  21. ml_tools/data_exploration.py +20 -15
  22. ml_tools/ensemble_evaluation.py +10 -6
  23. ml_tools/ensemble_inference.py +18 -18
  24. ml_tools/ensemble_learning.py +8 -5
  25. ml_tools/handle_excel.py +15 -11
  26. ml_tools/optimization_tools.py +3 -4
  27. ml_tools/path_manager.py +21 -15
  28. ml_tools/utilities.py +35 -26
  29. dragon_ml_toolbox-8.1.0.dist-info/RECORD +0 -36
  30. ml_tools/_ML_optimization_multi.py +0 -231
  31. {dragon_ml_toolbox-8.1.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/WHEEL +0 -0
  32. {dragon_ml_toolbox-8.1.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE +0 -0
  33. {dragon_ml_toolbox-8.1.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  34. {dragon_ml_toolbox-8.1.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/top_level.txt +0 -0
ml_tools/data_exploration.py CHANGED
@@ -83,7 +83,8 @@ def drop_constant_columns(df: pd.DataFrame, verbose: bool = True) -> pd.DataFram
83
83
  A new DataFrame with the constant columns removed.
84
84
  """
85
85
  if not isinstance(df, pd.DataFrame):
86
- raise TypeError("Input must be a pandas DataFrame.")
86
+ _LOGGER.error("Input must be a pandas DataFrame.")
87
+ raise TypeError()
87
88
 
88
89
  original_columns = set(df.columns)
89
90
  cols_to_keep = []
@@ -136,7 +137,7 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
136
137
  _LOGGER.info(f"🧹 Dropping {target_na.sum()} rows with all target columns missing.")
137
138
  df_clean = df_clean[~target_na]
138
139
  else:
139
- _LOGGER.info("No rows with all targets missing.")
140
+ _LOGGER.info("No rows found where all targets are missing.")
140
141
  else:
141
142
  valid_targets = []
142
143
 
@@ -149,9 +150,9 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
149
150
  _LOGGER.info(f"🧹 Dropping {len(rows_to_drop)} rows with more than {threshold*100:.0f}% missing feature data.")
150
151
  df_clean = df_clean.drop(index=rows_to_drop)
151
152
  else:
152
- _LOGGER.info(f"No rows exceed the {threshold*100:.0f}% missing feature data threshold.")
153
+ _LOGGER.info(f"No rows exceed the {threshold*100:.0f}% missing feature data threshold.")
153
154
  else:
154
- _LOGGER.warning("⚠️ No feature columns available to evaluate.")
155
+ _LOGGER.warning("No feature columns available to evaluate.")
155
156
 
156
157
  return df_clean
157
158
 
@@ -211,7 +212,7 @@ def drop_columns_with_missing_data(df: pd.DataFrame, threshold: float = 0.7, sho
211
212
  cols_to_drop = missing_fraction[missing_fraction > threshold].index
212
213
 
213
214
  if len(cols_to_drop) > 0:
214
- _LOGGER.info(f"Dropping columns with more than {threshold*100:.0f}% missing data:")
215
+ _LOGGER.info(f"🧹 Dropping columns with more than {threshold*100:.0f}% missing data:")
215
216
  print(list(cols_to_drop))
216
217
 
217
218
  result_df = df.drop(columns=cols_to_drop)
@@ -339,7 +340,8 @@ def split_continuous_binary(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFram
339
340
  TypeError: If any column is not numeric.
340
341
  """
341
342
  if not all(np.issubdtype(dtype, np.number) for dtype in df.dtypes):
342
- raise TypeError("All columns must be numeric (int or float).")
343
+ _LOGGER.error("All columns must be numeric (int or float).")
344
+ raise TypeError()
343
345
 
344
346
  binary_cols = []
345
347
  continuous_cols = []
@@ -390,7 +392,7 @@ def plot_correlation_heatmap(df: pd.DataFrame,
390
392
  """
391
393
  numeric_df = df.select_dtypes(include='number')
392
394
  if numeric_df.empty:
393
- _LOGGER.warning("⚠️ No numeric columns found. Heatmap not generated.")
395
+ _LOGGER.warning("No numeric columns found. Heatmap not generated.")
394
396
  return
395
397
 
396
398
  corr = numeric_df.corr(method=method)
@@ -558,11 +560,11 @@ def clip_outliers_single(
558
560
  None: if a problem with the dataframe column occurred.
559
561
  """
560
562
  if column not in df.columns:
561
- _LOGGER.warning(f"⚠️ Column '{column}' not found in DataFrame.")
563
+ _LOGGER.warning(f"Column '{column}' not found in DataFrame.")
562
564
  return None
563
565
 
564
566
  if not pd.api.types.is_numeric_dtype(df[column]):
565
- _LOGGER.warning(f"⚠️ Column '{column}' must be numeric.")
567
+ _LOGGER.warning(f"Column '{column}' must be numeric.")
566
568
  return None
567
569
 
568
570
  new_df = df.copy(deep=True)
@@ -600,13 +602,16 @@ def clip_outliers_multi(
600
602
  for col, bounds in clip_dict.items():
601
603
  try:
602
604
  if col not in df.columns:
603
- raise ValueError(f"Column '{col}' not found in DataFrame.")
605
+ _LOGGER.error(f"Column '{col}' not found in DataFrame.")
606
+ raise ValueError()
604
607
 
605
608
  if not pd.api.types.is_numeric_dtype(df[col]):
606
- raise TypeError(f"Column '{col}' is not numeric.")
609
+ _LOGGER.error(f"Column '{col}' is not numeric.")
610
+ raise TypeError()
607
611
 
608
612
  if not (isinstance(bounds, tuple) and len(bounds) == 2):
609
- raise ValueError(f"Bounds for '{col}' must be a tuple of (min, max).")
613
+ _LOGGER.error(f"Bounds for '{col}' must be a tuple of (min, max).")
614
+ raise ValueError()
610
615
 
611
616
  min_val, max_val = bounds
612
617
  new_df[col] = new_df[col].clip(lower=min_val, upper=max_val)
@@ -621,7 +626,7 @@ def clip_outliers_multi(
621
626
  _LOGGER.info(f"Clipped {clipped_columns} columns.")
622
627
 
623
628
  if skipped_columns:
624
- _LOGGER.warning("⚠️ Skipped columns:")
629
+ _LOGGER.warning("Skipped columns:")
625
630
  for col, msg in skipped_columns:
626
631
  print(f" - {col}: {msg}")
627
632
 
@@ -707,11 +712,11 @@ def standardize_percentages(
707
712
  for col in columns:
708
713
  # --- Robustness Checks ---
709
714
  if col not in df_copy.columns:
710
- _LOGGER.warning(f"⚠️ Column '{col}' not found. Skipping.")
715
+ _LOGGER.warning(f"Column '{col}' not found. Skipping.")
711
716
  continue
712
717
 
713
718
  if not is_numeric_dtype(df_copy[col]):
714
- _LOGGER.warning(f"⚠️ Column '{col}' is not numeric. Skipping.")
719
+ _LOGGER.warning(f"Column '{col}' is not numeric. Skipping.")
715
720
  continue
716
721
 
717
722
  # --- Applying the Logic ---
ml_tools/ensemble_evaluation.py CHANGED
@@ -119,8 +119,8 @@ def evaluate_model_classification(
119
119
  heatmap_path = save_path / f"Classification_Report_{sanitized_target_name}.svg"
120
120
  plt.savefig(heatmap_path, format="svg", bbox_inches="tight")
121
121
  plt.close()
122
- except Exception as e:
123
- _LOGGER.error(f"Could not generate classification report heatmap for {target_name}: {e}")
122
+ except Exception:
123
+ _LOGGER.exception(f"Could not generate classification report heatmap for {target_name}:")
124
124
 
125
125
  # Create confusion matrix
126
126
  fig, ax = plt.subplots(figsize=figsize)
@@ -198,7 +198,8 @@ def plot_roc_curve(
198
198
 
199
199
  elif hasattr(probabilities_or_model, "predict_proba"):
200
200
  if input_features is None:
201
- raise ValueError("input_features must be provided when using a classifier.")
201
+ _LOGGER.error("input_features must be provided when using a classifier.")
202
+ raise ValueError()
202
203
 
203
204
  try:
204
205
  classes = probabilities_or_model.classes_ # type: ignore
@@ -209,7 +210,8 @@ def plot_roc_curve(
209
210
  y_score = probabilities_or_model.predict_proba(input_features)[:, positive_class_index] # type: ignore
210
211
 
211
212
  else:
212
- raise TypeError("Unsupported type for 'probabilities_or_model'. Must be a NumPy array or a model with support for '.predict_proba()'.")
213
+ _LOGGER.error("Unsupported type for 'probabilities_or_model'. Must be a NumPy array or a model with support for '.predict_proba()'.")
214
+ raise TypeError()
213
215
 
214
216
  # ROC and AUC
215
217
  fpr, tpr, _ = roc_curve(true_labels, y_score)
@@ -276,7 +278,8 @@ def plot_precision_recall_curve(
276
278
 
277
279
  elif hasattr(probabilities_or_model, "predict_proba"):
278
280
  if input_features is None:
279
- raise ValueError("input_features must be provided when using a classifier.")
281
+ _LOGGER.error("input_features must be provided when using a classifier.")
282
+ raise ValueError()
280
283
  try:
281
284
  classes = probabilities_or_model.classes_ # type: ignore
282
285
  positive_class_index = list(classes).index(1)
@@ -284,7 +287,8 @@ def plot_precision_recall_curve(
284
287
  positive_class_index = 1
285
288
  y_score = probabilities_or_model.predict_proba(input_features)[:, positive_class_index] # type: ignore
286
289
  else:
287
- raise TypeError("Unsupported type for 'probabilities_or_model'. Must be a NumPy array or a model with support for '.predict_proba()'.")
290
+ _LOGGER.error("Unsupported type for 'probabilities_or_model'. Must be a NumPy array or a model with support for '.predict_proba()'.")
291
+ raise TypeError()
288
292
 
289
293
  # Calculate PR curve and AP score
290
294
  precision, recall, _ = precision_recall_curve(true_labels, y_score)
ml_tools/ensemble_inference.py CHANGED
@@ -59,15 +59,15 @@ class InferenceHandler:
59
59
  self._feature_names = feature_names_list
60
60
  elif self._feature_names != feature_names_list:
61
61
  # Add a warning if subsequent models have different feature names.
62
- _LOGGER.warning(f"⚠️ Mismatched feature names in {fname}. Using feature order from the first model loaded.")
62
+ _LOGGER.warning(f"Mismatched feature names in {fname}. Using feature order from the first model loaded.")
63
63
 
64
64
  self.models[target_name] = model
65
65
  if self.verbose:
66
- _LOGGER.info(f"Loaded model for target: {target_name}")
66
+ _LOGGER.info(f"Loaded model for target: {target_name}")
67
+
68
+ except Exception:
69
+ _LOGGER.error(f"Failed to load or parse {fname}.")
67
70
 
68
- except Exception as e:
69
- _LOGGER.warning(f"⚠️ Failed to load or parse {fname}: {e}")
70
-
71
71
  @property
72
72
  def feature_names(self) -> List[str]:
73
73
  """
@@ -92,7 +92,8 @@ class InferenceHandler:
92
92
  features = features.reshape(1, -1)
93
93
 
94
94
  if features.shape[0] != 1:
95
- raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
95
+ _LOGGER.error("The 'predict()' method is for a single sample. Use 'predict_batch()' for multiple samples.")
96
+ raise ValueError()
96
97
 
97
98
  results: Dict[str, Any] = dict()
98
99
  for target_name, model in self.models.items():
@@ -106,7 +107,7 @@ class InferenceHandler:
106
107
  EnsembleKeys.CLASSIFICATION_PROBABILITIES: probabilities}
107
108
 
108
109
  if self.verbose:
109
- _LOGGER.info("Inference process complete.")
110
+ _LOGGER.info("Inference process complete.")
110
111
  return results
111
112
 
112
113
  def predict_batch(self, features: np.ndarray) -> Dict[str, Any]:
@@ -122,7 +123,8 @@ class InferenceHandler:
122
123
  - For classification: The value is another dictionary {'labels': ..., 'probabilities': ...}.
123
124
  """
124
125
  if features.ndim != 2:
125
- raise ValueError("Input for batch prediction must be a 2D array.")
126
+ _LOGGER.error("Input for batch prediction must be a 2D array.")
127
+ raise ValueError()
126
128
 
127
129
  results: Dict[str, Any] = dict()
128
130
  for target_name, model in self.models.items():
@@ -134,7 +136,7 @@ class InferenceHandler:
134
136
  results[target_name] = {"labels": labels, "probabilities": probabilities}
135
137
 
136
138
  if self.verbose:
137
- _LOGGER.info("Inference process complete.")
139
+ _LOGGER.info("Inference process complete.")
138
140
 
139
141
  return results
140
142
 
@@ -174,11 +176,11 @@ def model_report(
174
176
  target = full_object[EnsembleKeys.TARGET]
175
177
  features = full_object[EnsembleKeys.FEATURES]
176
178
  except FileNotFoundError:
177
- _LOGGER.error(f"Model file not found at '{model_p}'")
179
+ _LOGGER.error(f"Model file not found at '{model_p}'")
178
180
  raise
179
181
  except (KeyError, TypeError) as e:
180
182
  _LOGGER.error(
181
- f"The serialized object is missing required keys '{EnsembleKeys.MODEL}', '{EnsembleKeys.TARGET}', '{EnsembleKeys.FEATURES}'"
183
+ f"The serialized object is missing required keys '{EnsembleKeys.MODEL}', '{EnsembleKeys.TARGET}', '{EnsembleKeys.FEATURES}'"
182
184
  )
183
185
  raise e
184
186
 
@@ -209,9 +211,9 @@ def model_report(
209
211
  with open(json_filepath, 'w') as f:
210
212
  json.dump(report_data, f, indent=4)
211
213
  if verbose:
212
- _LOGGER.info(f"JSON report saved to: '{json_filepath}'")
214
+ _LOGGER.info(f"JSON report saved to: '{json_filepath}'")
213
215
  except PermissionError:
214
- _LOGGER.error(f"Permission denied to write JSON report at '{json_filepath}'")
216
+ _LOGGER.exception(f"Permission denied to write JSON report at '{json_filepath}'.")
215
217
 
216
218
  # --- 5. Return the extracted data ---
217
219
  return report_data
@@ -233,15 +235,13 @@ def _deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_
233
235
  try:
234
236
  obj = joblib.load(true_filepath)
235
237
  except (IOError, OSError, EOFError, TypeError, ValueError) as e:
236
- message = f"Failed to deserialize object from '{true_filepath}': {e}"
238
+ _LOGGER.error(f"Failed to deserialize object from '{true_filepath}'.")
237
239
  if raise_on_error:
238
- raise Exception(message)
239
- else:
240
- print(message)
240
+ raise e
241
241
  return None
242
242
  else:
243
243
  if verbose:
244
- print(f"\n✅ Loaded object of type '{type(obj)}'")
244
+ _LOGGER.info(f"Loaded object of type '{type(obj)}'")
245
245
  return obj
246
246
 
247
247
 
ml_tools/ensemble_learning.py CHANGED
@@ -339,7 +339,8 @@ def _resample(X_train: np.ndarray, y_train: pd.Series,
339
339
  elif strategy == 'ADASYN':
340
340
  resample_algorithm = ADASYN(random_state=random_state, n_neighbors=3)
341
341
  else:
342
- raise ValueError(f"Invalid resampling strategy: {strategy}")
342
+ _LOGGER.error(f"Invalid resampling strategy: {strategy}")
343
+ raise ValueError()
343
344
 
344
345
  X_res, y_res, *_ = resample_algorithm.fit_resample(X_train, y_train)
345
346
  return X_res, y_res
@@ -459,7 +460,8 @@ def train_test_pipeline(model, model_name: str, dataset_id: str, task: Literal["
459
460
  y_pred = evaluate_model_regression(model=trained_model, model_name=model_name, save_dir=local_save_directory,
460
461
  x_test_scaled=test_features, single_y_test=test_target, target_name=target_name)
461
462
  else:
462
- raise ValueError(f"Unrecognized task '{task}' for model training,")
463
+ _LOGGER.error(f"Unrecognized task '{task}' for model training,")
464
+ raise ValueError()
463
465
  if debug:
464
466
  _LOGGER.info(f"Predicted vector: {type(y_pred)} with shape: {y_pred.shape}")
465
467
 
@@ -487,13 +489,14 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat
487
489
  elif isinstance(model_object, ClassificationTreeModels):
488
490
  task = "classification"
489
491
  if handle_classification_imbalance is None:
490
- _LOGGER.warning("⚠️ No method to handle classification class imbalance has been selected. Datasets are assumed to be balanced.")
492
+ _LOGGER.warning("No method to handle classification class imbalance has been selected. Datasets are assumed to be balanced.")
491
493
  elif handle_classification_imbalance == "by_model":
492
494
  model_object.use_model_balance = True
493
495
  else:
494
496
  model_object.use_model_balance = False
495
497
  else:
496
- raise TypeError(f"Unrecognized model {type(model_object)}")
498
+ _LOGGER.error(f"Unrecognized model {type(model_object)}")
499
+ raise TypeError()
497
500
 
498
501
  #Check paths
499
502
  datasets_path = make_fullpath(datasets_dir)
@@ -519,7 +522,7 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat
519
522
  debug=debug, save_dir=save_path, save_model=save_model,
520
523
  generate_learning_curves=generate_learning_curves)
521
524
 
522
- _LOGGER.info("Training and evaluation complete.")
525
+ _LOGGER.info("Training and evaluation complete.")
523
526
 
524
527
 
525
528
  def info():
ml_tools/handle_excel.py CHANGED
@@ -37,7 +37,8 @@ def find_excel_files(
37
37
  input_path = make_fullpath(directory)
38
38
 
39
39
  if not input_path.is_dir():
40
- raise NotADirectoryError(f"Directory not found: {input_path}")
40
+ _LOGGER.error(f"Directory not found: {input_path}")
41
+ raise NotADirectoryError()
41
42
 
42
43
  excel_files = [
43
44
  f for f in input_path.iterdir()
@@ -47,7 +48,8 @@ def find_excel_files(
47
48
  ]
48
49
 
49
50
  if not excel_files:
50
- raise FileNotFoundError(f"No valid Excel files found in directory: {input_path}")
51
+ _LOGGER.error(f"No valid Excel files found in directory: {input_path}")
52
+ raise FileNotFoundError()
51
53
 
52
54
  return excel_files
53
55
 
@@ -99,7 +101,7 @@ def unmerge_and_split_excel(filepath: Union[str,Path]) -> None:
99
101
 
100
102
  total_output_files += 1
101
103
 
102
- _LOGGER.info(f"Processed file: {file_path} into {total_output_files} output file(s).")
104
+ _LOGGER.info(f"Processed file: {file_path} into {total_output_files} output file(s).")
103
105
  return None
104
106
 
105
107
 
@@ -155,7 +157,7 @@ def unmerge_and_split_from_directory(input_dir: Union[str,Path], output_dir: Uni
155
157
 
156
158
  total_output_files += 1
157
159
 
158
- _LOGGER.info(f"Processed {len(excel_files)} input Excel file(s) with a total of {total_output_files} output Excel file(s).")
160
+ _LOGGER.info(f"Processed {len(excel_files)} input Excel file(s) with a total of {total_output_files} output Excel file(s).")
159
161
  return None
160
162
 
161
163
 
@@ -199,13 +201,13 @@ def validate_excel_schema(
199
201
  invalid_files.append(file)
200
202
 
201
203
  except Exception as e:
202
- _LOGGER.error(f"Error processing '{file}': {e}")
204
+ _LOGGER.error(f"Error processing '{file}': {e}")
203
205
  invalid_files.append(file)
204
206
 
205
207
  valid_excel_number = len(excel_paths) - len(invalid_files)
206
208
  _LOGGER.info(f"{valid_excel_number} out of {len(excel_paths)} excel files conform to the schema.")
207
209
  if invalid_files:
208
- _LOGGER.warning(f"⚠️ {len(invalid_files)} excel files are invalid:")
210
+ _LOGGER.warning(f"{len(invalid_files)} excel files are invalid:")
209
211
  for in_file in invalid_files:
210
212
  print(f" - {in_file.name}")
211
213
 
@@ -252,7 +254,8 @@ def vertical_merge_transform_excel(
252
254
  if target_columns is not None:
253
255
  missing = [col for col in target_columns if col not in df.columns]
254
256
  if missing:
255
- raise ValueError(f"Invalid columns in {file.name}: {missing}")
257
+ _LOGGER.error(f"Invalid columns in {file.name}: {missing}")
258
+ raise ValueError()
256
259
  df = df[target_columns]
257
260
 
258
261
  dataframes.append(df)
@@ -262,11 +265,12 @@ def vertical_merge_transform_excel(
262
265
  if rename_columns is not None:
263
266
  expected_len = len(target_columns if target_columns is not None else merged_df.columns)
264
267
  if len(rename_columns) != expected_len:
265
- raise ValueError("Length of 'rename_columns' must match the selected columns")
268
+ _LOGGER.error("Length of 'rename_columns' must match the selected columns")
269
+ raise ValueError()
266
270
  merged_df.columns = rename_columns
267
271
 
268
272
  merged_df.to_csv(csv_path, index=False, encoding='utf-8')
269
- _LOGGER.info(f"Merged {len(dataframes)} excel files into '{csv_filename}'.")
273
+ _LOGGER.info(f"Merged {len(dataframes)} excel files into '{csv_filename}'.")
270
274
 
271
275
 
272
276
  def horizontal_merge_transform_excel(
@@ -327,7 +331,7 @@ def horizontal_merge_transform_excel(
327
331
  duplicate_columns = merged_df.columns[merged_df.columns.duplicated()].tolist()
328
332
 
329
333
  if duplicate_columns:
330
- _LOGGER.warning(f"⚠️ Duplicate columns: {duplicate_columns}")
334
+ _LOGGER.warning(f"Duplicate columns: {duplicate_columns}")
331
335
 
332
336
  if skip_duplicates:
333
337
  merged_df = merged_df.loc[:, ~merged_df.columns.duplicated()]
@@ -347,7 +351,7 @@ def horizontal_merge_transform_excel(
347
351
 
348
352
  merged_df.to_csv(csv_path, index=False, encoding='utf-8')
349
353
 
350
- _LOGGER.info(f"Merged {len(excel_files)} Excel files into '{csv_filename}'.")
354
+ _LOGGER.info(f"Merged {len(excel_files)} Excel files into '{csv_filename}'.")
351
355
 
352
356
 
353
357
  def info():
ml_tools/optimization_tools.py CHANGED
@@ -61,7 +61,7 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
61
61
 
62
62
  long_df = pd.concat(data_to_plot, ignore_index=True)
63
63
  features = long_df['feature'].unique()
64
- _LOGGER.info(f"📂 Found data for {len(features)} features across {len(long_df['target'].unique())} targets. Generating plots...")
64
+ _LOGGER.info(f"Found data for {len(features)} features across {len(long_df['target'].unique())} targets. Generating plots...")
65
65
 
66
66
  # --- Plotting Loop ---
67
67
  for feature_name in features:
@@ -105,7 +105,7 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
105
105
  plt.savefig(plot_filename, bbox_inches='tight')
106
106
  plt.close()
107
107
 
108
- _LOGGER.info(f"All plots saved successfully to: '{output_path}'")
108
+ _LOGGER.info(f"All plots saved successfully to: '{output_path}'")
109
109
 
110
110
 
111
111
  def _save_result(
@@ -129,8 +129,7 @@ def _save_result(
129
129
  if db_manager and db_table_name:
130
130
  db_manager.insert_row(db_table_name, result_dict)
131
131
  else:
132
- _LOGGER.warning("⚠️ SQLite saving requested but db_manager or table_name not provided.")
133
-
132
+ _LOGGER.warning("SQLite saving requested but db_manager or table_name not provided.")
134
133
 
135
134
 
136
135
  def info():
ml_tools/path_manager.py CHANGED
@@ -88,7 +88,7 @@ class PathManager:
88
88
  try:
89
89
  return self._paths[key]
90
90
  except KeyError:
91
- _LOGGER.error(f"Path key '{key}' not found.")
91
+ _LOGGER.error(f"Path key '{key}' not found.")
92
92
  raise
93
93
 
94
94
  def update(self, new_paths: Dict[str, Union[str, Path]], overwrite: bool = False) -> None:
@@ -106,9 +106,8 @@ class PathManager:
106
106
  if not overwrite:
107
107
  for key in new_paths:
108
108
  if key in self._paths:
109
- raise KeyError(
110
- f"❌ Path key '{key}' already exists in the manager. To replace it, call update() with overwrite=True."
111
- )
109
+ _LOGGER.error(f"Path key '{key}' already exists in the manager. To replace it, call update() with overwrite=True.")
110
+ raise KeyError
112
111
 
113
112
  # Resolve any string paths to Path objects before storing
114
113
  resolved_new_paths = {k: Path(v) for k, v in new_paths.items()}
@@ -136,7 +135,7 @@ class PathManager:
136
135
  if key in self._paths:
137
136
  path_items.append((key, self._paths[key]))
138
137
  elif verbose:
139
- _LOGGER.warning(f"⚠️ Key '{key}' not found in PathManager, skipping.")
138
+ _LOGGER.warning(f"Key '{key}' not found in PathManager, skipping.")
140
139
  else:
141
140
  path_items = self._paths.items()
142
141
 
@@ -153,7 +152,7 @@ class PathManager:
153
152
 
154
153
  if self._is_bundled and is_internal_path:
155
154
  if verbose:
156
- _LOGGER.warning(f"⚠️ Skipping internal directory '{key}' in bundled app (read-only).")
155
+ _LOGGER.warning(f"Skipping internal directory '{key}' in bundled app (read-only).")
157
156
  continue
158
157
  # -------------------------
159
158
 
@@ -261,7 +260,8 @@ def make_fullpath(
261
260
  resolved = path.resolve(strict=True)
262
261
  except FileNotFoundError:
263
262
  if not make:
264
- raise ValueError(f"Path does not exist: '{path}'")
263
+ _LOGGER.error(f"Path does not exist: '{path}'.")
264
+ raise FileNotFoundError()
265
265
 
266
266
  try:
267
267
  if is_file:
@@ -271,14 +271,17 @@ def make_fullpath(
271
271
  else:
272
272
  path.mkdir(parents=True, exist_ok=True)
273
273
  resolved = path.resolve(strict=True)
274
- except Exception as e:
275
- raise ValueError(f"Failed to create {'file' if is_file else 'directory'} '{path}': {e}")
274
+ except Exception:
275
+ _LOGGER.exception(f"Failed to create {'file' if is_file else 'directory'} '{path}'.")
276
+ raise IOError()
276
277
 
277
278
  if enforce == "file" and not resolved.is_file():
278
- raise TypeError(f"Path was enforced as a file, but it is not: '{resolved}'")
279
+ _LOGGER.error(f"Path was enforced as a file, but it is not: '{resolved}'")
280
+ raise TypeError()
279
281
 
280
282
  if enforce == "directory" and not resolved.is_dir():
281
- raise TypeError(f"Path was enforced as a directory, but it is not: '{resolved}'")
283
+ _LOGGER.error(f"Path was enforced as a directory, but it is not: '{resolved}'")
284
+ raise TypeError()
282
285
 
283
286
  if verbose:
284
287
  if resolved.is_file():
@@ -315,7 +318,8 @@ def sanitize_filename(filename: str) -> str:
315
318
 
316
319
  # Check for empty string after sanitization
317
320
  if not sanitized:
318
- raise ValueError("The sanitized filename is empty. The original input may have contained only invalid characters.")
321
+ _LOGGER.error("The sanitized filename is empty. The original input may have contained only invalid characters.")
322
+ raise ValueError()
319
323
 
320
324
  return sanitized
321
325
 
@@ -334,7 +338,8 @@ def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str,
334
338
 
335
339
  csv_paths = list(dir_path.glob("*.csv"))
336
340
  if not csv_paths:
337
- raise IOError(f"No CSV files found in directory: {dir_path.name}")
341
+ _LOGGER.error(f"No CSV files found in directory: {dir_path.name}")
342
+ raise IOError()
338
343
 
339
344
  # make a dictionary of paths and names
340
345
  name_path_dict = {p.stem: p for p in csv_paths}
@@ -367,12 +372,13 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
367
372
 
368
373
  matched_paths = list(dir_path.glob(pattern))
369
374
  if not matched_paths:
370
- raise IOError(f"No '.{normalized_ext}' files found in directory: {dir_path}")
375
+ _LOGGER.error(f"No '.{normalized_ext}' files found in directory: {dir_path}.")
376
+ raise IOError()
371
377
 
372
378
  name_path_dict = {p.stem: p for p in matched_paths}
373
379
 
374
380
  if verbose:
375
- _LOGGER.info(f"\n📂 '{normalized_ext.upper()}' files found:")
381
+ _LOGGER.info(f"📂 '{normalized_ext.upper()}' files found:")
376
382
  for name in name_path_dict:
377
383
  print(f"\t{name}")
378
384