dragon-ml-toolbox 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.

Files changed (34)
  1. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/METADATA +5 -1
  2. dragon_ml_toolbox-9.0.0.dist-info/RECORD +35 -0
  3. ml_tools/ETL_engineering.py +177 -79
  4. ml_tools/GUI_tools.py +5 -5
  5. ml_tools/MICE_imputation.py +12 -8
  6. ml_tools/ML_callbacks.py +6 -3
  7. ml_tools/ML_datasetmaster.py +37 -20
  8. ml_tools/ML_evaluation.py +4 -4
  9. ml_tools/ML_evaluation_multi.py +26 -17
  10. ml_tools/ML_inference.py +30 -23
  11. ml_tools/ML_models.py +14 -14
  12. ml_tools/ML_optimization.py +4 -3
  13. ml_tools/ML_scaler.py +7 -7
  14. ml_tools/ML_trainer.py +17 -15
  15. ml_tools/PSO_optimization.py +16 -8
  16. ml_tools/RNN_forecast.py +1 -1
  17. ml_tools/SQL.py +22 -13
  18. ml_tools/VIF_factor.py +7 -6
  19. ml_tools/_logger.py +105 -7
  20. ml_tools/custom_logger.py +12 -8
  21. ml_tools/data_exploration.py +20 -15
  22. ml_tools/ensemble_evaluation.py +10 -6
  23. ml_tools/ensemble_inference.py +18 -18
  24. ml_tools/ensemble_learning.py +8 -5
  25. ml_tools/handle_excel.py +15 -11
  26. ml_tools/optimization_tools.py +3 -4
  27. ml_tools/path_manager.py +21 -15
  28. ml_tools/utilities.py +35 -26
  29. dragon_ml_toolbox-8.2.0.dist-info/RECORD +0 -36
  30. ml_tools/_ML_optimization_multi.py +0 -231
  31. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/WHEEL +0 -0
  32. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE +0 -0
  33. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  34. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/top_level.txt +0 -0
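Nearly every per-module change below applies the same refactor: where 8.2.0 raised an exception with an inline message, 9.0.0 first routes the message through the package's shared `_LOGGER` (from `ml_tools/_logger.py`, which itself grows by roughly 100 lines) and then raises a bare exception; several progress messages also move from `_LOGGER.info` to plain `print`, and emoji prefixes are dropped from log text. The snippet below is only a minimal sketch of that log-then-raise pattern, using Python's standard `logging` module as a stand-in for the package's internal `_LOGGER`; the helper name `validate_mode` is hypothetical and the real logger configuration is not reproduced here.

import logging

# Stand-in for ml_tools._logger._LOGGER; the real handlers and formatting differ.
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger("dragon_ml_toolbox")

def validate_mode(mode: str) -> str:
    """Hypothetical helper illustrating the 9.0.0 log-then-raise style seen in ML_callbacks.EarlyStopping."""
    if mode not in ('auto', 'min', 'max'):
        # 8.2.0 style: raise ValueError(f"EarlyStopping mode {mode} is unknown, ...")
        # 9.0.0 style: log the full message, then raise an exception without arguments.
        _LOGGER.error(f"EarlyStopping mode {mode} is unknown, choose one of ('auto', 'min', 'max')")
        raise ValueError()
    return mode

One consequence worth noting when reading the hunks: with this pattern the exception object no longer carries the message, so the detail is only visible in the log output.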
ml_tools/ML_callbacks.py CHANGED
@@ -134,7 +134,8 @@ class EarlyStopping(Callback):
  self.verbose = verbose

  if mode not in ['auto', 'min', 'max']:
- raise ValueError(f"EarlyStopping mode {mode} is unknown, choose one of ('auto', 'min', 'max')")
+ _LOGGER.error(f"EarlyStopping mode {mode} is unknown, choose one of ('auto', 'min', 'max')")
+ raise ValueError()
  self.mode = mode

  # Determine the comparison operator based on the mode
@@ -221,7 +222,8 @@ class ModelCheckpoint(Callback):
  self.last_best_filepath = None

  if mode not in ['auto', 'min', 'max']:
- raise ValueError(f"ModelCheckpoint mode {mode} is unknown.")
+ _LOGGER.error(f"ModelCheckpoint mode {mode} is unknown.")
+ raise ValueError()
  self.mode = mode

  if self.mode == 'min':
@@ -329,7 +331,8 @@ class LRScheduler(Callback):
  # For schedulers that need a metric (e.g., val_loss)
  if isinstance(self.scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
  if self.monitor is None:
- raise ValueError("LRScheduler needs a `monitor` metric for ReduceLROnPlateau.")
+ _LOGGER.error("LRScheduler needs a `monitor` metric for ReduceLROnPlateau.")
+ raise ValueError()

  metric_val = logs.get(self.monitor) # type: ignore
  if metric_val is not None:
ml_tools/ML_datasetmaster.py CHANGED
@@ -85,11 +85,13 @@ class _BaseDatasetMaker(ABC):
  try:
  continuous_feature_indices = [name_to_idx[name] for name in continuous_feature_columns] # type: ignore
  except KeyError as e:
- raise ValueError(f"Feature column '{e.args[0]}' not found.")
+ _LOGGER.error(f"Feature column '{e.args[0]}' not found.")
+ raise ValueError()
  elif all(isinstance(c, int) for c in continuous_feature_columns):
  continuous_feature_indices = continuous_feature_columns # type: ignore
  else:
- raise TypeError("`continuous_feature_columns` must be a list of all strings or all integers.")
+ _LOGGER.error("'continuous_feature_columns' must be a list of all strings or all integers.")
+ raise TypeError()

  X_train_values = X_train.values
  X_test_values = X_test.values
@@ -152,8 +154,12 @@ class _BaseDatasetMaker(ABC):
  Args:
  save_dir (str | Path): The directory where the scaler will be saved.
  """
- if not self.scaler: raise RuntimeError("No scaler was fitted or provided.")
- if not self.id: raise ValueError("Must set the `id` before saving scaler.")
+ if not self.scaler:
+ _LOGGER.error("No scaler was fitted or provided.")
+ raise RuntimeError()
+ if not self.id:
+ _LOGGER.error("Must set the `id` before saving scaler.")
+ raise ValueError()
  save_path = make_fullpath(save_dir, make=True, enforce="directory")
  sanitized_id = sanitize_filename(self.id)
  filename = f"scaler_{sanitized_id}.pth"
@@ -365,7 +371,7 @@ class VisionDatasetMaker(_BaseMaker):
  f"Image channels (bands): {img_channels or 'None'}\n"
  f"--------------------------------------"
  )
- _LOGGER.info(report)
+ print(report)

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
  stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
@@ -375,7 +381,8 @@ class VisionDatasetMaker(_BaseMaker):
  return self

  if val_size + test_size >= 1.0:
- raise ValueError("The sum of val_size and test_size must be less than 1.")
+ _LOGGER.error("The sum of val_size and test_size must be less than 1.")
+ raise ValueError()

  indices = list(range(len(self.full_dataset)))
  labels_for_split = self.labels if stratify else None
@@ -409,7 +416,8 @@ class VisionDatasetMaker(_BaseMaker):
  extra_train_transforms: Optional[List] = None) -> 'VisionDatasetMaker':
  """Configures and applies the image transformations (augmentations)."""
  if not self._is_split:
- raise RuntimeError("Transforms must be configured AFTER splitting data. Call .split_data() first.")
+ _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
+ raise RuntimeError()

  base_train_transforms = [transforms.RandomResizedCrop(crop_size), transforms.RandomHorizontalFlip()]
  if extra_train_transforms:
@@ -432,9 +440,10 @@ class VisionDatasetMaker(_BaseMaker):
  def get_datasets(self) -> Tuple[Dataset, ...]:
  """Returns the final train, validation, and optional test datasets."""
  if not self._is_split:
- raise RuntimeError("Data has not been split. Call .split_data() first.")
+ _LOGGER.error("Data has not been split. Call .split_data() first.")
+ raise RuntimeError()
  if not self._are_transforms_configured:
- _LOGGER.warning("⚠️ Transforms have not been configured. Using default ToTensor only.")
+ _LOGGER.warning("Transforms have not been configured. Using default ToTensor only.")

  if self._test_dataset:
  return self._train_dataset, self._val_dataset, self._test_dataset
@@ -468,7 +477,8 @@ class SequenceMaker(_BaseMaker):
  self.time_axis = numpy.arange(len(data))
  self.sequence = data.astype(numpy.float32)
  else:
- raise TypeError("Data must be a pandas DataFrame/Series or a numpy array.")
+ _LOGGER.error("Data must be a pandas DataFrame/Series or a numpy array.")
+ raise TypeError()

  self.train_sequence = None
  self.test_sequence = None
@@ -483,10 +493,11 @@ class SequenceMaker(_BaseMaker):
  splitting to prevent data leakage from the test set.
  """
  if not self._is_split:
- raise RuntimeError("Data must be split BEFORE normalizing. Call .split_data() first.")
+ _LOGGER.error("Data must be split BEFORE normalizing. Call .split_data() first.")
+ raise RuntimeError()

  if self.scaler:
- _LOGGER.warning("⚠️ Data has already been normalized.")
+ _LOGGER.warning("Data has already been normalized.")
  return self

  # 1. PytorchScaler requires a Dataset to fit. Create a temporary one.
@@ -511,13 +522,13 @@
  self.test_sequence = self.scaler.transform(test_tensor).numpy().flatten()

  self._is_normalized = True
- _LOGGER.info("Sequence data normalized using PytorchScaler.")
+ _LOGGER.info("Sequence data normalized using PytorchScaler.")
  return self

  def split_data(self, test_size: float = 0.2) -> 'SequenceMaker':
  """Splits the sequence into training and testing portions."""
  if self._is_split:
- _LOGGER.warning("⚠️ Data has already been split.")
+ _LOGGER.warning("Data has already been split.")
  return self

  split_idx = int(len(self.sequence) * (1 - test_size))
@@ -538,7 +549,8 @@ class SequenceMaker(_BaseMaker):
  "sequence-to-sequence": Label vectors are of the same size as the feature vectors instead of a single future prediction.
  """
  if not self._is_split:
- raise RuntimeError("Cannot generate windows before splitting data. Call .split_data() first.")
+ _LOGGER.error("Cannot generate windows before splitting data. Call .split_data() first.")
+ raise RuntimeError()

  self._train_dataset = self._create_windowed_dataset(self.train_sequence, sequence_to_sequence) # type: ignore
  self._test_dataset = self._create_windowed_dataset(self.test_sequence, sequence_to_sequence) # type: ignore
@@ -550,7 +562,8 @@ class SequenceMaker(_BaseMaker):
  def _create_windowed_dataset(self, data: numpy.ndarray, use_sequence_labels: bool) -> Dataset:
  """Efficiently creates windowed features and labels using numpy."""
  if len(data) <= self.sequence_length:
- raise ValueError("Data length must be greater than the sequence_length to create at least one window.")
+ _LOGGER.error("Data length must be greater than the sequence_length to create at least one window.")
+ raise ValueError()

  if not use_sequence_labels:
  features = data[:-1]
@@ -578,7 +591,8 @@ class SequenceMaker(_BaseMaker):
  def denormalize(self, data: Union[torch.Tensor, numpy.ndarray]) -> numpy.ndarray:
  """Applies inverse transformation using the stored PytorchScaler."""
  if self.scaler is None:
- raise RuntimeError("Data was not normalized. Cannot denormalize.")
+ _LOGGER.error("Data was not normalized. Cannot denormalize.")
+ raise RuntimeError()

  # Ensure data is a torch.Tensor
  if isinstance(data, numpy.ndarray):
@@ -597,7 +611,8 @@ class SequenceMaker(_BaseMaker):
  def plot(self, predictions: Optional[numpy.ndarray] = None):
  """Plots the original training and testing data, with optional predictions."""
  if not self._is_split:
- raise RuntimeError("Cannot plot before splitting data. Call .split_data() first.")
+ _LOGGER.error("Cannot plot before splitting data. Call .split_data() first.")
+ raise RuntimeError()

  plt.figure(figsize=(15, 6))
  plt.title("Time Series Data")
@@ -618,7 +633,8 @@ class SequenceMaker(_BaseMaker):
  def get_datasets(self) -> Tuple[Dataset, Dataset]:
  """Returns the final train and test datasets."""
  if not self._are_windows_generated:
- raise RuntimeError("Windows have not been generated. Call .generate_windows() first.")
+ _LOGGER.error("Windows have not been generated. Call .generate_windows() first.")
+ raise RuntimeError()
  return self._train_dataset, self._test_dataset


@@ -637,7 +653,8 @@ class ResizeAspectFill:

  def __call__(self, image: Image.Image) -> Image.Image:
  if not isinstance(image, Image.Image):
- raise TypeError(f"Expected PIL.Image.Image, got {type(image).__name__}")
+ _LOGGER.error(f"Expected PIL.Image.Image, got {type(image).__name__}")
+ raise TypeError()

  w, h = image.size
  if w == h:
ml_tools/ML_evaluation.py CHANGED
@@ -110,7 +110,7 @@ def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pre
  _LOGGER.info(f"📊 Report heatmap saved as '{heatmap_path.name}'")
  plt.close()
  except Exception as e:
- _LOGGER.error(f"Could not generate classification report heatmap: {e}")
+ _LOGGER.error(f"Could not generate classification report heatmap: {e}")

  # Save Confusion Matrix
  fig_cm, ax_cm = plt.subplots(figsize=(6, 6), dpi=100)
@@ -172,7 +172,7 @@ def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pre

  cal_path = save_dir_path / "calibration_plot.svg"
  plt.savefig(cal_path)
- _LOGGER.info(f" Calibration plot saved as '{cal_path.name}'")
+ _LOGGER.info(f"📈 Calibration plot saved as '{cal_path.name}'")
  plt.close(fig_cal)


@@ -277,7 +277,7 @@ def shap_summary_plot(model,

  # --- Data Validation Step ---
  if np.isnan(background_data_np).any() or np.isnan(instances_to_explain_np).any():
- _LOGGER.error("Input data for SHAP contains NaN values. Aborting explanation.")
+ _LOGGER.error("Input data for SHAP contains NaN values. Aborting explanation.")
  return

  print("\n--- SHAP Value Explanation ---")
@@ -364,7 +364,7 @@ def plot_attention_importance(weights: List[torch.Tensor], feature_names: Option
  save_dir (str | Path): Directory to save the plot and summary CSV.
  """
  if not weights:
- _LOGGER.warning("⚠️ Attention weights list is empty. Skipping importance plot.")
+ _LOGGER.error("Attention weights list is empty. Skipping importance plot.")
  return

  # --- Step 1: Aggregate data ---
ml_tools/ML_evaluation_multi.py CHANGED
@@ -19,7 +19,7 @@ from sklearn.metrics import (
  jaccard_score
  )
  from pathlib import Path
- from typing import Union, List, Optional
+ from typing import Union, List

  from .path_manager import make_fullpath, sanitize_filename
  from ._logger import _LOGGER
@@ -52,11 +52,14 @@ def multi_target_regression_metrics(
  save_dir (str | Path): Directory to save plots and the report.
  """
  if y_true.ndim != 2 or y_pred.ndim != 2:
- raise ValueError("y_true and y_pred must be 2D arrays for multi-target regression.")
+ _LOGGER.error("y_true and y_pred must be 2D arrays for multi-target regression.")
+ raise ValueError()
  if y_true.shape != y_pred.shape:
- raise ValueError("Shapes of y_true and y_pred must match.")
+ _LOGGER.error("Shapes of y_true and y_pred must match.")
+ raise ValueError()
  if y_true.shape[1] != len(target_names):
- raise ValueError("Number of target names must match the number of columns in y_true.")
+ _LOGGER.error("Number of target names must match the number of columns in y_true.")
+ raise ValueError()

  save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
  metrics_summary = []
@@ -64,7 +67,7 @@
  _LOGGER.info("--- Multi-Target Regression Evaluation ---")

  for i, name in enumerate(target_names):
- _LOGGER.info(f" -> Evaluating target: '{name}'")
+ print(f" -> Evaluating target: '{name}'")
  true_i = y_true[:, i]
  pred_i = y_pred[:, i]
  sanitized_name = sanitize_filename(name)
@@ -113,7 +116,7 @@
  summary_df = pd.DataFrame(metrics_summary)
  report_path = save_dir_path / "regression_report_multi.csv"
  summary_df.to_csv(report_path, index=False)
- _LOGGER.info(f"Full regression report saved to '{report_path.name}'")
+ _LOGGER.info(f"Full regression report saved to '{report_path.name}'")


  def multi_label_classification_metrics(
@@ -139,11 +142,14 @@ def multi_label_classification_metrics(
  binary predictions for metrics like the confusion matrix.
  """
  if y_true.ndim != 2 or y_prob.ndim != 2:
- raise ValueError("y_true and y_prob must be 2D arrays for multi-label classification.")
+ _LOGGER.error("y_true and y_prob must be 2D arrays for multi-label classification.")
+ raise ValueError()
  if y_true.shape != y_prob.shape:
- raise ValueError("Shapes of y_true and y_prob must match.")
+ _LOGGER.error("Shapes of y_true and y_prob must match.")
+ raise ValueError()
  if y_true.shape[1] != len(target_names):
- raise ValueError("Number of target names must match the number of columns in y_true.")
+ _LOGGER.error("Number of target names must match the number of columns in y_true.")
+ raise ValueError()

  save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")

@@ -165,13 +171,13 @@
  f"Jaccard Score (macro): {j_score_macro:.4f}\n"
  f"--------------------------------------------------\n"
  )
- _LOGGER.info(overall_report)
+ print(overall_report)
  overall_report_path = save_dir_path / "classification_report_overall.txt"
  overall_report_path.write_text(overall_report)

  # --- Per-Label Metrics and Plots ---
  for i, name in enumerate(target_names):
- _LOGGER.info(f" -> Evaluating label: '{name}'")
+ print(f" -> Evaluating label: '{name}'")
  true_i = y_true[:, i]
  pred_i = y_pred[:, i]
  prob_i = y_prob[:, i]
@@ -215,7 +221,7 @@
  plt.savefig(pr_path)
  plt.close(fig_pr)

- _LOGGER.info(f"All individual label reports and plots saved to '{save_dir_path.name}'")
+ _LOGGER.info(f"All individual label reports and plots saved to '{save_dir_path.name}'")


  def multi_target_shap_summary_plot(
@@ -242,10 +248,10 @@
  instances_to_explain_np = instances_to_explain.numpy() if isinstance(instances_to_explain, torch.Tensor) else instances_to_explain

  if np.isnan(background_data_np).any() or np.isnan(instances_to_explain_np).any():
- _LOGGER.error("Input data for SHAP contains NaN values. Aborting explanation.")
+ _LOGGER.error("Input data for SHAP contains NaN values. Aborting explanation.")
  return

- _LOGGER.info("\n--- Multi-Target SHAP Value Explanation ---")
+ _LOGGER.info("--- Multi-Target SHAP Value Explanation ---")
  model.eval()
  model.cpu()

@@ -262,7 +268,7 @@
  # 3. Create the KernelExplainer.
  explainer = shap.KernelExplainer(prediction_wrapper, background_summary)

- _LOGGER.info("Calculating SHAP values with KernelExplainer...")
+ print("Calculating SHAP values with KernelExplainer...")
  # For multi-output models, shap_values is a list of arrays.
  shap_values_list = explainer.shap_values(instances_to_explain_np, l1_reg="aic")

@@ -271,7 +277,7 @@

  # 4. Iterate through each target's SHAP values and generate plots.
  for i, target_name in enumerate(target_names):
- _LOGGER.info(f" -> Generating SHAP plots for target: '{target_name}'")
+ print(f" -> Generating SHAP plots for target: '{target_name}'")
  shap_values_for_target = shap_values_list[i]
  sanitized_target_name = sanitize_filename(target_name)

@@ -292,5 +298,8 @@
  plt.close()

  plt.ion()
- _LOGGER.info(f"All SHAP plots saved to '{save_dir_path.name}'")
+ _LOGGER.info(f"All SHAP plots saved to '{save_dir_path.name}'")

+
+ def info():
+ _script_info(__all__)
ml_tools/ML_inference.py CHANGED
@@ -59,20 +59,20 @@ class _BaseInferenceHandler(ABC):
  self.model.load_state_dict(torch.load(model_p, map_location=self.device))
  self.model.to(self.device)
  self.model.eval() # Set the model to evaluation mode
- _LOGGER.info(f"Model state loaded from '{model_p.name}' and set to evaluation mode.")
+ _LOGGER.info(f"Model state loaded from '{model_p.name}' and set to evaluation mode.")
  except Exception as e:
- _LOGGER.error(f"Failed to load model state from '{model_p}': {e}")
+ _LOGGER.error(f"Failed to load model state from '{model_p}': {e}")
  raise

  def _validate_device(self, device: str) -> torch.device:
  """Validates the selected device and returns a torch.device object."""
  device_lower = device.lower()
  if "cuda" in device_lower and not torch.cuda.is_available():
- _LOGGER.warning("⚠️ CUDA not available, switching to CPU.")
+ _LOGGER.warning("CUDA not available, switching to CPU.")
  device_lower = "cpu"
  elif device_lower == "mps" and not torch.backends.mps.is_available():
  # Your M-series Mac will appreciate this check!
- _LOGGER.warning("⚠️ Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
+ _LOGGER.warning("Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
  device_lower = "cpu"
  return torch.device(device_lower)

@@ -144,7 +144,8 @@ class PyTorchInferenceHandler(_BaseInferenceHandler):
  A dictionary containing the raw output tensors from the model.
  """
  if features.ndim != 2:
- raise ValueError("Input for batch prediction must be a 2D array or tensor.")
+ _LOGGER.error("Input for batch prediction must be a 2D array or tensor.")
+ raise ValueError()

  input_tensor = self._preprocess_input(features)

@@ -176,7 +177,8 @@ class PyTorchInferenceHandler(_BaseInferenceHandler):
  features = features.reshape(1, -1) # Reshape to a batch of one

  if features.shape[0] != 1:
- raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
+ _LOGGER.error("The 'predict()' method is for a single sample. Use 'predict_batch()' for multiple samples.")
+ raise ValueError()

  batch_results = self.predict_batch(features)

@@ -216,7 +218,8 @@ class PyTorchInferenceHandler(_BaseInferenceHandler):
  `target_id` must be implemented.
  """
  if self.target_id is None:
- raise AttributeError(f"'target_id' has not been implemented.")
+ _LOGGER.error(f"'target_id' has not been implemented.")
+ raise AttributeError()

  if self.task == "regression":
  result = self.predict_numpy(features)[PyTorchInferenceKeys.PREDICTIONS]
@@ -252,7 +255,8 @@ class PyTorchInferenceHandlerMulti(_BaseInferenceHandler):
  super().__init__(model, state_dict, device, scaler)

  if task not in ["multi_target_regression", "multi_label_classification"]:
- raise ValueError("`task` must be 'multi_target_regression' or 'multi_label_classification'.")
+ _LOGGER.error("`task` must be 'multi_target_regression' or 'multi_label_classification'.")
+ raise ValueError()
  self.task = task
  self.target_ids = target_ids

@@ -272,7 +276,8 @@ class PyTorchInferenceHandlerMulti(_BaseInferenceHandler):
  A dictionary containing the raw output tensors from the model.
  """
  if features.ndim != 2:
- raise ValueError("Input for batch prediction must be a 2D array or tensor.")
+ _LOGGER.error("Input for batch prediction must be a 2D array or tensor.")
+ raise ValueError()

  input_tensor = self._preprocess_input(features)

@@ -309,7 +314,8 @@ class PyTorchInferenceHandlerMulti(_BaseInferenceHandler):
  features = features.reshape(1, -1)

  if features.shape[0] != 1:
- raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
+ _LOGGER.error("The 'predict()' method is for a single sample. 'Use predict_batch()' for multiple samples.")
+ raise ValueError()

  batch_results = self.predict_batch(features, classification_threshold)

@@ -348,7 +354,8 @@ class PyTorchInferenceHandlerMulti(_BaseInferenceHandler):
  `target_ids` must be implemented.
  """
  if self.target_ids is None:
- raise AttributeError(f"'target_id' has not been implemented.")
+ _LOGGER.error(f"'target_id' has not been implemented.")
+ raise AttributeError()

  if self.task == "multi_target_regression":
  result = self.predict_numpy(features)[PyTorchInferenceKeys.PREDICTIONS].flatten().tolist()
@@ -398,18 +405,18 @@ def multi_inference_regression(handlers: list[PyTorchInferenceHandler],

  # Validate that the input is a 2D tensor.
  if feature_vector.ndim != 2:
- raise ValueError("Input feature_vector must be a 1D or 2D array/tensor.")
+ _LOGGER.error("Input feature_vector must be a 1D or 2D array/tensor.")
+ raise ValueError()

  results: dict[str,Any] = dict()
  for handler in handlers:
  # validation
  if handler.target_id is None:
- raise AttributeError("All inference handlers must have a 'target_id' attribute.")
+ _LOGGER.error("All inference handlers must have a 'target_id' attribute.")
+ raise AttributeError()
  if handler.task != "regression":
- raise ValueError(
- f"Invalid task type: The handler for target_id '{handler.target_id}' "
- f"is for '{handler.task}', but only 'regression' tasks are supported."
- )
+ _LOGGER.error(f"Invalid task type: The handler for target_id '{handler.target_id}' is for '{handler.task}', but only 'regression' tasks are supported.")
+ raise ValueError()

  # inference
  if output == "numpy":
@@ -476,7 +483,8 @@ def multi_inference_classification(
  feature_vector = feature_vector.reshape(1, -1)

  if feature_vector.ndim != 2:
- raise ValueError("Input feature_vector must be a 1D or 2D array/tensor.")
+ _LOGGER.error("Input feature_vector must be a 1D or 2D array/tensor.")
+ raise ValueError()

  # Initialize two dictionaries for results
  labels_results: dict[str, Any] = dict()
@@ -485,12 +493,11 @@
  for handler in handlers:
  # Validation
  if handler.target_id is None:
- raise AttributeError("All inference handlers must have a 'target_id' attribute.")
+ _LOGGER.error("All inference handlers must have a 'target_id' attribute.")
+ raise AttributeError()
  if handler.task != "classification":
- raise ValueError(
- f"Invalid task type: The handler for target_id '{handler.target_id}' "
- f"is for '{handler.task}', but this function only supports 'classification'."
- )
+ _LOGGER.error(f"Invalid task type: The handler for target_id '{handler.target_id}' is for '{handler.task}', but this function only supports 'classification'.")
+ raise ValueError()

  # Inference
  if output == "numpy":
ml_tools/ML_models.py CHANGED
@@ -34,13 +34,17 @@ class _BaseMLP(nn.Module):

  # --- Validation ---
  if not isinstance(in_features, int) or in_features < 1:
- raise ValueError("in_features must be a positive integer.")
+ _LOGGER.error("'in_features' must be a positive integer.")
+ raise ValueError()
  if not isinstance(out_targets, int) or out_targets < 1:
- raise ValueError("out_targets must be a positive integer.")
+ _LOGGER.error("'out_targets' must be a positive integer.")
+ raise ValueError()
  if not isinstance(hidden_layers, list) or not all(isinstance(n, int) for n in hidden_layers):
- raise TypeError("hidden_layers must be a list of integers.")
+ _LOGGER.error("'hidden_layers' must be a list of integers.")
+ raise TypeError()
  if not (0.0 <= drop_out < 1.0):
- raise ValueError("drop_out must be a float between 0.0 and 1.0.")
+ _LOGGER.error("'drop_out' must be a float between 0.0 and 1.0.")
+ raise ValueError()

  # --- Save configuration ---
  self.in_features = in_features
@@ -626,10 +630,8 @@ def save_architecture(model: nn.Module, directory: Union[str, Path], verbose: bo
  AttributeError: If the model does not have a `get_config()` method.
  """
  if not hasattr(model, 'get_config'):
- raise AttributeError(
- f"Model '{model.__class__.__name__}' does not have a 'get_config()' method. "
- "Please implement it to return the model's constructor arguments."
- )
+ _LOGGER.error(f"Model '{model.__class__.__name__}' does not have a 'get_config()' method.")
+ raise AttributeError()

  # Ensure the target directory exists
  path_dir = make_fullpath(directory, make=True, enforce="directory")
@@ -644,7 +646,7 @@
  json.dump(config, f, indent=4)

  if verbose:
- _LOGGER.info(f"Architecture for '{model.__class__.__name__}' saved to '{path_dir.name}'")
+ _LOGGER.info(f"Architecture for '{model.__class__.__name__}' saved to '{path_dir.name}'")


  def load_architecture(filepath: Union[str, Path], expected_model_class: type, verbose: bool=True) -> nn.Module:
@@ -674,15 +676,13 @@ def load_architecture(filepath: Union[str, Path], expected_model_class: type, ve
  config = saved_data['config']

  if saved_class_name != expected_model_class.__name__:
- raise ValueError(
- f"Model class mismatch. File specifies '{saved_class_name}', "
- f"but you expected '{expected_model_class.__name__}'."
- )
+ _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{expected_model_class.__name__}' was expected.")
+ raise ValueError()

  # Create an instance of the model using the provided class and config
  model = expected_model_class(**config)
  if verbose:
- _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
+ _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
  return model

ml_tools/ML_optimization.py CHANGED
@@ -127,7 +127,8 @@ def create_pytorch_problem(
  SearcherClass = GeneticAlgorithm

  else:
- raise ValueError(f"Unknown algorithm '{algorithm}'.")
+ _LOGGER.error(f"Unknown algorithm '{algorithm}'.")
+ raise ValueError()

  # Create a factory function with all arguments pre-filled
  searcher_factory = partial(SearcherClass, problem, **searcher_kwargs)
@@ -242,7 +243,7 @@ def run_optimization(
  if verbose:
  _handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)

- _LOGGER.info(f"Optimization complete. Best solution saved to '{csv_path.name}'")
+ _LOGGER.info(f"Optimization complete. Best solution saved to '{csv_path.name}'")
  return result_dict

  # --- MULTIPLE REPETITIONS LOGIC ---
@@ -295,7 +296,7 @@ def run_optimization(
  if pandas_logger is not None:
  _handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)

- _LOGGER.info(f"Optimal solution space complete. Results saved to '{save_path}'")
+ _LOGGER.info(f"Optimal solution space complete. Results saved to '{save_path}'")
  return None

ml_tools/ML_scaler.py CHANGED
@@ -50,7 +50,7 @@ class PytorchScaler:
  PytorchScaler: A new, fitted instance of the scaler.
  """
  if not continuous_feature_indices:
- _LOGGER.warning("⚠️ No continuous feature indices provided. Scaler will not be fitted.")
+ _LOGGER.error("No continuous feature indices provided. Scaler will not be fitted.")
  return cls()

  loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
@@ -72,7 +72,7 @@ class PytorchScaler:
  count += continuous_features.size(0)

  if count == 0:
- _LOGGER.warning("⚠️ Dataset is empty. Scaler cannot be fitted.")
+ _LOGGER.error("Dataset is empty. Scaler cannot be fitted.")
  return cls(continuous_feature_indices=continuous_feature_indices)

  # Calculate mean
@@ -80,7 +80,7 @@ class PytorchScaler:

  # Calculate standard deviation
  if count < 2:
- _LOGGER.warning(f"⚠️ Only one sample found. Standard deviation cannot be calculated and is set to 1.")
+ _LOGGER.warning(f"Only one sample found. Standard deviation cannot be calculated and is set to 1.")
  std = torch.ones_like(mean)
  else:
  # var = E[X^2] - (E[X])^2
@@ -101,7 +101,7 @@ class PytorchScaler:
  torch.Tensor: The transformed data tensor.
  """
  if self.mean_ is None or self.std_ is None or self.continuous_feature_indices is None:
- _LOGGER.warning("⚠️ Scaler has not been fitted. Returning original data.")
+ _LOGGER.error("Scaler has not been fitted. Returning original data.")
  return data

  data_clone = data.clone()
@@ -132,7 +132,7 @@ class PytorchScaler:
  torch.Tensor: The original-scale data tensor.
  """
  if self.mean_ is None or self.std_ is None or self.continuous_feature_indices is None:
- _LOGGER.warning("⚠️ Scaler has not been fitted. Returning original data.")
+ _LOGGER.error("Scaler has not been fitted. Returning original data.")
  return data

  data_clone = data.clone()
@@ -163,7 +163,7 @@ class PytorchScaler:
  'continuous_feature_indices': self.continuous_feature_indices
  }
  torch.save(state, path_obj)
- _LOGGER.info(f"PytorchScaler state saved to '{path_obj.name}'.")
+ _LOGGER.info(f"PytorchScaler state saved to '{path_obj.name}'.")

  @staticmethod
  def load(filepath: Union[str, Path]) -> 'PytorchScaler':
@@ -178,7 +178,7 @@ class PytorchScaler:
  """
  path_obj = make_fullpath(filepath, enforce="file")
  state = torch.load(path_obj)
- _LOGGER.info(f"PytorchScaler state loaded from '{path_obj.name}'.")
+ _LOGGER.info(f"PytorchScaler state loaded from '{path_obj.name}'.")
  return PytorchScaler(
  mean=state['mean'],
  std=state['std'],