dragon-ml-toolbox 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dragon-ml-toolbox might be problematic.
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/METADATA +5 -1
- dragon_ml_toolbox-9.1.0.dist-info/RECORD +35 -0
- ml_tools/ETL_engineering.py +177 -79
- ml_tools/GUI_tools.py +5 -5
- ml_tools/MICE_imputation.py +12 -8
- ml_tools/ML_callbacks.py +6 -3
- ml_tools/ML_datasetmaster.py +37 -20
- ml_tools/ML_evaluation.py +4 -4
- ml_tools/ML_evaluation_multi.py +26 -17
- ml_tools/ML_inference.py +30 -23
- ml_tools/ML_models.py +14 -14
- ml_tools/ML_optimization.py +4 -3
- ml_tools/ML_scaler.py +7 -7
- ml_tools/ML_trainer.py +17 -15
- ml_tools/PSO_optimization.py +16 -8
- ml_tools/RNN_forecast.py +1 -1
- ml_tools/SQL.py +22 -13
- ml_tools/VIF_factor.py +7 -6
- ml_tools/_logger.py +105 -7
- ml_tools/custom_logger.py +12 -8
- ml_tools/data_exploration.py +20 -15
- ml_tools/ensemble_evaluation.py +10 -6
- ml_tools/ensemble_inference.py +18 -18
- ml_tools/ensemble_learning.py +8 -5
- ml_tools/handle_excel.py +41 -23
- ml_tools/optimization_tools.py +3 -4
- ml_tools/path_manager.py +21 -15
- ml_tools/utilities.py +35 -26
- dragon_ml_toolbox-8.2.0.dist-info/RECORD +0 -36
- ml_tools/_ML_optimization_multi.py +0 -231
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_callbacks.py
CHANGED
@@ -134,7 +134,8 @@ class EarlyStopping(Callback):
         self.verbose = verbose

         if mode not in ['auto', 'min', 'max']:
-
+            _LOGGER.error(f"EarlyStopping mode {mode} is unknown, choose one of ('auto', 'min', 'max')")
+            raise ValueError()
         self.mode = mode

         # Determine the comparison operator based on the mode
@@ -221,7 +222,8 @@ class ModelCheckpoint(Callback):
         self.last_best_filepath = None

         if mode not in ['auto', 'min', 'max']:
-
+            _LOGGER.error(f"ModelCheckpoint mode {mode} is unknown.")
+            raise ValueError()
         self.mode = mode

         if self.mode == 'min':
@@ -329,7 +331,8 @@ class LRScheduler(Callback):
         # For schedulers that need a metric (e.g., val_loss)
         if isinstance(self.scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
             if self.monitor is None:
-
+                _LOGGER.error("LRScheduler needs a `monitor` metric for ReduceLROnPlateau.")
+                raise ValueError()

             metric_val = logs.get(self.monitor) # type: ignore
             if metric_val is not None:
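Every hunk in this file applies the same refactor: the human-readable message moves into a _LOGGER.error(...) call and the exception is raised bare. A minimal runnable sketch of the convention, using Python's stdlib logging as a stand-in for the package logger in ml_tools/_logger.py:

import logging

_LOGGER = logging.getLogger("ml_tools")  # stand-in for the logger defined in ml_tools/_logger.py

def validate_mode(mode: str) -> None:
    # Log-then-raise convention from 9.1.0: the message goes to the log record,
    # and the exception itself carries no text.
    if mode not in ('auto', 'min', 'max'):
        _LOGGER.error(f"EarlyStopping mode {mode} is unknown, choose one of ('auto', 'min', 'max')")
        raise ValueError()

validate_mode('min')   # passes; validate_mode('best') would log the error and raise

One consequence of this convention: str(ValueError()) is empty, so the detail is visible only in the log output, not in the traceback message.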
ml_tools/ML_datasetmaster.py
CHANGED
@@ -85,11 +85,13 @@ class _BaseDatasetMaker(ABC):
             try:
                 continuous_feature_indices = [name_to_idx[name] for name in continuous_feature_columns] # type: ignore
             except KeyError as e:
-
+                _LOGGER.error(f"Feature column '{e.args[0]}' not found.")
+                raise ValueError()
         elif all(isinstance(c, int) for c in continuous_feature_columns):
             continuous_feature_indices = continuous_feature_columns # type: ignore
         else:
-
+            _LOGGER.error("'continuous_feature_columns' must be a list of all strings or all integers.")
+            raise TypeError()

         X_train_values = X_train.values
         X_test_values = X_test.values
@@ -152,8 +154,12 @@ class _BaseDatasetMaker(ABC):
         Args:
             save_dir (str | Path): The directory where the scaler will be saved.
         """
-        if not self.scaler:
-
+        if not self.scaler:
+            _LOGGER.error("No scaler was fitted or provided.")
+            raise RuntimeError()
+        if not self.id:
+            _LOGGER.error("Must set the `id` before saving scaler.")
+            raise ValueError()
         save_path = make_fullpath(save_dir, make=True, enforce="directory")
         sanitized_id = sanitize_filename(self.id)
         filename = f"scaler_{sanitized_id}.pth"
@@ -365,7 +371,7 @@ class VisionDatasetMaker(_BaseMaker):
             f"Image channels (bands): {img_channels or 'None'}\n"
             f"--------------------------------------"
         )
-
+        print(report)

     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
                    stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
@@ -375,7 +381,8 @@ class VisionDatasetMaker(_BaseMaker):
             return self

         if val_size + test_size >= 1.0:
-
+            _LOGGER.error("The sum of val_size and test_size must be less than 1.")
+            raise ValueError()

         indices = list(range(len(self.full_dataset)))
         labels_for_split = self.labels if stratify else None
@@ -409,7 +416,8 @@ class VisionDatasetMaker(_BaseMaker):
                          extra_train_transforms: Optional[List] = None) -> 'VisionDatasetMaker':
         """Configures and applies the image transformations (augmentations)."""
         if not self._is_split:
-
+            _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
+            raise RuntimeError()

         base_train_transforms = [transforms.RandomResizedCrop(crop_size), transforms.RandomHorizontalFlip()]
         if extra_train_transforms:
@@ -432,9 +440,10 @@ class VisionDatasetMaker(_BaseMaker):
     def get_datasets(self) -> Tuple[Dataset, ...]:
         """Returns the final train, validation, and optional test datasets."""
         if not self._is_split:
-
+            _LOGGER.error("Data has not been split. Call .split_data() first.")
+            raise RuntimeError()
         if not self._are_transforms_configured:
-            _LOGGER.warning("
+            _LOGGER.warning("Transforms have not been configured. Using default ToTensor only.")

         if self._test_dataset:
             return self._train_dataset, self._val_dataset, self._test_dataset
@@ -468,7 +477,8 @@ class SequenceMaker(_BaseMaker):
             self.time_axis = numpy.arange(len(data))
             self.sequence = data.astype(numpy.float32)
         else:
-
+            _LOGGER.error("Data must be a pandas DataFrame/Series or a numpy array.")
+            raise TypeError()

         self.train_sequence = None
         self.test_sequence = None
@@ -483,10 +493,11 @@ class SequenceMaker(_BaseMaker):
         splitting to prevent data leakage from the test set.
         """
         if not self._is_split:
-
+            _LOGGER.error("Data must be split BEFORE normalizing. Call .split_data() first.")
+            raise RuntimeError()

         if self.scaler:
-            _LOGGER.warning("
+            _LOGGER.warning("Data has already been normalized.")
             return self

         # 1. PytorchScaler requires a Dataset to fit. Create a temporary one.
@@ -511,13 +522,13 @@ class SequenceMaker(_BaseMaker):
         self.test_sequence = self.scaler.transform(test_tensor).numpy().flatten()

         self._is_normalized = True
-        _LOGGER.info("
+        _LOGGER.info("Sequence data normalized using PytorchScaler.")
         return self

     def split_data(self, test_size: float = 0.2) -> 'SequenceMaker':
         """Splits the sequence into training and testing portions."""
         if self._is_split:
-            _LOGGER.warning("
+            _LOGGER.warning("Data has already been split.")
             return self

         split_idx = int(len(self.sequence) * (1 - test_size))
@@ -538,7 +549,8 @@ class SequenceMaker(_BaseMaker):
         "sequence-to-sequence": Label vectors are of the same size as the feature vectors instead of a single future prediction.
         """
         if not self._is_split:
-
+            _LOGGER.error("Cannot generate windows before splitting data. Call .split_data() first.")
+            raise RuntimeError()

         self._train_dataset = self._create_windowed_dataset(self.train_sequence, sequence_to_sequence) # type: ignore
         self._test_dataset = self._create_windowed_dataset(self.test_sequence, sequence_to_sequence) # type: ignore
@@ -550,7 +562,8 @@ class SequenceMaker(_BaseMaker):
     def _create_windowed_dataset(self, data: numpy.ndarray, use_sequence_labels: bool) -> Dataset:
         """Efficiently creates windowed features and labels using numpy."""
         if len(data) <= self.sequence_length:
-
+            _LOGGER.error("Data length must be greater than the sequence_length to create at least one window.")
+            raise ValueError()

         if not use_sequence_labels:
             features = data[:-1]
@@ -578,7 +591,8 @@ class SequenceMaker(_BaseMaker):
     def denormalize(self, data: Union[torch.Tensor, numpy.ndarray]) -> numpy.ndarray:
         """Applies inverse transformation using the stored PytorchScaler."""
         if self.scaler is None:
-
+            _LOGGER.error("Data was not normalized. Cannot denormalize.")
+            raise RuntimeError()

         # Ensure data is a torch.Tensor
         if isinstance(data, numpy.ndarray):
@@ -597,7 +611,8 @@ class SequenceMaker(_BaseMaker):
     def plot(self, predictions: Optional[numpy.ndarray] = None):
         """Plots the original training and testing data, with optional predictions."""
         if not self._is_split:
-
+            _LOGGER.error("Cannot plot before splitting data. Call .split_data() first.")
+            raise RuntimeError()

         plt.figure(figsize=(15, 6))
         plt.title("Time Series Data")
@@ -618,7 +633,8 @@ class SequenceMaker(_BaseMaker):
     def get_datasets(self) -> Tuple[Dataset, Dataset]:
         """Returns the final train and test datasets."""
         if not self._are_windows_generated:
-
+            _LOGGER.error("Windows have not been generated. Call .generate_windows() first.")
+            raise RuntimeError()
         return self._train_dataset, self._test_dataset


@@ -637,7 +653,8 @@ class ResizeAspectFill:

     def __call__(self, image: Image.Image) -> Image.Image:
         if not isinstance(image, Image.Image):
-
+            _LOGGER.error(f"Expected PIL.Image.Image, got {type(image).__name__}")
+            raise TypeError()

         w, h = image.size
         if w == h:
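The new SequenceMaker guards enforce a strict call order: split, then normalize, then window. A schematic usage sketch follows; the constructor's full signature and the normalization method's name are not visible in these hunks, so treat those parts as assumptions:

import numpy
from ml_tools.ML_datasetmaster import SequenceMaker  # import path assumed from the file name

maker = SequenceMaker(numpy.sin(numpy.linspace(0, 20, 500)))  # other constructor args omitted
maker.split_data(test_size=0.2)           # must come first; later steps guard on _is_split
maker.generate_windows()                  # logs and raises RuntimeError if called before split_data()
train_ds, test_ds = maker.get_datasets()  # guarded by _are_windows_generated

Several steps return self (visible in the hunks above), so the calls can also be chained.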
ml_tools/ML_evaluation.py
CHANGED
@@ -110,7 +110,7 @@ def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pre
         _LOGGER.info(f"📊 Report heatmap saved as '{heatmap_path.name}'")
         plt.close()
     except Exception as e:
-        _LOGGER.error(f"
+        _LOGGER.error(f"Could not generate classification report heatmap: {e}")

     # Save Confusion Matrix
     fig_cm, ax_cm = plt.subplots(figsize=(6, 6), dpi=100)
@@ -172,7 +172,7 @@ def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pre

     cal_path = save_dir_path / "calibration_plot.svg"
     plt.savefig(cal_path)
-    _LOGGER.info(f"
+    _LOGGER.info(f"📈 Calibration plot saved as '{cal_path.name}'")
     plt.close(fig_cal)


@@ -277,7 +277,7 @@ def shap_summary_plot(model,

     # --- Data Validation Step ---
     if np.isnan(background_data_np).any() or np.isnan(instances_to_explain_np).any():
-        _LOGGER.error("
+        _LOGGER.error("Input data for SHAP contains NaN values. Aborting explanation.")
         return

     print("\n--- SHAP Value Explanation ---")
@@ -364,7 +364,7 @@ def plot_attention_importance(weights: List[torch.Tensor], feature_names: Option
         save_dir (str | Path): Directory to save the plot and summary CSV.
     """
     if not weights:
-        _LOGGER.
+        _LOGGER.error("Attention weights list is empty. Skipping importance plot.")
         return

     # --- Step 1: Aggregate data ---
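The SHAP guard added here refuses to explain data containing NaNs instead of letting the explainer produce unusable output. A small self-contained sketch of the same pre-check:

import numpy as np

background = np.random.rand(64, 8).astype(np.float32)
instances = np.random.rand(5, 8).astype(np.float32)
instances[2, 3] = np.nan  # a single bad cell is enough to trigger the guard

# Mirror of the validation step in shap_summary_plot: abort instead of explaining NaNs.
if np.isnan(background).any() or np.isnan(instances).any():
    print("Input data for SHAP contains NaN values. Aborting explanation.")
else:
    print("Data is clean; safe to compute SHAP values.")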
ml_tools/ML_evaluation_multi.py
CHANGED
@@ -19,7 +19,7 @@ from sklearn.metrics import (
     jaccard_score
 )
 from pathlib import Path
-from typing import Union, List
+from typing import Union, List

 from .path_manager import make_fullpath, sanitize_filename
 from ._logger import _LOGGER
@@ -52,11 +52,14 @@ def multi_target_regression_metrics(
         save_dir (str | Path): Directory to save plots and the report.
     """
     if y_true.ndim != 2 or y_pred.ndim != 2:
-
+        _LOGGER.error("y_true and y_pred must be 2D arrays for multi-target regression.")
+        raise ValueError()
     if y_true.shape != y_pred.shape:
-
+        _LOGGER.error("Shapes of y_true and y_pred must match.")
+        raise ValueError()
     if y_true.shape[1] != len(target_names):
-
+        _LOGGER.error("Number of target names must match the number of columns in y_true.")
+        raise ValueError()

     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
     metrics_summary = []
@@ -64,7 +67,7 @@ def multi_target_regression_metrics(
     _LOGGER.info("--- Multi-Target Regression Evaluation ---")

     for i, name in enumerate(target_names):
-
+        print(f" -> Evaluating target: '{name}'")
         true_i = y_true[:, i]
         pred_i = y_pred[:, i]
         sanitized_name = sanitize_filename(name)
@@ -113,7 +116,7 @@ def multi_target_regression_metrics(
     summary_df = pd.DataFrame(metrics_summary)
     report_path = save_dir_path / "regression_report_multi.csv"
     summary_df.to_csv(report_path, index=False)
-    _LOGGER.info(f"
+    _LOGGER.info(f"Full regression report saved to '{report_path.name}'")


 def multi_label_classification_metrics(
@@ -139,11 +142,14 @@ def multi_label_classification_metrics(
         binary predictions for metrics like the confusion matrix.
     """
     if y_true.ndim != 2 or y_prob.ndim != 2:
-
+        _LOGGER.error("y_true and y_prob must be 2D arrays for multi-label classification.")
+        raise ValueError()
     if y_true.shape != y_prob.shape:
-
+        _LOGGER.error("Shapes of y_true and y_prob must match.")
+        raise ValueError()
     if y_true.shape[1] != len(target_names):
-
+        _LOGGER.error("Number of target names must match the number of columns in y_true.")
+        raise ValueError()

     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")

@@ -165,13 +171,13 @@ def multi_label_classification_metrics(
         f"Jaccard Score (macro): {j_score_macro:.4f}\n"
         f"--------------------------------------------------\n"
     )
-
+    print(overall_report)
     overall_report_path = save_dir_path / "classification_report_overall.txt"
     overall_report_path.write_text(overall_report)

     # --- Per-Label Metrics and Plots ---
     for i, name in enumerate(target_names):
-
+        print(f" -> Evaluating label: '{name}'")
         true_i = y_true[:, i]
         pred_i = y_pred[:, i]
         prob_i = y_prob[:, i]
@@ -215,7 +221,7 @@ def multi_label_classification_metrics(
     plt.savefig(pr_path)
     plt.close(fig_pr)

-    _LOGGER.info(f"
+    _LOGGER.info(f"All individual label reports and plots saved to '{save_dir_path.name}'")


 def multi_target_shap_summary_plot(
@@ -242,10 +248,10 @@ def multi_target_shap_summary_plot(
     instances_to_explain_np = instances_to_explain.numpy() if isinstance(instances_to_explain, torch.Tensor) else instances_to_explain

     if np.isnan(background_data_np).any() or np.isnan(instances_to_explain_np).any():
-        _LOGGER.error("
+        _LOGGER.error("Input data for SHAP contains NaN values. Aborting explanation.")
         return

-    _LOGGER.info("
+    _LOGGER.info("--- Multi-Target SHAP Value Explanation ---")
     model.eval()
     model.cpu()

@@ -262,7 +268,7 @@ def multi_target_shap_summary_plot(
     # 3. Create the KernelExplainer.
     explainer = shap.KernelExplainer(prediction_wrapper, background_summary)

-
+    print("Calculating SHAP values with KernelExplainer...")
     # For multi-output models, shap_values is a list of arrays.
     shap_values_list = explainer.shap_values(instances_to_explain_np, l1_reg="aic")

@@ -271,7 +277,7 @@ def multi_target_shap_summary_plot(

     # 4. Iterate through each target's SHAP values and generate plots.
     for i, target_name in enumerate(target_names):
-
+        print(f" -> Generating SHAP plots for target: '{target_name}'")
         shap_values_for_target = shap_values_list[i]
         sanitized_target_name = sanitize_filename(target_name)

@@ -292,5 +298,8 @@ def multi_target_shap_summary_plot(
     plt.close()

     plt.ion()
-    _LOGGER.info(f"
+    _LOGGER.info(f"All SHAP plots saved to '{save_dir_path.name}'")

+
+def info():
+    _script_info(__all__)
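The three guards at the top of each metrics function encode the input contract: 2D arrays, identical shapes, and one target name per column. A hedged sketch of inputs that satisfy it; the keyword names come from the hunks, but the real signature's positional order is not shown:

import numpy as np

y_true = np.random.rand(200, 3)                            # (n_samples, n_targets), must be 2D
y_pred = y_true + np.random.normal(0, 0.1, y_true.shape)   # must match y_true's shape
target_names = ["t1", "t2", "t3"]                          # one name per column

assert y_true.ndim == 2 and y_pred.ndim == 2
assert y_true.shape == y_pred.shape
assert y_true.shape[1] == len(target_names)
# multi_target_regression_metrics(y_true=y_true, y_pred=y_pred,
#                                 target_names=target_names, save_dir="reports/")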
ml_tools/ML_inference.py
CHANGED
@@ -59,20 +59,20 @@ class _BaseInferenceHandler(ABC):
             self.model.load_state_dict(torch.load(model_p, map_location=self.device))
             self.model.to(self.device)
             self.model.eval() # Set the model to evaluation mode
-            _LOGGER.info(f"
+            _LOGGER.info(f"Model state loaded from '{model_p.name}' and set to evaluation mode.")
         except Exception as e:
-            _LOGGER.error(f"
+            _LOGGER.error(f"Failed to load model state from '{model_p}': {e}")
             raise

     def _validate_device(self, device: str) -> torch.device:
         """Validates the selected device and returns a torch.device object."""
         device_lower = device.lower()
         if "cuda" in device_lower and not torch.cuda.is_available():
-            _LOGGER.warning("
+            _LOGGER.warning("CUDA not available, switching to CPU.")
             device_lower = "cpu"
         elif device_lower == "mps" and not torch.backends.mps.is_available():
             # Your M-series Mac will appreciate this check!
-            _LOGGER.warning("
+            _LOGGER.warning("Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
             device_lower = "cpu"
         return torch.device(device_lower)

@@ -144,7 +144,8 @@ class PyTorchInferenceHandler(_BaseInferenceHandler):
         A dictionary containing the raw output tensors from the model.
         """
         if features.ndim != 2:
-
+            _LOGGER.error("Input for batch prediction must be a 2D array or tensor.")
+            raise ValueError()

         input_tensor = self._preprocess_input(features)

@@ -176,7 +177,8 @@ class PyTorchInferenceHandler(_BaseInferenceHandler):
             features = features.reshape(1, -1) # Reshape to a batch of one

         if features.shape[0] != 1:
-
+            _LOGGER.error("The 'predict()' method is for a single sample. Use 'predict_batch()' for multiple samples.")
+            raise ValueError()

         batch_results = self.predict_batch(features)

@@ -216,7 +218,8 @@ class PyTorchInferenceHandler(_BaseInferenceHandler):
         `target_id` must be implemented.
         """
         if self.target_id is None:
-
+            _LOGGER.error(f"'target_id' has not been implemented.")
+            raise AttributeError()

         if self.task == "regression":
             result = self.predict_numpy(features)[PyTorchInferenceKeys.PREDICTIONS]
@@ -252,7 +255,8 @@ class PyTorchInferenceHandlerMulti(_BaseInferenceHandler):
         super().__init__(model, state_dict, device, scaler)

         if task not in ["multi_target_regression", "multi_label_classification"]:
-
+            _LOGGER.error("`task` must be 'multi_target_regression' or 'multi_label_classification'.")
+            raise ValueError()
         self.task = task
         self.target_ids = target_ids

@@ -272,7 +276,8 @@ class PyTorchInferenceHandlerMulti(_BaseInferenceHandler):
         A dictionary containing the raw output tensors from the model.
         """
         if features.ndim != 2:
-
+            _LOGGER.error("Input for batch prediction must be a 2D array or tensor.")
+            raise ValueError()

         input_tensor = self._preprocess_input(features)

@@ -309,7 +314,8 @@ class PyTorchInferenceHandlerMulti(_BaseInferenceHandler):
             features = features.reshape(1, -1)

         if features.shape[0] != 1:
-
+            _LOGGER.error("The 'predict()' method is for a single sample. 'Use predict_batch()' for multiple samples.")
+            raise ValueError()

         batch_results = self.predict_batch(features, classification_threshold)

@@ -348,7 +354,8 @@ class PyTorchInferenceHandlerMulti(_BaseInferenceHandler):
         `target_ids` must be implemented.
         """
         if self.target_ids is None:
-
+            _LOGGER.error(f"'target_id' has not been implemented.")
+            raise AttributeError()

         if self.task == "multi_target_regression":
             result = self.predict_numpy(features)[PyTorchInferenceKeys.PREDICTIONS].flatten().tolist()
@@ -398,18 +405,18 @@ def multi_inference_regression(handlers: list[PyTorchInferenceHandler],

     # Validate that the input is a 2D tensor.
     if feature_vector.ndim != 2:
-
+        _LOGGER.error("Input feature_vector must be a 1D or 2D array/tensor.")
+        raise ValueError()

     results: dict[str,Any] = dict()
     for handler in handlers:
         # validation
         if handler.target_id is None:
-
+            _LOGGER.error("All inference handlers must have a 'target_id' attribute.")
+            raise AttributeError()
         if handler.task != "regression":
-
-
-                f"is for '{handler.task}', but only 'regression' tasks are supported."
-            )
+            _LOGGER.error(f"Invalid task type: The handler for target_id '{handler.target_id}' is for '{handler.task}', but only 'regression' tasks are supported.")
+            raise ValueError()

         # inference
         if output == "numpy":
@@ -476,7 +483,8 @@ def multi_inference_classification(
         feature_vector = feature_vector.reshape(1, -1)

     if feature_vector.ndim != 2:
-
+        _LOGGER.error("Input feature_vector must be a 1D or 2D array/tensor.")
+        raise ValueError()

     # Initialize two dictionaries for results
     labels_results: dict[str, Any] = dict()
@@ -485,12 +493,11 @@ def multi_inference_classification(
     for handler in handlers:
         # Validation
         if handler.target_id is None:
-
+            _LOGGER.error("All inference handlers must have a 'target_id' attribute.")
+            raise AttributeError()
         if handler.task != "classification":
-
-
-                f"is for '{handler.task}', but this function only supports 'classification'."
-            )
+            _LOGGER.error(f"Invalid task type: The handler for target_id '{handler.target_id}' is for '{handler.task}', but this function only supports 'classification'.")
+            raise ValueError()

         # Inference
         if output == "numpy":
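The completed warnings in _validate_device spell out the fallback behavior: ask for CUDA or MPS, and degrade to CPU with a logged warning when the backend is missing. A standalone sketch of the same logic:

import torch

def validate_device(device: str) -> torch.device:
    # Mirrors _BaseInferenceHandler._validate_device; print() stands in for _LOGGER.warning.
    device_lower = device.lower()
    if "cuda" in device_lower and not torch.cuda.is_available():
        print("CUDA not available, switching to CPU.")
        device_lower = "cpu"
    elif device_lower == "mps" and not torch.backends.mps.is_available():
        print("Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
        device_lower = "cpu"
    return torch.device(device_lower)

print(validate_device("cuda"))  # torch.device('cuda') on a GPU machine, torch.device('cpu') otherwise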
ml_tools/ML_models.py
CHANGED
@@ -34,13 +34,17 @@ class _BaseMLP(nn.Module):

         # --- Validation ---
         if not isinstance(in_features, int) or in_features < 1:
-
+            _LOGGER.error("'in_features' must be a positive integer.")
+            raise ValueError()
         if not isinstance(out_targets, int) or out_targets < 1:
-
+            _LOGGER.error("'out_targets' must be a positive integer.")
+            raise ValueError()
         if not isinstance(hidden_layers, list) or not all(isinstance(n, int) for n in hidden_layers):
-
+            _LOGGER.error("'hidden_layers' must be a list of integers.")
+            raise TypeError()
         if not (0.0 <= drop_out < 1.0):
-
+            _LOGGER.error("'drop_out' must be a float between 0.0 and 1.0.")
+            raise ValueError()

         # --- Save configuration ---
         self.in_features = in_features
@@ -626,10 +630,8 @@ def save_architecture(model: nn.Module, directory: Union[str, Path], verbose: bo
         AttributeError: If the model does not have a `get_config()` method.
     """
     if not hasattr(model, 'get_config'):
-
-
-            "Please implement it to return the model's constructor arguments."
-        )
+        _LOGGER.error(f"Model '{model.__class__.__name__}' does not have a 'get_config()' method.")
+        raise AttributeError()

     # Ensure the target directory exists
     path_dir = make_fullpath(directory, make=True, enforce="directory")
@@ -644,7 +646,7 @@ def save_architecture(model: nn.Module, directory: Union[str, Path], verbose: bo
         json.dump(config, f, indent=4)

     if verbose:
-        _LOGGER.info(f"
+        _LOGGER.info(f"Architecture for '{model.__class__.__name__}' saved to '{path_dir.name}'")


 def load_architecture(filepath: Union[str, Path], expected_model_class: type, verbose: bool=True) -> nn.Module:
@@ -674,15 +676,13 @@ def load_architecture(filepath: Union[str, Path], expected_model_class: type, ve
     config = saved_data['config']

     if saved_class_name != expected_model_class.__name__:
-
-
-            f"but you expected '{expected_model_class.__name__}'."
-        )
+        _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{expected_model_class.__name__}' was expected.")
+        raise ValueError()

     # Create an instance of the model using the provided class and config
     model = expected_model_class(**config)
     if verbose:
-        _LOGGER.info(f"
+        _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
     return model

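save_architecture requires the model to expose get_config() (the new guard logs and raises AttributeError otherwise), and load_architecture re-instantiates the class from the saved config, failing fast on a class-name mismatch. A round-trip sketch with a hypothetical model; the saved JSON filename is an assumption:

import torch.nn as nn
from ml_tools.ML_models import save_architecture, load_architecture  # import path assumed

class TinyMLP(nn.Module):  # hypothetical model, used only for illustration
    def __init__(self, in_features: int, out_targets: int):
        super().__init__()
        self.layer = nn.Linear(in_features, out_targets)

    def get_config(self) -> dict:
        # Must return the constructor kwargs; save_architecture serializes this to JSON.
        return {"in_features": self.layer.in_features, "out_targets": self.layer.out_features}

model = TinyMLP(8, 2)
save_architecture(model, "artifacts/")
restored = load_architecture("artifacts/architecture.json", expected_model_class=TinyMLP)  # filename assumed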
ml_tools/ML_optimization.py
CHANGED
@@ -127,7 +127,8 @@ def create_pytorch_problem(
         SearcherClass = GeneticAlgorithm

     else:
-
+        _LOGGER.error(f"Unknown algorithm '{algorithm}'.")
+        raise ValueError()

     # Create a factory function with all arguments pre-filled
     searcher_factory = partial(SearcherClass, problem, **searcher_kwargs)
@@ -242,7 +243,7 @@ def run_optimization(
     if verbose:
         _handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)

-    _LOGGER.info(f"
+    _LOGGER.info(f"Optimization complete. Best solution saved to '{csv_path.name}'")
     return result_dict

     # --- MULTIPLE REPETITIONS LOGIC ---
@@ -295,7 +296,7 @@ def run_optimization(
     if pandas_logger is not None:
         _handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)

-    _LOGGER.info(f"
+    _LOGGER.info(f"Optimal solution space complete. Results saved to '{save_path}'")
     return None

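create_pytorch_problem validates the algorithm name and then returns a factory built with functools.partial, so each optimization run can construct a fresh searcher. A self-contained sketch of that factory pattern; the stand-in class below replaces the real searcher:

from functools import partial

class GeneticAlgorithm:  # stand-in for the real searcher class
    def __init__(self, problem, **kwargs):
        self.problem, self.kwargs = problem, kwargs

def make_searcher_factory(problem, algorithm: str, **searcher_kwargs):
    if algorithm == "genetic":
        SearcherClass = GeneticAlgorithm
    else:
        raise ValueError(f"Unknown algorithm '{algorithm}'.")
    # Pre-bind the problem and kwargs, as create_pytorch_problem does.
    return partial(SearcherClass, problem, **searcher_kwargs)

factory = make_searcher_factory("toy-problem", "genetic", popsize=32)
searcher = factory()  # a fresh searcher instance per call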
ml_tools/ML_scaler.py
CHANGED
@@ -50,7 +50,7 @@ class PytorchScaler:
         PytorchScaler: A new, fitted instance of the scaler.
     """
     if not continuous_feature_indices:
-        _LOGGER.
+        _LOGGER.error("No continuous feature indices provided. Scaler will not be fitted.")
         return cls()

     loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
@@ -72,7 +72,7 @@ class PytorchScaler:
         count += continuous_features.size(0)

     if count == 0:
-        _LOGGER.
+        _LOGGER.error("Dataset is empty. Scaler cannot be fitted.")
         return cls(continuous_feature_indices=continuous_feature_indices)

     # Calculate mean
@@ -80,7 +80,7 @@ class PytorchScaler:

     # Calculate standard deviation
     if count < 2:
-        _LOGGER.warning(f"
+        _LOGGER.warning(f"Only one sample found. Standard deviation cannot be calculated and is set to 1.")
         std = torch.ones_like(mean)
     else:
         # var = E[X^2] - (E[X])^2
@@ -101,7 +101,7 @@ class PytorchScaler:
         torch.Tensor: The transformed data tensor.
     """
     if self.mean_ is None or self.std_ is None or self.continuous_feature_indices is None:
-        _LOGGER.
+        _LOGGER.error("Scaler has not been fitted. Returning original data.")
         return data

     data_clone = data.clone()
@@ -132,7 +132,7 @@ class PytorchScaler:
         torch.Tensor: The original-scale data tensor.
     """
     if self.mean_ is None or self.std_ is None or self.continuous_feature_indices is None:
-        _LOGGER.
+        _LOGGER.error("Scaler has not been fitted. Returning original data.")
         return data

     data_clone = data.clone()
@@ -163,7 +163,7 @@ class PytorchScaler:
         'continuous_feature_indices': self.continuous_feature_indices
     }
     torch.save(state, path_obj)
-    _LOGGER.info(f"
+    _LOGGER.info(f"PytorchScaler state saved to '{path_obj.name}'.")

 @staticmethod
 def load(filepath: Union[str, Path]) -> 'PytorchScaler':
@@ -178,7 +178,7 @@ class PytorchScaler:
     """
     path_obj = make_fullpath(filepath, enforce="file")
     state = torch.load(path_obj)
-    _LOGGER.info(f"
+    _LOGGER.info(f"PytorchScaler state loaded from '{path_obj.name}'.")
     return PytorchScaler(
         mean=state['mean'],
         std=state['std'],