dragon-ml-toolbox 6.0.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-6.0.0.dist-info → dragon_ml_toolbox-6.1.0.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-6.0.0.dist-info → dragon_ml_toolbox-6.1.0.dist-info}/RECORD +10 -10
- ml_tools/ML_evaluation.py +8 -2
- ml_tools/ML_inference.py +50 -44
- ml_tools/ML_optimization.py +146 -66
- ml_tools/ensemble_evaluation.py +1 -1
- {dragon_ml_toolbox-6.0.0.dist-info → dragon_ml_toolbox-6.1.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-6.0.0.dist-info → dragon_ml_toolbox-6.1.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-6.0.0.dist-info → dragon_ml_toolbox-6.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-6.0.0.dist-info → dragon_ml_toolbox-6.1.0.dist-info}/top_level.txt +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
dragon_ml_toolbox-6.
|
|
2
|
-
dragon_ml_toolbox-6.
|
|
1
|
+
dragon_ml_toolbox-6.1.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
|
|
2
|
+
dragon_ml_toolbox-6.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
|
|
3
3
|
ml_tools/ETL_engineering.py,sha256=4wwZXi9_U7xfCY70jGBaKniOeZ0m75ppxWpQBd_DmLc,39369
|
|
4
4
|
ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
|
|
5
5
|
ml_tools/MICE_imputation.py,sha256=oFHg-OytOzPYTzBR_wIRHhP71cMn3aupDeT59ABsXlQ,11576
|
|
6
6
|
ml_tools/ML_callbacks.py,sha256=FEJ80TSEtY0-hdnOsAWeVApQt1mdzTdOntqtoWmMAzE,13310
|
|
7
7
|
ml_tools/ML_datasetmaster.py,sha256=bbKCNA_b_uDIfxP9YIYKZm-VSfUSD15LvegFxpE9DIQ,34315
|
|
8
|
-
ml_tools/ML_evaluation.py,sha256
|
|
9
|
-
ml_tools/ML_inference.py,sha256=
|
|
8
|
+
ml_tools/ML_evaluation.py,sha256=-Z5fXQi2ou6l5Oyir06bO90SZIZVrjQfgoVAqKgSjks,13800
|
|
9
|
+
ml_tools/ML_inference.py,sha256=blEDgzvDqatxbfloBKsyNPacRwoq9g6WTpIKQ3zoTak,5758
|
|
10
10
|
ml_tools/ML_models.py,sha256=SJhKHGAN2VTBqzcHUOpFWuVZ2Y7U1M4P_axG_LNYWcI,6460
|
|
11
|
-
ml_tools/ML_optimization.py,sha256=
|
|
11
|
+
ml_tools/ML_optimization.py,sha256=kiK04ZsQb4pwF4pqHzfr3AAQdOSLkA_P7DRfO0o9mig,13289
|
|
12
12
|
ml_tools/ML_trainer.py,sha256=1q_CDXuMfndRsPuNofUn2mg2TlhG6MYuGqjWxTDgN9c,15112
|
|
13
13
|
ml_tools/PSO_optimization.py,sha256=9Y074d-B5h4Wvp9YPiy6KAeXM-Yv6Il3gWalKvOLVgo,22705
|
|
14
14
|
ml_tools/RNN_forecast.py,sha256=2CyjBLSYYc3xLHxwLXUmP5Qv8AmV1OB_EndETNX1IBk,1956
|
|
@@ -19,7 +19,7 @@ ml_tools/_logger.py,sha256=TpgYguxO-CWYqqgLW0tqFjtwZ58PE_W2OCfWNGZr0n0,1175
|
|
|
19
19
|
ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
|
|
20
20
|
ml_tools/custom_logger.py,sha256=njM_0XPbQ1S-x5LeSQAaTo2if-XVOR_pQSGg4EDeiTU,4603
|
|
21
21
|
ml_tools/data_exploration.py,sha256=P4f8OpRa7Q4i-11nkppxXw5Lx2lwlpn20GwWBbN_xbM,23901
|
|
22
|
-
ml_tools/ensemble_evaluation.py,sha256=
|
|
22
|
+
ml_tools/ensemble_evaluation.py,sha256=wnqoTPg4WYWf2A8z5XT0eSlW4snEuLCXQVj88sZKzQ4,24683
|
|
23
23
|
ml_tools/ensemble_inference.py,sha256=rtU7eUaQne615n2g7IHZCJI-OvrBCcjxbTkEIvtCGFQ,9414
|
|
24
24
|
ml_tools/ensemble_learning.py,sha256=dAyFgSTyvxJWjc_enJ_8EUoWwiekBeoNyJNxVY-kcUU,21868
|
|
25
25
|
ml_tools/handle_excel.py,sha256=J9iwIqMZemoxK49J5osSwp9Ge0h9YTKyYGbOm53hcno,13007
|
|
@@ -27,7 +27,7 @@ ml_tools/keys.py,sha256=HtPG8-MWh89C32A7eIlfuuA-DLwkxGkoDfwR2TGN9CQ,1074
|
|
|
27
27
|
ml_tools/optimization_tools.py,sha256=MuT4OG7_r1QqLUti-yYix7QeCpglezD0oe9BDCq0QXk,5086
|
|
28
28
|
ml_tools/path_manager.py,sha256=Z8e7w3MPqQaN8xmTnKuXZS6CIW59BFwwqGhGc00sdp4,13692
|
|
29
29
|
ml_tools/utilities.py,sha256=LqXXTovaHbA5AOKRk6Ru6DgAPAM0wPfYU70kUjYBryo,19231
|
|
30
|
-
dragon_ml_toolbox-6.
|
|
31
|
-
dragon_ml_toolbox-6.
|
|
32
|
-
dragon_ml_toolbox-6.
|
|
33
|
-
dragon_ml_toolbox-6.
|
|
30
|
+
dragon_ml_toolbox-6.1.0.dist-info/METADATA,sha256=5wcasdUXOm_YPRrWuTI7J8jj-MgLWZ9zTnwt-yTKqWE,6698
|
|
31
|
+
dragon_ml_toolbox-6.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
32
|
+
dragon_ml_toolbox-6.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
33
|
+
dragon_ml_toolbox-6.1.0.dist-info/RECORD,,
|
ml_tools/ML_evaluation.py
CHANGED
|
@@ -163,7 +163,7 @@ def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pre
|
|
|
163
163
|
fig_cal, ax_cal = plt.subplots(figsize=(8, 8), dpi=100)
|
|
164
164
|
CalibrationDisplay.from_predictions(y_true, y_score, n_bins=15, ax=ax_cal)
|
|
165
165
|
|
|
166
|
-
ax_cal.set_title('
|
|
166
|
+
ax_cal.set_title('Reliability Curve')
|
|
167
167
|
ax_cal.set_xlabel('Mean Predicted Probability')
|
|
168
168
|
ax_cal.set_ylabel('Fraction of Positives')
|
|
169
169
|
ax_cal.grid(True)
|
|
@@ -197,7 +197,7 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Union[s
|
|
|
197
197
|
f" Coefficient of Determination (R²): {r2:.4f}"
|
|
198
198
|
]
|
|
199
199
|
report_string = "\n".join(report_lines)
|
|
200
|
-
print(report_string)
|
|
200
|
+
# print(report_string)
|
|
201
201
|
|
|
202
202
|
save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
|
|
203
203
|
# Save text report
|
|
@@ -308,6 +308,8 @@ def shap_summary_plot(model, background_data: Union[torch.Tensor,np.ndarray], in
|
|
|
308
308
|
# Save Bar Plot
|
|
309
309
|
bar_path = save_dir_path / "shap_bar_plot.svg"
|
|
310
310
|
shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="bar", show=False)
|
|
311
|
+
ax = plt.gca()
|
|
312
|
+
ax.set_xlabel("SHAP Value Impact", labelpad=10)
|
|
311
313
|
plt.title("SHAP Feature Importance")
|
|
312
314
|
plt.tight_layout()
|
|
313
315
|
plt.savefig(bar_path)
|
|
@@ -317,6 +319,10 @@ def shap_summary_plot(model, background_data: Union[torch.Tensor,np.ndarray], in
|
|
|
317
319
|
# Save Dot Plot
|
|
318
320
|
dot_path = save_dir_path / "shap_dot_plot.svg"
|
|
319
321
|
shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot", show=False)
|
|
322
|
+
ax = plt.gca()
|
|
323
|
+
ax.set_xlabel("SHAP Value Impact", labelpad=10)
|
|
324
|
+
cb = plt.gcf().axes[-1]
|
|
325
|
+
cb.set_ylabel("", size=1)
|
|
320
326
|
plt.title("SHAP Feature Importance")
|
|
321
327
|
plt.tight_layout()
|
|
322
328
|
plt.savefig(dot_path)
|
ml_tools/ML_inference.py
CHANGED
|
@@ -66,47 +66,10 @@ class PyTorchInferenceHandler:
|
|
|
66
66
|
|
|
67
67
|
# Ensure tensor is on the correct device
|
|
68
68
|
return features.to(self.device)
|
|
69
|
-
|
|
70
|
-
def
|
|
71
|
-
"""
|
|
72
|
-
Predicts on a single feature vector.
|
|
73
|
-
|
|
74
|
-
Args:
|
|
75
|
-
features (np.ndarray | torch.Tensor): A 1D or 2D array/tensor for a single sample.
|
|
76
|
-
|
|
77
|
-
Returns:
|
|
78
|
-
Dict[str, Any]: A dictionary containing the prediction.
|
|
79
|
-
- For regression: {'predictions': float}
|
|
80
|
-
- For classification: {'labels': int, 'probabilities': np.ndarray}
|
|
69
|
+
|
|
70
|
+
def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
|
|
81
71
|
"""
|
|
82
|
-
|
|
83
|
-
features = features.reshape(1, -1)
|
|
84
|
-
|
|
85
|
-
if features.shape[0] != 1:
|
|
86
|
-
raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
|
|
87
|
-
|
|
88
|
-
results_batch = self.predict_batch(features)
|
|
89
|
-
|
|
90
|
-
# Extract the single result from the batch
|
|
91
|
-
if self.task == "regression":
|
|
92
|
-
return {PyTorchInferenceKeys.PREDICTIONS: results_batch[PyTorchInferenceKeys.PREDICTIONS].item()}
|
|
93
|
-
else: # classification
|
|
94
|
-
return {
|
|
95
|
-
PyTorchInferenceKeys.LABELS: results_batch[PyTorchInferenceKeys.LABELS].item(),
|
|
96
|
-
PyTorchInferenceKeys.PROBABILITIES: results_batch[PyTorchInferenceKeys.PROBABILITIES][0]
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
|
|
100
|
-
"""
|
|
101
|
-
Predicts on a batch of feature vectors.
|
|
102
|
-
|
|
103
|
-
Args:
|
|
104
|
-
features (np.ndarray | torch.Tensor): A 2D array/tensor where each row is a sample.
|
|
105
|
-
|
|
106
|
-
Returns:
|
|
107
|
-
Dict[str, Any]: A dictionary containing the predictions.
|
|
108
|
-
- For regression: {'predictions': np.ndarray}
|
|
109
|
-
- For classification: {'labels': np.ndarray, 'probabilities': np.ndarray}
|
|
72
|
+
Core batch prediction method. Returns results as PyTorch tensors on the model's device.
|
|
110
73
|
"""
|
|
111
74
|
if features.ndim != 2:
|
|
112
75
|
raise ValueError("Input for batch prediction must be a 2D array or tensor.")
|
|
@@ -114,18 +77,61 @@ class PyTorchInferenceHandler:
|
|
|
114
77
|
input_tensor = self._preprocess_input(features)
|
|
115
78
|
|
|
116
79
|
with torch.no_grad():
|
|
117
|
-
|
|
80
|
+
# Output tensor remains on the model's device (e.g., 'mps' or 'cuda')
|
|
81
|
+
output = self.model(input_tensor)
|
|
118
82
|
|
|
119
83
|
if self.task == "classification":
|
|
120
84
|
probs = nn.functional.softmax(output, dim=1)
|
|
121
85
|
labels = torch.argmax(probs, dim=1)
|
|
122
86
|
return {
|
|
123
|
-
PyTorchInferenceKeys.LABELS: labels
|
|
124
|
-
PyTorchInferenceKeys.PROBABILITIES: probs
|
|
87
|
+
PyTorchInferenceKeys.LABELS: labels,
|
|
88
|
+
PyTorchInferenceKeys.PROBABILITIES: probs
|
|
125
89
|
}
|
|
126
90
|
else: # regression
|
|
127
|
-
return {PyTorchInferenceKeys.PREDICTIONS: output
|
|
91
|
+
return {PyTorchInferenceKeys.PREDICTIONS: output}
|
|
128
92
|
|
|
93
|
+
def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
|
|
94
|
+
"""
|
|
95
|
+
Core single-sample prediction. Returns results as PyTorch tensors on the model's device.
|
|
96
|
+
"""
|
|
97
|
+
if features.ndim == 1:
|
|
98
|
+
features = features.reshape(1, -1)
|
|
99
|
+
|
|
100
|
+
if features.shape[0] != 1:
|
|
101
|
+
raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
|
|
102
|
+
|
|
103
|
+
batch_results = self.predict_batch(features)
|
|
104
|
+
|
|
105
|
+
single_results = {key: value[0] for key, value in batch_results.items()}
|
|
106
|
+
return single_results
|
|
107
|
+
|
|
108
|
+
# --- NumPy Convenience Wrappers (on CPU) ---
|
|
109
|
+
|
|
110
|
+
def predict_batch_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, np.ndarray]:
|
|
111
|
+
"""
|
|
112
|
+
Convenience wrapper for predict_batch that returns NumPy arrays.
|
|
113
|
+
"""
|
|
114
|
+
tensor_results = self.predict_batch(features)
|
|
115
|
+
# Move tensor to CPU before converting to NumPy
|
|
116
|
+
numpy_results = {key: value.cpu().numpy() for key, value in tensor_results.items()}
|
|
117
|
+
return numpy_results
|
|
118
|
+
|
|
119
|
+
def predict_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
|
|
120
|
+
"""
|
|
121
|
+
Convenience wrapper for predict that returns NumPy arrays or scalars.
|
|
122
|
+
"""
|
|
123
|
+
tensor_results = self.predict(features)
|
|
124
|
+
|
|
125
|
+
if self.task == "regression":
|
|
126
|
+
# .item() implicitly moves to CPU
|
|
127
|
+
return {PyTorchInferenceKeys.PREDICTIONS: tensor_results[PyTorchInferenceKeys.PREDICTIONS].item()}
|
|
128
|
+
else: # classification
|
|
129
|
+
return {
|
|
130
|
+
PyTorchInferenceKeys.LABELS: tensor_results[PyTorchInferenceKeys.LABELS].item(),
|
|
131
|
+
# ✅ Move tensor to CPU before converting to NumPy
|
|
132
|
+
PyTorchInferenceKeys.PROBABILITIES: tensor_results[PyTorchInferenceKeys.PROBABILITIES].cpu().numpy()
|
|
133
|
+
}
|
|
134
|
+
|
|
129
135
|
|
|
130
136
|
def info():
|
|
131
137
|
_script_info(__all__)
|
ml_tools/ML_optimization.py
CHANGED
|
@@ -1,12 +1,15 @@
|
|
|
1
|
+
import pandas # logger
|
|
1
2
|
import torch
|
|
2
3
|
import numpy #handling torch to numpy
|
|
3
4
|
import evotorch
|
|
4
|
-
from evotorch.algorithms import
|
|
5
|
-
from evotorch.logging import
|
|
6
|
-
from
|
|
5
|
+
from evotorch.algorithms import SNES, CEM, GeneticAlgorithm
|
|
6
|
+
from evotorch.logging import PandasLogger
|
|
7
|
+
from evotorch.operators import SimulatedBinaryCrossOver, GaussianMutation
|
|
8
|
+
from typing import Literal, Union, Tuple, List, Optional, Any, Callable
|
|
7
9
|
from pathlib import Path
|
|
8
10
|
from tqdm.auto import trange
|
|
9
11
|
from contextlib import nullcontext
|
|
12
|
+
from functools import partial
|
|
10
13
|
|
|
11
14
|
from .path_manager import make_fullpath, sanitize_filename
|
|
12
15
|
from ._logger import _LOGGER
|
|
@@ -15,8 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
|
|
|
15
18
|
from .keys import PyTorchInferenceKeys
|
|
16
19
|
from .SQL import DatabaseManager
|
|
17
20
|
from .optimization_tools import _save_result
|
|
18
|
-
from .utilities import threshold_binary_values
|
|
19
|
-
|
|
21
|
+
from .utilities import threshold_binary_values, save_dataframe
|
|
20
22
|
|
|
21
23
|
__all__ = [
|
|
22
24
|
"create_pytorch_problem",
|
|
@@ -25,32 +27,34 @@ __all__ = [
|
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
def create_pytorch_problem(
|
|
28
|
-
|
|
30
|
+
inference_handler: PyTorchInferenceHandler,
|
|
29
31
|
bounds: Tuple[List[float], List[float]],
|
|
30
32
|
binary_features: int,
|
|
31
|
-
task: Literal["
|
|
32
|
-
algorithm: Literal["
|
|
33
|
-
|
|
33
|
+
task: Literal["min", "max"],
|
|
34
|
+
algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
|
|
35
|
+
population_size: int = 200,
|
|
34
36
|
**searcher_kwargs
|
|
35
|
-
) -> Tuple[evotorch.Problem,
|
|
37
|
+
) -> Tuple[evotorch.Problem, Callable[[], Any]]:
|
|
36
38
|
"""
|
|
37
|
-
Creates and configures an EvoTorch Problem and Searcher for a PyTorch model.
|
|
38
|
-
|
|
39
|
+
Creates and configures an EvoTorch Problem and a Searcher factory (a zero-argument callable that builds a fresh searcher) for a PyTorch model.
|
|
40
|
+
|
|
41
|
+
SNES and CEM do not accept bounds; the given bounds will be used as initial bounds only.
|
|
42
|
+
|
|
43
|
+
The Genetic Algorithm works directly with the bounds, and operators such as SimulatedBinaryCrossOver and GaussianMutation.
|
|
44
|
+
|
|
39
45
|
Args:
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
bounds (tuple[list[float], list[float]]): A tuple containing the lower
|
|
43
|
-
and upper bounds for the solution features.
|
|
46
|
+
inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
|
|
47
|
+
bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
|
|
44
48
|
binary_features (int): Number of binary features located at the END of the feature vector. Will be automatically added to the bounds.
|
|
45
49
|
task (str): The optimization goal, either "min" (minimize) or "max" (maximize).
|
|
46
|
-
algorithm (str): The search algorithm to use
|
|
47
|
-
|
|
50
|
+
algorithm (str): The search algorithm to use.
|
|
51
|
+
population_size (int): Default population size ('popsize') for CEM and GeneticAlgorithm when 'popsize' is not given in searcher_kwargs. Not applied to SNES.
|
|
48
52
|
**searcher_kwargs: Additional keyword arguments to pass to the
|
|
49
53
|
selected search algorithm's constructor (e.g., stdev_init=0.5 for SNES).
|
|
50
54
|
|
|
51
55
|
Returns:
|
|
52
56
|
Tuple:
|
|
53
|
-
A tuple containing the configured
|
|
57
|
+
A tuple containing the configured Problem and a Searcher factory (call it with no arguments to obtain a new searcher).
|
|
54
58
|
"""
|
|
55
59
|
lower_bounds, upper_bounds = bounds
|
|
56
60
|
|
|
@@ -60,51 +64,86 @@ def create_pytorch_problem(
|
|
|
60
64
|
upper_bounds.extend([0.55] * binary_features)
|
|
61
65
|
|
|
62
66
|
solution_length = len(lower_bounds)
|
|
63
|
-
device =
|
|
67
|
+
device = inference_handler.device
|
|
64
68
|
|
|
65
69
|
# Define the fitness function that EvoTorch will call.
|
|
66
|
-
@evotorch.decorators.to_tensor # type: ignore
|
|
67
|
-
@evotorch.decorators.on_aux_device(device)
|
|
68
70
|
def fitness_func(solution_tensor: torch.Tensor) -> torch.Tensor:
|
|
69
71
|
# Directly use the continuous-valued tensor from the optimizer for prediction
|
|
70
|
-
predictions =
|
|
72
|
+
predictions = inference_handler.predict_batch(solution_tensor)[PyTorchInferenceKeys.PREDICTIONS]
|
|
71
73
|
return predictions.flatten()
|
|
72
|
-
|
|
74
|
+
|
|
75
|
+
|
|
73
76
|
# Create the Problem instance.
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
77
|
+
if algorithm == "CEM" or algorithm == "SNES":
|
|
78
|
+
problem = evotorch.Problem(
|
|
79
|
+
objective_sense=task,
|
|
80
|
+
objective_func=fitness_func,
|
|
81
|
+
solution_length=solution_length,
|
|
82
|
+
initial_bounds=(lower_bounds, upper_bounds),
|
|
83
|
+
device=device,
|
|
84
|
+
vectorized=True #Use batches
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# If stdev_init is not provided, calculate it based on the bounds (used for SNES and CEM)
|
|
88
|
+
if 'stdev_init' not in searcher_kwargs:
|
|
89
|
+
# Calculate stdev for each parameter as 25% of its search range
|
|
90
|
+
stdevs = [abs(up - low) * 0.25 for low, up in zip(lower_bounds, upper_bounds)]
|
|
91
|
+
searcher_kwargs['stdev_init'] = torch.tensor(stdevs, dtype=torch.float32, requires_grad=False)
|
|
92
|
+
|
|
93
|
+
if algorithm == "SNES":
|
|
94
|
+
SearcherClass = SNES
|
|
95
|
+
elif algorithm == "CEM":
|
|
96
|
+
SearcherClass = CEM
|
|
97
|
+
# Set defaults for CEM if not provided
|
|
98
|
+
if 'popsize' not in searcher_kwargs:
|
|
99
|
+
searcher_kwargs['popsize'] = population_size
|
|
100
|
+
if 'parenthood_ratio' not in searcher_kwargs:
|
|
101
|
+
searcher_kwargs['parenthood_ratio'] = 0.2 #float 0.0 - 1.0
|
|
102
|
+
|
|
103
|
+
elif algorithm == "Genetic":
|
|
104
|
+
problem = evotorch.Problem(
|
|
105
|
+
objective_sense=task,
|
|
106
|
+
objective_func=fitness_func,
|
|
107
|
+
solution_length=solution_length,
|
|
108
|
+
bounds=(lower_bounds, upper_bounds),
|
|
109
|
+
device=device,
|
|
110
|
+
vectorized=True #Use batches
|
|
111
|
+
)
|
|
89
112
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
113
|
+
operators = [
|
|
114
|
+
SimulatedBinaryCrossOver(problem,
|
|
115
|
+
tournament_size=4,
|
|
116
|
+
eta=0.8),
|
|
117
|
+
GaussianMutation(problem,
|
|
118
|
+
stdev=0.1)
|
|
119
|
+
]
|
|
120
|
+
|
|
121
|
+
searcher_kwargs["operators"] = operators
|
|
122
|
+
if 'popsize' not in searcher_kwargs:
|
|
123
|
+
searcher_kwargs['popsize'] = population_size
|
|
124
|
+
|
|
125
|
+
SearcherClass = GeneticAlgorithm
|
|
126
|
+
|
|
127
|
+
else:
|
|
128
|
+
raise ValueError(f"Unknown algorithm '{algorithm}'.")
|
|
129
|
+
|
|
130
|
+
# Create a factory function with all arguments pre-filled
|
|
131
|
+
searcher_factory = partial(SearcherClass, problem, **searcher_kwargs)
|
|
94
132
|
|
|
95
|
-
return problem,
|
|
133
|
+
return problem, searcher_factory
|
|
96
134
|
|
|
97
135
|
|
|
98
136
|
def run_optimization(
|
|
99
137
|
problem: evotorch.Problem,
|
|
100
|
-
|
|
138
|
+
searcher_factory: Callable[[],Any],
|
|
101
139
|
num_generations: int,
|
|
102
140
|
target_name: str,
|
|
103
141
|
binary_features: int,
|
|
104
142
|
save_dir: Union[str, Path],
|
|
105
143
|
save_format: Literal['csv', 'sqlite', 'both'],
|
|
106
144
|
feature_names: Optional[List[str]],
|
|
107
|
-
repetitions: int = 1
|
|
145
|
+
repetitions: int = 1,
|
|
146
|
+
verbose: bool = True
|
|
108
147
|
) -> Optional[dict]:
|
|
109
148
|
"""
|
|
110
149
|
Runs the evolutionary optimization process, with support for multiple repetitions.
|
|
@@ -124,20 +163,19 @@ def run_optimization(
|
|
|
124
163
|
Args:
|
|
125
164
|
problem (evotorch.Problem): The configured problem instance, which defines
|
|
126
165
|
the objective function, solution space, and optimization sense.
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
num_generations (int): The total number of generations to run the
|
|
130
|
-
search algorithm for in each repetition.
|
|
166
|
+
searcher_factory (Callable): The searcher factory to generate fresh evolutionary algorithms.
|
|
167
|
+
num_generations (int): The total number of generations to run the search algorithm for in each repetition.
|
|
131
168
|
target_name (str): Target name that will also be used for the CSV filename and SQL table.
|
|
132
169
|
binary_features (int): Number of binary features located at the END of the feature vector.
|
|
133
170
|
save_dir (str | Path): The directory where the result file(s) will be saved.
|
|
134
171
|
save_format (Literal['csv', 'sqlite', 'both'], optional): The format for
|
|
135
|
-
saving results during iterative analysis.
|
|
172
|
+
saving results during iterative analysis.
|
|
136
173
|
feature_names (List[str], optional): Names of the solution features for
|
|
137
174
|
labeling the output files. If None, generic names like 'feature_0',
|
|
138
|
-
'feature_1', etc., will be created.
|
|
175
|
+
'feature_1', etc., will be created.
|
|
139
176
|
repetitions (int, optional): The number of independent times to run the
|
|
140
|
-
entire optimization process.
|
|
177
|
+
entire optimization process.
|
|
178
|
+
verbose (bool): If True, attach an EvoTorch PandasLogger to the searcher and save its log as a CSV. With multiple repetitions, only the first repetition is logged.
|
|
141
179
|
|
|
142
180
|
Returns:
|
|
143
181
|
Optional[dict]: A dictionary containing the best feature values and the
|
|
@@ -162,11 +200,29 @@ def run_optimization(
|
|
|
162
200
|
|
|
163
201
|
# --- SINGLE RUN LOGIC ---
|
|
164
202
|
if repetitions <= 1:
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
203
|
+
searcher = searcher_factory()
|
|
204
|
+
_LOGGER.info(f"🤖 Starting optimization with {searcher.__class__.__name__} Algorithm for {num_generations} generations...")
|
|
205
|
+
# for _ in trange(num_generations, desc="Optimizing"):
|
|
206
|
+
# searcher.step()
|
|
207
|
+
|
|
208
|
+
# Attach logger if requested
|
|
209
|
+
if verbose:
|
|
210
|
+
pandas_logger = PandasLogger(searcher)
|
|
211
|
+
|
|
212
|
+
searcher.run(num_generations) # Use the built-in run method for simplicity
|
|
213
|
+
|
|
214
|
+
# # DEBUG new searcher objects
|
|
215
|
+
# for status_key in searcher.iter_status_keys():
|
|
216
|
+
# print("===", status_key, "===")
|
|
217
|
+
# print(searcher.status[status_key])
|
|
218
|
+
# print()
|
|
219
|
+
|
|
220
|
+
# Get results from the .status dictionary
|
|
221
|
+
# SNES and CEM also expose the distribution mean under the 'center' status key; if needed, use: best_solution_tensor = searcher.status["center"]
|
|
222
|
+
best_solution_container = searcher.status["pop_best"]
|
|
223
|
+
best_solution_tensor = best_solution_container.values
|
|
224
|
+
best_fitness = best_solution_container.evals
|
|
168
225
|
|
|
169
|
-
best_solution_tensor, best_fitness = searcher.best
|
|
170
226
|
best_solution_np = best_solution_tensor.cpu().numpy()
|
|
171
227
|
|
|
172
228
|
# threshold binary features
|
|
@@ -179,6 +235,11 @@ def run_optimization(
|
|
|
179
235
|
result_dict[target_name] = best_fitness.item()
|
|
180
236
|
|
|
181
237
|
_save_result(result_dict, 'csv', csv_path) # Single run defaults to CSV
|
|
238
|
+
|
|
239
|
+
# Process logger
|
|
240
|
+
if verbose:
|
|
241
|
+
_handle_pandas_log(pandas_logger, save_path=save_path)
|
|
242
|
+
|
|
182
243
|
_LOGGER.info(f"✅ Optimization complete. Best solution saved to '{csv_path.name}'")
|
|
183
244
|
return result_dict
|
|
184
245
|
|
|
@@ -193,17 +254,26 @@ def run_optimization(
|
|
|
193
254
|
schema = {name: "REAL" for name in feature_names}
|
|
194
255
|
schema[target_name] = "REAL"
|
|
195
256
|
db_manager.create_table(db_table_name, schema)
|
|
196
|
-
|
|
257
|
+
|
|
258
|
+
print("")
|
|
259
|
+
# Repetitions loop
|
|
260
|
+
pandas_logger = None
|
|
197
261
|
for i in trange(repetitions, desc="Repetitions"):
|
|
198
|
-
|
|
262
|
+
# CRITICAL: Create a fresh searcher for each run using the factory
|
|
263
|
+
searcher = searcher_factory()
|
|
199
264
|
|
|
200
|
-
#
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
265
|
+
# Attach logger if requested
|
|
266
|
+
if verbose and i==0:
|
|
267
|
+
pandas_logger = PandasLogger(searcher)
|
|
268
|
+
|
|
269
|
+
searcher.run(num_generations) # Use the built-in run method for simplicity
|
|
270
|
+
|
|
271
|
+
# Get results from the .status dictionary
|
|
272
|
+
# SNES and CEM also expose the distribution mean under the 'center' status key; if needed, use: best_solution_tensor = searcher.status["center"]
|
|
273
|
+
best_solution_container = searcher.status["pop_best"]
|
|
274
|
+
best_solution_tensor = best_solution_container.values
|
|
275
|
+
best_fitness = best_solution_container.evals
|
|
276
|
+
|
|
207
277
|
best_solution_np = best_solution_tensor.cpu().numpy()
|
|
208
278
|
|
|
209
279
|
# threshold binary features
|
|
@@ -212,15 +282,25 @@ def run_optimization(
|
|
|
212
282
|
else:
|
|
213
283
|
best_solution_thresholded = best_solution_np
|
|
214
284
|
|
|
285
|
+
# make results dictionary
|
|
215
286
|
result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
|
|
216
287
|
result_dict[target_name] = best_fitness.item()
|
|
217
288
|
|
|
218
289
|
# Save each result incrementally
|
|
219
290
|
_save_result(result_dict, save_format, csv_path, db_manager, db_table_name)
|
|
291
|
+
|
|
292
|
+
# Process logger
|
|
293
|
+
if pandas_logger is not None:
|
|
294
|
+
_handle_pandas_log(pandas_logger, save_path=save_path)
|
|
220
295
|
|
|
221
296
|
_LOGGER.info(f"✅ Optimal solution space complete. Results saved to '{save_path}'")
|
|
222
297
|
return None
|
|
223
298
|
|
|
224
299
|
|
|
300
|
+
def _handle_pandas_log(logger: PandasLogger, save_path: Path):
|
|
301
|
+
log_dataframe = logger.to_dataframe()
|
|
302
|
+
save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLog", filename="evolution")
|
|
303
|
+
|
|
304
|
+
|
|
225
305
|
def info():
|
|
226
306
|
_script_info(__all__)
|
ml_tools/ensemble_evaluation.py
CHANGED
|
@@ -351,7 +351,7 @@ def plot_calibration_curve(
|
|
|
351
351
|
ax=ax
|
|
352
352
|
)
|
|
353
353
|
|
|
354
|
-
ax.set_title(f"{model_name} -
|
|
354
|
+
ax.set_title(f"{model_name} - Reliability Curve for {target_name}", fontsize=base_fontsize)
|
|
355
355
|
ax.tick_params(axis='both', labelsize=base_fontsize - 2)
|
|
356
356
|
ax.set_xlabel("Mean Predicted Probability", fontsize=base_fontsize)
|
|
357
357
|
ax.set_ylabel("Fraction of Positives", fontsize=base_fontsize)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|