dragon-ml-toolbox 6.0.0__tar.gz → 6.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (38) hide show
  1. {dragon_ml_toolbox-6.0.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-6.1.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ML_evaluation.py +8 -2
  4. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ML_inference.py +50 -44
  5. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ML_optimization.py +146 -66
  6. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ensemble_evaluation.py +1 -1
  7. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/pyproject.toml +1 -1
  8. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/LICENSE +0 -0
  9. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/LICENSE-THIRD-PARTY.md +0 -0
  10. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/README.md +0 -0
  11. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  12. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  13. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  14. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  15. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ETL_engineering.py +0 -0
  16. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/GUI_tools.py +0 -0
  17. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/MICE_imputation.py +0 -0
  18. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ML_callbacks.py +0 -0
  19. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ML_datasetmaster.py +0 -0
  20. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ML_models.py +0 -0
  21. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ML_trainer.py +0 -0
  22. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/PSO_optimization.py +0 -0
  23. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/RNN_forecast.py +0 -0
  24. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/SQL.py +0 -0
  25. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/VIF_factor.py +0 -0
  26. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/__init__.py +0 -0
  27. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/_logger.py +0 -0
  28. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/_script_info.py +0 -0
  29. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/custom_logger.py +0 -0
  30. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/data_exploration.py +0 -0
  31. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ensemble_inference.py +0 -0
  32. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/ensemble_learning.py +0 -0
  33. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/handle_excel.py +0 -0
  34. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/keys.py +0 -0
  35. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/optimization_tools.py +0 -0
  36. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/path_manager.py +0 -0
  37. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/ml_tools/utilities.py +0 -0
  38. {dragon_ml_toolbox-6.0.0 → dragon_ml_toolbox-6.1.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 6.0.0
3
+ Version: 6.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 6.0.0
3
+ Version: 6.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -163,7 +163,7 @@ def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pre
163
163
  fig_cal, ax_cal = plt.subplots(figsize=(8, 8), dpi=100)
164
164
  CalibrationDisplay.from_predictions(y_true, y_score, n_bins=15, ax=ax_cal)
165
165
 
166
- ax_cal.set_title('Calibration Plot (Reliability Curve)')
166
+ ax_cal.set_title('Reliability Curve')
167
167
  ax_cal.set_xlabel('Mean Predicted Probability')
168
168
  ax_cal.set_ylabel('Fraction of Positives')
169
169
  ax_cal.grid(True)
@@ -197,7 +197,7 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Union[s
197
197
  f" Coefficient of Determination (R²): {r2:.4f}"
198
198
  ]
199
199
  report_string = "\n".join(report_lines)
200
- print(report_string)
200
+ # print(report_string)
201
201
 
202
202
  save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
203
203
  # Save text report
@@ -308,6 +308,8 @@ def shap_summary_plot(model, background_data: Union[torch.Tensor,np.ndarray], in
308
308
  # Save Bar Plot
309
309
  bar_path = save_dir_path / "shap_bar_plot.svg"
310
310
  shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="bar", show=False)
311
+ ax = plt.gca()
312
+ ax.set_xlabel("SHAP Value Impact", labelpad=10)
311
313
  plt.title("SHAP Feature Importance")
312
314
  plt.tight_layout()
313
315
  plt.savefig(bar_path)
@@ -317,6 +319,10 @@ def shap_summary_plot(model, background_data: Union[torch.Tensor,np.ndarray], in
317
319
  # Save Dot Plot
318
320
  dot_path = save_dir_path / "shap_dot_plot.svg"
319
321
  shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot", show=False)
322
+ ax = plt.gca()
323
+ ax.set_xlabel("SHAP Value Impact", labelpad=10)
324
+ cb = plt.gcf().axes[-1]
325
+ cb.set_ylabel("", size=1)
320
326
  plt.title("SHAP Feature Importance")
321
327
  plt.tight_layout()
322
328
  plt.savefig(dot_path)
@@ -66,47 +66,10 @@ class PyTorchInferenceHandler:
66
66
 
67
67
  # Ensure tensor is on the correct device
68
68
  return features.to(self.device)
69
-
70
- def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
71
- """
72
- Predicts on a single feature vector.
73
-
74
- Args:
75
- features (np.ndarray | torch.Tensor): A 1D or 2D array/tensor for a single sample.
76
-
77
- Returns:
78
- Dict[str, Any]: A dictionary containing the prediction.
79
- - For regression: {'predictions': float}
80
- - For classification: {'labels': int, 'probabilities': np.ndarray}
69
+
70
+ def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
81
71
  """
82
- if features.ndim == 1:
83
- features = features.reshape(1, -1)
84
-
85
- if features.shape[0] != 1:
86
- raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
87
-
88
- results_batch = self.predict_batch(features)
89
-
90
- # Extract the single result from the batch
91
- if self.task == "regression":
92
- return {PyTorchInferenceKeys.PREDICTIONS: results_batch[PyTorchInferenceKeys.PREDICTIONS].item()}
93
- else: # classification
94
- return {
95
- PyTorchInferenceKeys.LABELS: results_batch[PyTorchInferenceKeys.LABELS].item(),
96
- PyTorchInferenceKeys.PROBABILITIES: results_batch[PyTorchInferenceKeys.PROBABILITIES][0]
97
- }
98
-
99
- def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
100
- """
101
- Predicts on a batch of feature vectors.
102
-
103
- Args:
104
- features (np.ndarray | torch.Tensor): A 2D array/tensor where each row is a sample.
105
-
106
- Returns:
107
- Dict[str, Any]: A dictionary containing the predictions.
108
- - For regression: {'predictions': np.ndarray}
109
- - For classification: {'labels': np.ndarray, 'probabilities': np.ndarray}
72
+ Core batch prediction method. Returns results as PyTorch tensors on the model's device.
110
73
  """
111
74
  if features.ndim != 2:
112
75
  raise ValueError("Input for batch prediction must be a 2D array or tensor.")
@@ -114,18 +77,61 @@ class PyTorchInferenceHandler:
114
77
  input_tensor = self._preprocess_input(features)
115
78
 
116
79
  with torch.no_grad():
117
- output = self.model(input_tensor).cpu()
80
+ # Output tensor remains on the model's device (e.g., 'mps' or 'cuda')
81
+ output = self.model(input_tensor)
118
82
 
119
83
  if self.task == "classification":
120
84
  probs = nn.functional.softmax(output, dim=1)
121
85
  labels = torch.argmax(probs, dim=1)
122
86
  return {
123
- PyTorchInferenceKeys.LABELS: labels.numpy(),
124
- PyTorchInferenceKeys.PROBABILITIES: probs.numpy()
87
+ PyTorchInferenceKeys.LABELS: labels,
88
+ PyTorchInferenceKeys.PROBABILITIES: probs
125
89
  }
126
90
  else: # regression
127
- return {PyTorchInferenceKeys.PREDICTIONS: output.numpy()}
91
+ return {PyTorchInferenceKeys.PREDICTIONS: output}
128
92
 
93
+ def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
94
+ """
95
+ Core single-sample prediction. Returns results as PyTorch tensors on the model's device.
96
+ """
97
+ if features.ndim == 1:
98
+ features = features.reshape(1, -1)
99
+
100
+ if features.shape[0] != 1:
101
+ raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
102
+
103
+ batch_results = self.predict_batch(features)
104
+
105
+ single_results = {key: value[0] for key, value in batch_results.items()}
106
+ return single_results
107
+
108
+ # --- NumPy Convenience Wrappers (on CPU) ---
109
+
110
+ def predict_batch_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, np.ndarray]:
111
+ """
112
+ Convenience wrapper for predict_batch that returns NumPy arrays.
113
+ """
114
+ tensor_results = self.predict_batch(features)
115
+ # Move tensor to CPU before converting to NumPy
116
+ numpy_results = {key: value.cpu().numpy() for key, value in tensor_results.items()}
117
+ return numpy_results
118
+
119
+ def predict_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
120
+ """
121
+ Convenience wrapper for predict that returns NumPy arrays or scalars.
122
+ """
123
+ tensor_results = self.predict(features)
124
+
125
+ if self.task == "regression":
126
+ # .item() implicitly moves to CPU
127
+ return {PyTorchInferenceKeys.PREDICTIONS: tensor_results[PyTorchInferenceKeys.PREDICTIONS].item()}
128
+ else: # classification
129
+ return {
130
+ PyTorchInferenceKeys.LABELS: tensor_results[PyTorchInferenceKeys.LABELS].item(),
131
+ # ✅ Move tensor to CPU before converting to NumPy
132
+ PyTorchInferenceKeys.PROBABILITIES: tensor_results[PyTorchInferenceKeys.PROBABILITIES].cpu().numpy()
133
+ }
134
+
129
135
 
130
136
  def info():
131
137
  _script_info(__all__)
@@ -1,12 +1,15 @@
1
+ import pandas # logger
1
2
  import torch
2
3
  import numpy #handling torch to numpy
3
4
  import evotorch
4
- from evotorch.algorithms import CMAES, SteadyStateGA
5
- from evotorch.logging import StdOutLogger
6
- from typing import Literal, Union, Tuple, List, Optional
5
+ from evotorch.algorithms import SNES, CEM, GeneticAlgorithm
6
+ from evotorch.logging import PandasLogger
7
+ from evotorch.operators import SimulatedBinaryCrossOver, GaussianMutation
8
+ from typing import Literal, Union, Tuple, List, Optional, Any, Callable
7
9
  from pathlib import Path
8
10
  from tqdm.auto import trange
9
11
  from contextlib import nullcontext
12
+ from functools import partial
10
13
 
11
14
  from .path_manager import make_fullpath, sanitize_filename
12
15
  from ._logger import _LOGGER
@@ -15,8 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
15
18
  from .keys import PyTorchInferenceKeys
16
19
  from .SQL import DatabaseManager
17
20
  from .optimization_tools import _save_result
18
- from .utilities import threshold_binary_values
19
-
21
+ from .utilities import threshold_binary_values, save_dataframe
20
22
 
21
23
  __all__ = [
22
24
  "create_pytorch_problem",
@@ -25,32 +27,34 @@ __all__ = [
25
27
 
26
28
 
27
29
  def create_pytorch_problem(
28
- handler: PyTorchInferenceHandler,
30
+ inference_handler: PyTorchInferenceHandler,
29
31
  bounds: Tuple[List[float], List[float]],
30
32
  binary_features: int,
31
- task: Literal["minimize", "maximize"],
32
- algorithm: Literal["CMAES", "GA"] = "CMAES",
33
- verbose: bool = False,
33
+ task: Literal["min", "max"],
34
+ algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
35
+ population_size: int = 200,
34
36
  **searcher_kwargs
35
- ) -> Tuple[evotorch.Problem, evotorch.Searcher]: # type: ignore
37
+ ) -> Tuple[evotorch.Problem, Callable[[], Any]]:
36
38
  """
37
- Creates and configures an EvoTorch Problem and Searcher for a PyTorch model.
38
-
39
+ Creates and configures an EvoTorch Problem and a Searcher factory (a callable that builds a fresh searcher on each call) for a PyTorch model.
40
+
41
+ SNES and CEM do not accept bounds, the given bounds will be used as initial bounds only.
42
+
43
+ The Genetic Algorithm works directly with the bounds, and operators such as SimulatedBinaryCrossOver and GaussianMutation.
44
+
39
45
  Args:
40
- handler (PyTorchInferenceHandler): An initialized inference handler
41
- containing the model and weights.
42
- bounds (tuple[list[float], list[float]]): A tuple containing the lower
43
- and upper bounds for the solution features.
46
+ inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
47
+ bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
44
48
  binary_features (int): Number of binary features located at the END of the feature vector. Will be automatically added to the bounds.
45
49
  task (str): The optimization goal, either "min" or "max".
46
- algorithm (str): The search algorithm to use, "CMAES" or "GA" (SteadyStateGA).
47
- verbose (bool): Add an Evotorch logger for real-time console updates.
50
+ algorithm (str): The search algorithm to use.
51
+ population_size (int): Used for CEM and GeneticAlgorithm.
48
52
  **searcher_kwargs: Additional keyword arguments to pass to the
49
53
  selected search algorithm's constructor (e.g., stdev_init=0.5 for SNES or CEM).
50
54
 
51
55
  Returns:
52
56
  Tuple:
53
- A tuple containing the configured evotorch.Problem and evotorch.Searcher.
57
+ A tuple containing the configured Problem and a searcher factory; call the factory to obtain a fresh Searcher.
54
58
  """
55
59
  lower_bounds, upper_bounds = bounds
56
60
 
@@ -60,51 +64,86 @@ def create_pytorch_problem(
60
64
  upper_bounds.extend([0.55] * binary_features)
61
65
 
62
66
  solution_length = len(lower_bounds)
63
- device = handler.device
67
+ device = inference_handler.device
64
68
 
65
69
  # Define the fitness function that EvoTorch will call.
66
- @evotorch.decorators.to_tensor # type: ignore
67
- @evotorch.decorators.on_aux_device(device)
68
70
  def fitness_func(solution_tensor: torch.Tensor) -> torch.Tensor:
69
71
  # Directly use the continuous-valued tensor from the optimizer for prediction
70
- predictions = handler.predict_batch(solution_tensor)[PyTorchInferenceKeys.PREDICTIONS]
72
+ predictions = inference_handler.predict_batch(solution_tensor)[PyTorchInferenceKeys.PREDICTIONS]
71
73
  return predictions.flatten()
72
-
74
+
75
+
73
76
  # Create the Problem instance.
74
- problem = evotorch.Problem(
75
- objective_sense=task,
76
- objective_func=fitness_func,
77
- solution_length=solution_length,
78
- initial_bounds=(lower_bounds, upper_bounds),
79
- device=device,
80
- )
81
-
82
- # Create the selected searcher instance.
83
- if algorithm == "CMAES":
84
- searcher = CMAES(problem, **searcher_kwargs)
85
- elif algorithm == "GA":
86
- searcher = SteadyStateGA(problem, **searcher_kwargs)
87
- else:
88
- raise ValueError(f"Unknown algorithm '{algorithm}'. Choose 'CMAES' or 'GA'.")
77
+ if algorithm == "CEM" or algorithm == "SNES":
78
+ problem = evotorch.Problem(
79
+ objective_sense=task,
80
+ objective_func=fitness_func,
81
+ solution_length=solution_length,
82
+ initial_bounds=(lower_bounds, upper_bounds),
83
+ device=device,
84
+ vectorized=True #Use batches
85
+ )
86
+
87
+ # If stdev_init is not provided, calculate it based on the bounds (used for SNES and CEM)
88
+ if 'stdev_init' not in searcher_kwargs:
89
+ # Calculate stdev for each parameter as 25% of its search range
90
+ stdevs = [abs(up - low) * 0.25 for low, up in zip(lower_bounds, upper_bounds)]
91
+ searcher_kwargs['stdev_init'] = torch.tensor(stdevs, dtype=torch.float32, requires_grad=False)
92
+
93
+ if algorithm == "SNES":
94
+ SearcherClass = SNES
95
+ elif algorithm == "CEM":
96
+ SearcherClass = CEM
97
+ # Set defaults for CEM if not provided
98
+ if 'popsize' not in searcher_kwargs:
99
+ searcher_kwargs['popsize'] = population_size
100
+ if 'parenthood_ratio' not in searcher_kwargs:
101
+ searcher_kwargs['parenthood_ratio'] = 0.2 #float 0.0 - 1.0
102
+
103
+ elif algorithm == "Genetic":
104
+ problem = evotorch.Problem(
105
+ objective_sense=task,
106
+ objective_func=fitness_func,
107
+ solution_length=solution_length,
108
+ bounds=(lower_bounds, upper_bounds),
109
+ device=device,
110
+ vectorized=True #Use batches
111
+ )
89
112
 
90
- # Add a logger for real-time console updates.
91
- # This gives the user immediate feedback on the optimization progress.
92
- if verbose:
93
- _ = StdOutLogger(searcher)
113
+ operators = [
114
+ SimulatedBinaryCrossOver(problem,
115
+ tournament_size=4,
116
+ eta=0.8),
117
+ GaussianMutation(problem,
118
+ stdev=0.1)
119
+ ]
120
+
121
+ searcher_kwargs["operators"] = operators
122
+ if 'popsize' not in searcher_kwargs:
123
+ searcher_kwargs['popsize'] = population_size
124
+
125
+ SearcherClass = GeneticAlgorithm
126
+
127
+ else:
128
+ raise ValueError(f"Unknown algorithm '{algorithm}'.")
129
+
130
+ # Create a factory function with all arguments pre-filled
131
+ searcher_factory = partial(SearcherClass, problem, **searcher_kwargs)
94
132
 
95
- return problem, searcher
133
+ return problem, searcher_factory
96
134
 
97
135
 
98
136
  def run_optimization(
99
137
  problem: evotorch.Problem,
100
- searcher: evotorch.Searcher, # type: ignore
138
+ searcher_factory: Callable[[],Any],
101
139
  num_generations: int,
102
140
  target_name: str,
103
141
  binary_features: int,
104
142
  save_dir: Union[str, Path],
105
143
  save_format: Literal['csv', 'sqlite', 'both'],
106
144
  feature_names: Optional[List[str]],
107
- repetitions: int = 1
145
+ repetitions: int = 1,
146
+ verbose: bool = True
108
147
  ) -> Optional[dict]:
109
148
  """
110
149
  Runs the evolutionary optimization process, with support for multiple repetitions.
@@ -124,20 +163,19 @@ def run_optimization(
124
163
  Args:
125
164
  problem (evotorch.Problem): The configured problem instance, which defines
126
165
  the objective function, solution space, and optimization sense.
127
- searcher (evotorch.Searcher): The configured searcher instance, which
128
- contains the evolutionary algorithm (e.g., CMAES, GA).
129
- num_generations (int): The total number of generations to run the
130
- search algorithm for in each repetition.
166
+ searcher_factory (Callable): The searcher factory to generate fresh evolutionary algorithms.
167
+ num_generations (int): The total number of generations to run the search algorithm for in each repetition.
131
168
  target_name (str): Target name that will also be used for the CSV filename and SQL table.
132
169
  binary_features (int): Number of binary features located at the END of the feature vector.
133
170
  save_dir (str | Path): The directory where the result file(s) will be saved.
134
171
  save_format (Literal['csv', 'sqlite', 'both'], optional): The format for
135
- saving results during iterative analysis. Defaults to 'both'.
172
+ saving results during iterative analysis.
136
173
  feature_names (List[str], optional): Names of the solution features for
137
174
  labeling the output files. If None, generic names like 'feature_0',
138
- 'feature_1', etc., will be created. Defaults to None.
175
+ 'feature_1', etc., will be created.
139
176
  repetitions (int, optional): The number of independent times to run the
140
- entire optimization process. Defaults to 1.
177
+ entire optimization process.
178
+ verbose (bool): Attach an EvoTorch PandasLogger and save its log as a csv. Only the first repetition is logged.
141
179
 
142
180
  Returns:
143
181
  Optional[dict]: A dictionary containing the best feature values and the
@@ -162,11 +200,29 @@ def run_optimization(
162
200
 
163
201
  # --- SINGLE RUN LOGIC ---
164
202
  if repetitions <= 1:
165
- _LOGGER.info(f"🤖 Starting optimization with {searcher.__class__.__name__} for {num_generations} generations...")
166
- for _ in trange(num_generations, desc="Optimizing"):
167
- searcher.step()
203
+ searcher = searcher_factory()
204
+ _LOGGER.info(f"🤖 Starting optimization with {searcher.__class__.__name__} Algorithm for {num_generations} generations...")
205
+ # for _ in trange(num_generations, desc="Optimizing"):
206
+ # searcher.step()
207
+
208
+ # Attach logger if requested
209
+ if verbose:
210
+ pandas_logger = PandasLogger(searcher)
211
+
212
+ searcher.run(num_generations) # Use the built-in run method for simplicity
213
+
214
+ # # DEBUG new searcher objects
215
+ # for status_key in searcher.iter_status_keys():
216
+ # print("===", status_key, "===")
217
+ # print(searcher.status[status_key])
218
+ # print()
219
+
220
+ # Get results from the .status dictionary
221
+ # SNES and CEM also expose the mean values under the key 'center', if needed: searcher.status["center"]
222
+ best_solution_container = searcher.status["pop_best"]
223
+ best_solution_tensor = best_solution_container.values
224
+ best_fitness = best_solution_container.evals
168
225
 
169
- best_solution_tensor, best_fitness = searcher.best
170
226
  best_solution_np = best_solution_tensor.cpu().numpy()
171
227
 
172
228
  # threshold binary features
@@ -179,6 +235,11 @@ def run_optimization(
179
235
  result_dict[target_name] = best_fitness.item()
180
236
 
181
237
  _save_result(result_dict, 'csv', csv_path) # Single run defaults to CSV
238
+
239
+ # Process logger
240
+ if verbose:
241
+ _handle_pandas_log(pandas_logger, save_path=save_path)
242
+
182
243
  _LOGGER.info(f"✅ Optimization complete. Best solution saved to '{csv_path.name}'")
183
244
  return result_dict
184
245
 
@@ -193,17 +254,26 @@ def run_optimization(
193
254
  schema = {name: "REAL" for name in feature_names}
194
255
  schema[target_name] = "REAL"
195
256
  db_manager.create_table(db_table_name, schema)
196
-
257
+
258
+ print("")
259
+ # Repetitions loop
260
+ pandas_logger = None
197
261
  for i in trange(repetitions, desc="Repetitions"):
198
- _LOGGER.info(f"--- Starting Repetition {i+1}/{repetitions} ---")
262
+ # CRITICAL: Create a fresh searcher for each run using the factory
263
+ searcher = searcher_factory()
199
264
 
200
- # CRITICAL: Re-initialize the searcher to ensure each run is independent
201
- searcher.reset()
202
-
203
- for _ in range(num_generations): # Inner loop does not need a progress bar
204
- searcher.step()
205
-
206
- best_solution_tensor, best_fitness = searcher.best
265
+ # Attach logger if requested
266
+ if verbose and i==0:
267
+ pandas_logger = PandasLogger(searcher)
268
+
269
+ searcher.run(num_generations) # Use the built-in run method for simplicity
270
+
271
+ # Get results from the .status dictionary
272
+ # SNES and CEM also expose the mean values under the key 'center', if needed: searcher.status["center"]
273
+ best_solution_container = searcher.status["pop_best"]
274
+ best_solution_tensor = best_solution_container.values
275
+ best_fitness = best_solution_container.evals
276
+
207
277
  best_solution_np = best_solution_tensor.cpu().numpy()
208
278
 
209
279
  # threshold binary features
@@ -212,15 +282,25 @@ def run_optimization(
212
282
  else:
213
283
  best_solution_thresholded = best_solution_np
214
284
 
285
+ # make results dictionary
215
286
  result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
216
287
  result_dict[target_name] = best_fitness.item()
217
288
 
218
289
  # Save each result incrementally
219
290
  _save_result(result_dict, save_format, csv_path, db_manager, db_table_name)
291
+
292
+ # Process logger
293
+ if pandas_logger is not None:
294
+ _handle_pandas_log(pandas_logger, save_path=save_path)
220
295
 
221
296
  _LOGGER.info(f"✅ Optimal solution space complete. Results saved to '{save_path}'")
222
297
  return None
223
298
 
224
299
 
300
+ def _handle_pandas_log(logger: PandasLogger, save_path: Path):
301
+ log_dataframe = logger.to_dataframe()
302
+ save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLog", filename="evolution")
303
+
304
+
225
305
  def info():
226
306
  _script_info(__all__)
@@ -351,7 +351,7 @@ def plot_calibration_curve(
351
351
  ax=ax
352
352
  )
353
353
 
354
- ax.set_title(f"{model_name} - Calibration Plot for {target_name}", fontsize=base_fontsize)
354
+ ax.set_title(f"{model_name} - Reliability Curve for {target_name}", fontsize=base_fontsize)
355
355
  ax.tick_params(axis='both', labelsize=base_fontsize - 2)
356
356
  ax.set_xlabel("Mean Predicted Probability", fontsize=base_fontsize)
357
357
  ax.set_ylabel("Fraction of Positives", fontsize=base_fontsize)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "6.0.0"
3
+ version = "6.1.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl Loza", email = "luigiloza@gmail.com" }