dragon-ml-toolbox 6.0.1__tar.gz → 6.1.1__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of dragon-ml-toolbox might be problematic.

Files changed (39)
  1. {dragon_ml_toolbox-6.0.1/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-6.1.1}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ML_inference.py +50 -44
  4. dragon_ml_toolbox-6.1.1/ml_tools/ML_optimization.py +308 -0
  5. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/pyproject.toml +1 -1
  6. dragon_ml_toolbox-6.0.1/ml_tools/ML_optimization.py +0 -226
  7. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/LICENSE +0 -0
  8. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/LICENSE-THIRD-PARTY.md +0 -0
  9. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/README.md +0 -0
  10. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  11. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  12. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  13. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  14. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ETL_engineering.py +0 -0
  15. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/GUI_tools.py +0 -0
  16. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/MICE_imputation.py +0 -0
  17. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ML_callbacks.py +0 -0
  18. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ML_datasetmaster.py +0 -0
  19. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ML_evaluation.py +0 -0
  20. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ML_models.py +0 -0
  21. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ML_trainer.py +0 -0
  22. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/PSO_optimization.py +0 -0
  23. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/RNN_forecast.py +0 -0
  24. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/SQL.py +0 -0
  25. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/VIF_factor.py +0 -0
  26. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/__init__.py +0 -0
  27. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/_logger.py +0 -0
  28. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/_script_info.py +0 -0
  29. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/custom_logger.py +0 -0
  30. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/data_exploration.py +0 -0
  31. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ensemble_evaluation.py +0 -0
  32. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ensemble_inference.py +0 -0
  33. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/ensemble_learning.py +0 -0
  34. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/handle_excel.py +0 -0
  35. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/keys.py +0 -0
  36. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/optimization_tools.py +0 -0
  37. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/path_manager.py +0 -0
  38. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/ml_tools/utilities.py +0 -0
  39. {dragon_ml_toolbox-6.0.1 → dragon_ml_toolbox-6.1.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 6.0.1
+Version: 6.1.1
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 6.0.1
+Version: 6.1.1
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -66,47 +66,10 @@ class PyTorchInferenceHandler:

         # Ensure tensor is on the correct device
         return features.to(self.device)
-
-    def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
-        """
-        Predicts on a single feature vector.
-
-        Args:
-            features (np.ndarray | torch.Tensor): A 1D or 2D array/tensor for a single sample.
-
-        Returns:
-            Dict[str, Any]: A dictionary containing the prediction.
-            - For regression: {'predictions': float}
-            - For classification: {'labels': int, 'probabilities': np.ndarray}
+
+    def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
         """
-        if features.ndim == 1:
-            features = features.reshape(1, -1)
-
-        if features.shape[0] != 1:
-            raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
-
-        results_batch = self.predict_batch(features)
-
-        # Extract the single result from the batch
-        if self.task == "regression":
-            return {PyTorchInferenceKeys.PREDICTIONS: results_batch[PyTorchInferenceKeys.PREDICTIONS].item()}
-        else: # classification
-            return {
-                PyTorchInferenceKeys.LABELS: results_batch[PyTorchInferenceKeys.LABELS].item(),
-                PyTorchInferenceKeys.PROBABILITIES: results_batch[PyTorchInferenceKeys.PROBABILITIES][0]
-            }
-
-    def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
-        """
-        Predicts on a batch of feature vectors.
-
-        Args:
-            features (np.ndarray | torch.Tensor): A 2D array/tensor where each row is a sample.
-
-        Returns:
-            Dict[str, Any]: A dictionary containing the predictions.
-            - For regression: {'predictions': np.ndarray}
-            - For classification: {'labels': np.ndarray, 'probabilities': np.ndarray}
+        Core batch prediction method. Returns results as PyTorch tensors on the model's device.
         """
         if features.ndim != 2:
             raise ValueError("Input for batch prediction must be a 2D array or tensor.")
@@ -114,18 +77,61 @@ class PyTorchInferenceHandler:
         input_tensor = self._preprocess_input(features)

         with torch.no_grad():
-            output = self.model(input_tensor).cpu()
+            # Output tensor remains on the model's device (e.g., 'mps' or 'cuda')
+            output = self.model(input_tensor)

         if self.task == "classification":
             probs = nn.functional.softmax(output, dim=1)
             labels = torch.argmax(probs, dim=1)
             return {
-                PyTorchInferenceKeys.LABELS: labels.numpy(),
-                PyTorchInferenceKeys.PROBABILITIES: probs.numpy()
+                PyTorchInferenceKeys.LABELS: labels,
+                PyTorchInferenceKeys.PROBABILITIES: probs
             }
         else: # regression
-            return {PyTorchInferenceKeys.PREDICTIONS: output.numpy()}
+            return {PyTorchInferenceKeys.PREDICTIONS: output}

+    def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
+        """
+        Core single-sample prediction. Returns results as PyTorch tensors on the model's device.
+        """
+        if features.ndim == 1:
+            features = features.reshape(1, -1)
+
+        if features.shape[0] != 1:
+            raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
+
+        batch_results = self.predict_batch(features)
+
+        single_results = {key: value[0] for key, value in batch_results.items()}
+        return single_results
+
+    # --- NumPy Convenience Wrappers (on CPU) ---
+
+    def predict_batch_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, np.ndarray]:
+        """
+        Convenience wrapper for predict_batch that returns NumPy arrays.
+        """
+        tensor_results = self.predict_batch(features)
+        # Move tensor to CPU before converting to NumPy
+        numpy_results = {key: value.cpu().numpy() for key, value in tensor_results.items()}
+        return numpy_results
+
+    def predict_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
+        """
+        Convenience wrapper for predict that returns NumPy arrays or scalars.
+        """
+        tensor_results = self.predict(features)
+
+        if self.task == "regression":
+            # .item() implicitly moves to CPU
+            return {PyTorchInferenceKeys.PREDICTIONS: tensor_results[PyTorchInferenceKeys.PREDICTIONS].item()}
+        else: # classification
+            return {
+                PyTorchInferenceKeys.LABELS: tensor_results[PyTorchInferenceKeys.LABELS].item(),
+                # Move tensor to CPU before converting to NumPy
+                PyTorchInferenceKeys.PROBABILITIES: tensor_results[PyTorchInferenceKeys.PROBABILITIES].cpu().numpy()
+            }
+

 def info():
     _script_info(__all__)
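
In short, predict() and predict_batch() now return torch.Tensor results that stay on the model's device, while the new predict_numpy()/predict_batch_numpy() wrappers reproduce the old CPU/NumPy behaviour. The sketch below illustrates how the reworked API might be called; it is not taken from the package documentation, and the handler construction, feature shapes, and the regression task are assumptions.

# Hedged usage sketch of the 6.1.1 inference API (not from the package docs).
# `handler` is assumed to be an already-constructed PyTorchInferenceHandler for a
# regression model; its constructor signature is not part of this diff.
import numpy as np
from ml_tools.keys import PyTorchInferenceKeys

batch = np.random.rand(8, 5).astype(np.float32)  # 8 samples, 5 features (placeholder shape)

# Core methods now return torch.Tensors that remain on the model's device (CPU, CUDA, or MPS).
tensor_preds = handler.predict_batch(batch)[PyTorchInferenceKeys.PREDICTIONS]

# NumPy convenience wrappers move results to the CPU first, matching the old 6.0.1 behaviour.
numpy_preds = handler.predict_batch_numpy(batch)[PyTorchInferenceKeys.PREDICTIONS]

# Single-sample path: predict() keeps tensors, predict_numpy() returns plain Python scalars.
scalar_pred = handler.predict_numpy(batch[0])[PyTorchInferenceKeys.PREDICTIONS]
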
@@ -0,0 +1,308 @@
+import pandas # logger
+import torch
+import numpy #handling torch to numpy
+import evotorch
+from evotorch.algorithms import SNES, CEM, GeneticAlgorithm
+from evotorch.logging import PandasLogger
+from evotorch.operators import SimulatedBinaryCrossOver, GaussianMutation
+from typing import Literal, Union, Tuple, List, Optional, Any, Callable
+from pathlib import Path
+from tqdm.auto import trange
+from contextlib import nullcontext
+from functools import partial
+
+from .path_manager import make_fullpath, sanitize_filename
+from ._logger import _LOGGER
+from ._script_info import _script_info
+from .ML_inference import PyTorchInferenceHandler
+from .keys import PyTorchInferenceKeys
+from .SQL import DatabaseManager
+from .optimization_tools import _save_result
+from .utilities import threshold_binary_values, save_dataframe
+
+__all__ = [
+    "create_pytorch_problem",
+    "run_optimization"
+]
+
+
+def create_pytorch_problem(
+    inference_handler: PyTorchInferenceHandler,
+    bounds: Tuple[List[float], List[float]],
+    binary_features: int,
+    task: Literal["min", "max"],
+    algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
+    population_size: int = 200,
+    **searcher_kwargs
+) -> Tuple[evotorch.Problem, Callable[[], Any]]:
+    """
+    Creates and configures an EvoTorch Problem and a Searcher factory class for a PyTorch model.
+
+    SNES and CEM do not accept bounds, the given bounds will be used as initial bounds only.
+
+    The Genetic Algorithm works directly with the bounds, and operators such as SimulatedBinaryCrossOver and GaussianMutation.
+
+    Args:
+        inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
+        bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
+        binary_features (int): Number of binary features located at the END of the feature vector. Will be automatically added to the bounds.
+        task (str): The optimization goal, either "minimize" or "maximize".
+        algorithm (str): The search algorithm to use.
+        population_size (int): Used for CEM and GeneticAlgorithm.
+        **searcher_kwargs: Additional keyword arguments to pass to the
+            selected search algorithm's constructor (e.g., stdev_init=0.5 for CMAES).
+
+    Returns:
+        Tuple:
+            A tuple containing the configured Problem and Searcher.
+    """
+    # Create copies to avoid modifying the original lists passed in the `bounds` tuple
+    lower_bounds = list(bounds[0])
+    upper_bounds = list(bounds[1])
+
+    # add binary bounds
+    if binary_features > 0:
+        lower_bounds.extend([0.45] * binary_features)
+        upper_bounds.extend([0.55] * binary_features)
+
+    solution_length = len(lower_bounds)
+    device = inference_handler.device
+
+    # Define the fitness function that EvoTorch will call.
+    def fitness_func(solution_tensor: torch.Tensor) -> torch.Tensor:
+        # Directly use the continuous-valued tensor from the optimizer for prediction
+        predictions = inference_handler.predict_batch(solution_tensor)[PyTorchInferenceKeys.PREDICTIONS]
+        return predictions.flatten()
+
+
+    # Create the Problem instance.
+    if algorithm == "CEM" or algorithm == "SNES":
+        problem = evotorch.Problem(
+            objective_sense=task,
+            objective_func=fitness_func,
+            solution_length=solution_length,
+            initial_bounds=(lower_bounds, upper_bounds),
+            device=device,
+            vectorized=True #Use batches
+        )
+
+        # If stdev_init is not provided, calculate it based on the bounds (used for SNES and CEM)
+        if 'stdev_init' not in searcher_kwargs:
+            # Calculate stdev for each parameter as 25% of its search range
+            stdevs = [abs(up - low) * 0.25 for low, up in zip(lower_bounds, upper_bounds)]
+            searcher_kwargs['stdev_init'] = torch.tensor(stdevs, dtype=torch.float32, requires_grad=False)
+
+        if algorithm == "SNES":
+            SearcherClass = SNES
+        elif algorithm == "CEM":
+            SearcherClass = CEM
+            # Set a defaults for CEM if not provided
+            if 'popsize' not in searcher_kwargs:
+                searcher_kwargs['popsize'] = population_size
+            if 'parenthood_ratio' not in searcher_kwargs:
+                searcher_kwargs['parenthood_ratio'] = 0.2 #float 0.0 - 1.0
+
+    elif algorithm == "Genetic":
+        problem = evotorch.Problem(
+            objective_sense=task,
+            objective_func=fitness_func,
+            solution_length=solution_length,
+            bounds=(lower_bounds, upper_bounds),
+            device=device,
+            vectorized=True #Use batches
+        )
+
+        operators = [
+            SimulatedBinaryCrossOver(problem,
+                                     tournament_size=4,
+                                     eta=0.8),
+            GaussianMutation(problem,
+                             stdev=0.1)
+        ]
+
+        searcher_kwargs["operators"] = operators
+        if 'popsize' not in searcher_kwargs:
+            searcher_kwargs['popsize'] = population_size
+
+        SearcherClass = GeneticAlgorithm
+
+    else:
+        raise ValueError(f"Unknown algorithm '{algorithm}'.")
+
+    # Create a factory function with all arguments pre-filled
+    searcher_factory = partial(SearcherClass, problem, **searcher_kwargs)
+
+    return problem, searcher_factory
+
+
+def run_optimization(
+    problem: evotorch.Problem,
+    searcher_factory: Callable[[],Any],
+    num_generations: int,
+    target_name: str,
+    binary_features: int,
+    save_dir: Union[str, Path],
+    save_format: Literal['csv', 'sqlite', 'both'],
+    feature_names: Optional[List[str]],
+    repetitions: int = 1,
+    verbose: bool = True
+) -> Optional[dict]:
+    """
+    Runs the evolutionary optimization process, with support for multiple repetitions.
+
+    This function serves as the main engine for the optimization task. It takes a
+    configured Problem and a Searcher from EvoTorch and executes the optimization
+    for a specified number of generations.
+
+    It has two modes of operation:
+    1. **Single Run (repetitions=1):** Executes the optimization once, saves the
+       single best result to a CSV file, and returns it as a dictionary.
+    2. **Iterative Analysis (repetitions > 1):** Executes the optimization
+       multiple times. Results from each run are streamed incrementally to the
+       specified file formats (CSV and/or SQLite database). In this mode,
+       the function returns None.
+
+    Args:
+        problem (evotorch.Problem): The configured problem instance, which defines
+            the objective function, solution space, and optimization sense.
+        searcher_factory (Callable): The searcher factory to generate fresh evolutionary algorithms.
+        num_generations (int): The total number of generations to run the search algorithm for in each repetition.
+        target_name (str): Target name that will also be used for the CSV filename and SQL table.
+        binary_features (int): Number of binary features located at the END of the feature vector.
+        save_dir (str | Path): The directory where the result file(s) will be saved.
+        save_format (Literal['csv', 'sqlite', 'both'], optional): The format for
+            saving results during iterative analysis.
+        feature_names (List[str], optional): Names of the solution features for
+            labeling the output files. If None, generic names like 'feature_0',
+            'feature_1', etc., will be created.
+        repetitions (int, optional): The number of independent times to run the
+            entire optimization process.
+        verbose (bool): Add an Evotorch Pandas logger saved as a csv. Only for the first repetition.
+
+    Returns:
+        Optional[dict]: A dictionary containing the best feature values and the
+        fitness score if `repetitions` is 1. Returns `None` if `repetitions`
+        is greater than 1, as results are streamed to files instead.
+    """
+    # preprocess paths
+    save_path = make_fullpath(save_dir, make=True, enforce="directory")
+
+    sanitized_target_name = sanitize_filename(target_name)
+    if not sanitized_target_name.endswith(".csv"):
+        sanitized_target_name = sanitized_target_name + ".csv"
+
+    csv_path = save_path / sanitized_target_name
+
+    db_path = save_path / "Optimization.db"
+    db_table_name = target_name
+
+    # preprocess feature names
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(problem.solution_length)] # type: ignore
+
+    # --- SINGLE RUN LOGIC ---
+    if repetitions <= 1:
+        searcher = searcher_factory()
+        _LOGGER.info(f"🤖 Starting optimization with {searcher.__class__.__name__} Algorithm for {num_generations} generations...")
+        # for _ in trange(num_generations, desc="Optimizing"):
+        #     searcher.step()
+
+        # Attach logger if requested
+        if verbose:
+            pandas_logger = PandasLogger(searcher)
+
+        searcher.run(num_generations) # Use the built-in run method for simplicity
+
+        # # DEBUG new searcher objects
+        # for status_key in searcher.iter_status_keys():
+        #     print("===", status_key, "===")
+        #     print(searcher.status[status_key])
+        #     print()
+
+        # Get results from the .status dictionary
+        # SNES and CEM use the key 'center' to get mean values if needed best_solution_tensor = searcher.status["center"]
+        best_solution_container = searcher.status["pop_best"]
+        best_solution_tensor = best_solution_container.values
+        best_fitness = best_solution_container.evals
+
+        best_solution_np = best_solution_tensor.cpu().numpy()
+
+        # threshold binary features
+        if binary_features > 0:
+            best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
+        else:
+            best_solution_thresholded = best_solution_np
+
+        result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
+        result_dict[target_name] = best_fitness.item()
+
+        _save_result(result_dict, 'csv', csv_path) # Single run defaults to CSV
+
+        # Process logger
+        if verbose:
+            _handle_pandas_log(pandas_logger, save_path=save_path)
+
+        _LOGGER.info(f"✅ Optimization complete. Best solution saved to '{csv_path.name}'")
+        return result_dict
+
+    # --- MULTIPLE REPETITIONS LOGIC ---
+    else:
+        _LOGGER.info(f"🏁 Starting optimal solution space analysis with {repetitions} repetitions...")
+
+        db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
+
+        with db_context as db_manager:
+            if db_manager:
+                schema = {name: "REAL" for name in feature_names}
+                schema[target_name] = "REAL"
+                db_manager.create_table(db_table_name, schema)
+
+            print("")
+            # Repetitions loop
+            pandas_logger = None
+            for i in trange(repetitions, desc="Repetitions"):
+                # CRITICAL: Create a fresh searcher for each run using the factory
+                searcher = searcher_factory()
+
+                # Attach logger if requested
+                if verbose and i==0:
+                    pandas_logger = PandasLogger(searcher)
+
+                searcher.run(num_generations) # Use the built-in run method for simplicity
+
+                # Get results from the .status dictionary
+                # SNES and CEM use the key 'center' to get mean values if needed best_solution_tensor = searcher.status["center"]
+                best_solution_container = searcher.status["pop_best"]
+                best_solution_tensor = best_solution_container.values
+                best_fitness = best_solution_container.evals
+
+                best_solution_np = best_solution_tensor.cpu().numpy()
+
+                # threshold binary features
+                if binary_features > 0:
+                    best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
+                else:
+                    best_solution_thresholded = best_solution_np
+
+                # make results dictionary
+                result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
+                result_dict[target_name] = best_fitness.item()
+
+                # Save each result incrementally
+                _save_result(result_dict, save_format, csv_path, db_manager, db_table_name)
+
+        # Process logger
+        if pandas_logger is not None:
+            _handle_pandas_log(pandas_logger, save_path=save_path)
+
+        _LOGGER.info(f"✅ Optimal solution space complete. Results saved to '{save_path}'")
+        return None
+
+
+def _handle_pandas_log(logger: PandasLogger, save_path: Path):
+    log_dataframe = logger.to_dataframe()
+    save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLog", filename="evolution")
+
+
+def info():
+    _script_info(__all__)
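
The new module replaces the CMAES/SteadyStateGA pair with SNES, CEM, and a GeneticAlgorithm configured through a searcher factory, so each repetition of run_optimization() gets a fresh searcher. A hedged usage sketch follows; the signatures mirror the file above, but the handler, bounds, feature names, and target name are placeholders, not values from the package.

# Hedged usage sketch of the new optimization API (signatures taken from the module above;
# `handler`, the bounds, and the names below are placeholders).
from ml_tools.ML_optimization import create_pytorch_problem, run_optimization

lower = [0.0, 0.0, 10.0]    # continuous lower bounds (placeholders)
upper = [1.0, 5.0, 100.0]   # continuous upper bounds (placeholders)

problem, searcher_factory = create_pytorch_problem(
    inference_handler=handler,   # an initialized PyTorchInferenceHandler (construction not shown here)
    bounds=(lower, upper),
    binary_features=2,           # bounds for the two trailing binary features are appended automatically
    task="max",
    algorithm="Genetic",
    population_size=200,
)

# With repetitions=1 the best solution is returned as a dict and written to a CSV;
# with repetitions > 1 results are streamed to CSV and/or SQLite and None is returned.
best = run_optimization(
    problem=problem,
    searcher_factory=searcher_factory,
    num_generations=100,
    target_name="target",
    binary_features=2,
    save_dir="./optimization_results",
    save_format="csv",
    feature_names=["x1", "x2", "x3", "flag_a", "flag_b"],
    repetitions=1,
)
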
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "6.0.1"
+version = "6.1.1"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }
@@ -1,226 +0,0 @@
-import torch
-import numpy #handling torch to numpy
-import evotorch
-from evotorch.algorithms import CMAES, SteadyStateGA
-from evotorch.logging import StdOutLogger
-from typing import Literal, Union, Tuple, List, Optional
-from pathlib import Path
-from tqdm.auto import trange
-from contextlib import nullcontext
-
-from .path_manager import make_fullpath, sanitize_filename
-from ._logger import _LOGGER
-from ._script_info import _script_info
-from .ML_inference import PyTorchInferenceHandler
-from .keys import PyTorchInferenceKeys
-from .SQL import DatabaseManager
-from .optimization_tools import _save_result
-from .utilities import threshold_binary_values
-
-
-__all__ = [
-    "create_pytorch_problem",
-    "run_optimization"
-]
-
-
-def create_pytorch_problem(
-    handler: PyTorchInferenceHandler,
-    bounds: Tuple[List[float], List[float]],
-    binary_features: int,
-    task: Literal["minimize", "maximize"],
-    algorithm: Literal["CMAES", "GA"] = "CMAES",
-    verbose: bool = False,
-    **searcher_kwargs
-) -> Tuple[evotorch.Problem, evotorch.Searcher]: # type: ignore
-    """
-    Creates and configures an EvoTorch Problem and Searcher for a PyTorch model.
-
-    Args:
-        handler (PyTorchInferenceHandler): An initialized inference handler
-            containing the model and weights.
-        bounds (tuple[list[float], list[float]]): A tuple containing the lower
-            and upper bounds for the solution features.
-        binary_features (int): Number of binary features located at the END of the feature vector. Will be automatically added to the bounds.
-        task (str): The optimization goal, either "minimize" or "maximize".
-        algorithm (str): The search algorithm to use, "CMAES" or "GA" (SteadyStateGA).
-        verbose (bool): Add an Evotorch logger for real-time console updates.
-        **searcher_kwargs: Additional keyword arguments to pass to the
-            selected search algorithm's constructor (e.g., stdev_init=0.5 for CMAES).
-
-    Returns:
-        Tuple:
-            A tuple containing the configured evotorch.Problem and evotorch.Searcher.
-    """
-    lower_bounds, upper_bounds = bounds
-
-    # add binary bounds
-    if binary_features > 0:
-        lower_bounds.extend([0.45] * binary_features)
-        upper_bounds.extend([0.55] * binary_features)
-
-    solution_length = len(lower_bounds)
-    device = handler.device
-
-    # Define the fitness function that EvoTorch will call.
-    @evotorch.decorators.to_tensor # type: ignore
-    @evotorch.decorators.on_aux_device(device)
-    def fitness_func(solution_tensor: torch.Tensor) -> torch.Tensor:
-        # Directly use the continuous-valued tensor from the optimizer for prediction
-        predictions = handler.predict_batch(solution_tensor)[PyTorchInferenceKeys.PREDICTIONS]
-        return predictions.flatten()
-
-    # Create the Problem instance.
-    problem = evotorch.Problem(
-        objective_sense=task,
-        objective_func=fitness_func,
-        solution_length=solution_length,
-        initial_bounds=(lower_bounds, upper_bounds),
-        device=device,
-    )
-
-    # Create the selected searcher instance.
-    if algorithm == "CMAES":
-        searcher = CMAES(problem, **searcher_kwargs)
-    elif algorithm == "GA":
-        searcher = SteadyStateGA(problem, **searcher_kwargs)
-    else:
-        raise ValueError(f"Unknown algorithm '{algorithm}'. Choose 'CMAES' or 'GA'.")
-
-    # Add a logger for real-time console updates.
-    # This gives the user immediate feedback on the optimization progress.
-    if verbose:
-        _ = StdOutLogger(searcher)
-
-    return problem, searcher
-
-
-def run_optimization(
-    problem: evotorch.Problem,
-    searcher: evotorch.Searcher, # type: ignore
-    num_generations: int,
-    target_name: str,
-    binary_features: int,
-    save_dir: Union[str, Path],
-    save_format: Literal['csv', 'sqlite', 'both'],
-    feature_names: Optional[List[str]],
-    repetitions: int = 1
-) -> Optional[dict]:
-    """
-    Runs the evolutionary optimization process, with support for multiple repetitions.
-
-    This function serves as the main engine for the optimization task. It takes a
-    configured Problem and a Searcher from EvoTorch and executes the optimization
-    for a specified number of generations.
-
-    It has two modes of operation:
-    1. **Single Run (repetitions=1):** Executes the optimization once, saves the
-       single best result to a CSV file, and returns it as a dictionary.
-    2. **Iterative Analysis (repetitions > 1):** Executes the optimization
-       multiple times. Results from each run are streamed incrementally to the
-       specified file formats (CSV and/or SQLite database). In this mode,
-       the function returns None.
-
-    Args:
-        problem (evotorch.Problem): The configured problem instance, which defines
-            the objective function, solution space, and optimization sense.
-        searcher (evotorch.Searcher): The configured searcher instance, which
-            contains the evolutionary algorithm (e.g., CMAES, GA).
-        num_generations (int): The total number of generations to run the
-            search algorithm for in each repetition.
-        target_name (str): Target name that will also be used for the CSV filename and SQL table.
-        binary_features (int): Number of binary features located at the END of the feature vector.
-        save_dir (str | Path): The directory where the result file(s) will be saved.
-        save_format (Literal['csv', 'sqlite', 'both'], optional): The format for
-            saving results during iterative analysis. Defaults to 'both'.
-        feature_names (List[str], optional): Names of the solution features for
-            labeling the output files. If None, generic names like 'feature_0',
-            'feature_1', etc., will be created. Defaults to None.
-        repetitions (int, optional): The number of independent times to run the
-            entire optimization process. Defaults to 1.
-
-    Returns:
-        Optional[dict]: A dictionary containing the best feature values and the
-        fitness score if `repetitions` is 1. Returns `None` if `repetitions`
-        is greater than 1, as results are streamed to files instead.
-    """
-    # preprocess paths
-    save_path = make_fullpath(save_dir, make=True, enforce="directory")
-
-    sanitized_target_name = sanitize_filename(target_name)
-    if not sanitized_target_name.endswith(".csv"):
-        sanitized_target_name = sanitized_target_name + ".csv"
-
-    csv_path = save_path / sanitized_target_name
-
-    db_path = save_path / "Optimization.db"
-    db_table_name = target_name
-
-    # preprocess feature names
-    if feature_names is None:
-        feature_names = [f"feature_{i}" for i in range(problem.solution_length)] # type: ignore
-
-    # --- SINGLE RUN LOGIC ---
-    if repetitions <= 1:
-        _LOGGER.info(f"🤖 Starting optimization with {searcher.__class__.__name__} for {num_generations} generations...")
-        for _ in trange(num_generations, desc="Optimizing"):
-            searcher.step()
-
-        best_solution_tensor, best_fitness = searcher.best
-        best_solution_np = best_solution_tensor.cpu().numpy()
-
-        # threshold binary features
-        if binary_features > 0:
-            best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
-        else:
-            best_solution_thresholded = best_solution_np
-
-        result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
-        result_dict[target_name] = best_fitness.item()
-
-        _save_result(result_dict, 'csv', csv_path) # Single run defaults to CSV
-        _LOGGER.info(f"✅ Optimization complete. Best solution saved to '{csv_path.name}'")
-        return result_dict
-
-    # --- MULTIPLE REPETITIONS LOGIC ---
-    else:
-        _LOGGER.info(f"🏁 Starting optimal solution space analysis with {repetitions} repetitions...")
-
-        db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
-
-        with db_context as db_manager:
-            if db_manager:
-                schema = {name: "REAL" for name in feature_names}
-                schema[target_name] = "REAL"
-                db_manager.create_table(db_table_name, schema)
-
-            for i in trange(repetitions, desc="Repetitions"):
-                _LOGGER.info(f"--- Starting Repetition {i+1}/{repetitions} ---")
-
-                # CRITICAL: Re-initialize the searcher to ensure each run is independent
-                searcher.reset()
-
-                for _ in range(num_generations): # Inner loop does not need a progress bar
-                    searcher.step()
-
-                best_solution_tensor, best_fitness = searcher.best
-                best_solution_np = best_solution_tensor.cpu().numpy()
-
-                # threshold binary features
-                if binary_features > 0:
-                    best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
-                else:
-                    best_solution_thresholded = best_solution_np
-
-                result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
-                result_dict[target_name] = best_fitness.item()
-
-                # Save each result incrementally
-                _save_result(result_dict, save_format, csv_path, db_manager, db_table_name)
-
-        _LOGGER.info(f"✅ Optimal solution space complete. Results saved to '{save_path}'")
-        return None
-
-
-def info():
-    _script_info(__all__)