dragon-ml-toolbox 12.0.0__py3-none-any.whl → 12.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.0.0
3
+ Version: 12.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -8,7 +8,7 @@ Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
8
8
  Project-URL: Changelog, https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md
9
9
  Classifier: Programming Language :: Python :: 3
10
10
  Classifier: Operating System :: OS Independent
11
- Requires-Python: ==3.12
11
+ Requires-Python: >=3.12
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  License-File: LICENSE-THIRD-PARTY.md
@@ -1,5 +1,5 @@
1
- dragon_ml_toolbox-12.0.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
- dragon_ml_toolbox-12.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
1
+ dragon_ml_toolbox-12.1.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
+ dragon_ml_toolbox-12.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
3
3
  ml_tools/ETL_cleaning.py,sha256=PLRSR-VYnt1nNT9XrcWq40SE0VzHCw7DQ8v9czfSQsU,20366
4
4
  ml_tools/ETL_engineering.py,sha256=l0I6Og9o4s6EODdk0kZXjbbC-a3vVPYy1FopP2BkQSQ,54909
5
5
  ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
@@ -10,8 +10,9 @@ ml_tools/ML_evaluation.py,sha256=tLswOPgH4G1KExSMn0876YtNkbxPh-W3J4MYOjomMWA,162
10
10
  ml_tools/ML_evaluation_multi.py,sha256=6OZyQ4SM9ALh38mOABmiHgIQDWcovsD_iOo7Bg9YZCE,12516
11
11
  ml_tools/ML_inference.py,sha256=ymFvncFsU10PExq87xnEj541DKV5ck0nMuK8ToJHzVQ,23067
12
12
  ml_tools/ML_models.py,sha256=pSCV6KbmVnPZr49Kbyg7g25CYaWBWJr6IinBHKgVKGw,28042
13
- ml_tools/ML_optimization.py,sha256=r1lAQiztTtRuh13rWj1iqbXvWO0LCqbzlkRdy3gEWo4,18124
13
+ ml_tools/ML_optimization.py,sha256=TfVccKfZ_W6BgraZZ01-SNcNgGuViPozWLezBY8mBIg,20466
14
14
  ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
15
+ ml_tools/ML_simple_optimization.py,sha256=X96zX6XPu3ggrcOapuG69jsiZJczJNihS1rcwi9OsBI,18159
15
16
  ml_tools/ML_trainer.py,sha256=_g48w5Ak-wQr5fGHdJqlcpnzv3gWyL1ghkOhy9VOZbo,23930
16
17
  ml_tools/ML_utilities.py,sha256=35DfZzAwfDwVwfRECD8X_2ynsU2NCpTdNJSmza6oAzQ,8712
17
18
  ml_tools/PSO_optimization.py,sha256=fVHeemqilBS0zrGV25E5yKwDlGdd2ZKa18d8CZ6Q6Fk,22961
@@ -23,18 +24,18 @@ ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
23
24
  ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
24
25
  ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
25
26
  ml_tools/custom_logger.py,sha256=OZqG7FR_UE6byzY3RDmlj08a336ZU-4DzNBMPLr_d5c,5881
26
- ml_tools/data_exploration.py,sha256=qpRUCQEVUmkxjx7DAztT6yIdI___xNV5NVPMBqCp3Mk,38870
27
+ ml_tools/data_exploration.py,sha256=is9P4c4orIKW6gRhTeScZlCGYH9ODguxMtVlrVubb4E,42515
27
28
  ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
28
29
  ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
29
30
  ml_tools/ensemble_learning.py,sha256=aTPeKthO4zRWBEaQJOUj8jEqVHiHjjOMXuiEWjI9NxM,21946
30
31
  ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
31
32
  ml_tools/keys.py,sha256=FDpbS3Jb0pjrVvvp2_8nZi919mbob_-xwuy5OOtKM_A,1848
32
- ml_tools/math_utilities.py,sha256=CUkyBuExFOnEHp9J1Xsh6H4xILwYOBilwFccM9J_Dxo,7870
33
- ml_tools/optimization_tools.py,sha256=P3I6lIpvZ8Xf2kX5FvvBKBmrK2pB6idBpkTzfUJxTeE,5073
33
+ ml_tools/math_utilities.py,sha256=PxoOrnuj6Ntp7_TJqyDWi0JX03WpAO5iaFNK2Oeq5I4,8800
34
+ ml_tools/optimization_tools.py,sha256=bkKrTjukNOpxgVDMW5mUX5vQ72ckBcS5VA4eG8uZsOI,13515
34
35
  ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
35
36
  ml_tools/serde.py,sha256=k0qAwfMf13lVBQSgq5u9MSXEoo31iOA2-Ncm8XgMCMI,3974
36
37
  ml_tools/utilities.py,sha256=gef62GLK7ev5BWkkQekeJoVZqwf2mIuOlOfyCw6WdtE,13882
37
- dragon_ml_toolbox-12.0.0.dist-info/METADATA,sha256=piCOJTB5V7QKGXqbYiu3GjdNLeyrpzV-42tIxVxBRBU,6166
38
- dragon_ml_toolbox-12.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
- dragon_ml_toolbox-12.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
40
- dragon_ml_toolbox-12.0.0.dist-info/RECORD,,
38
+ dragon_ml_toolbox-12.1.0.dist-info/METADATA,sha256=PJbBSG9h6juu_srL07VVhgOIGqebQwn_rlI1RgZdTwo,6166
39
+ dragon_ml_toolbox-12.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ dragon_ml_toolbox-12.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
41
+ dragon_ml_toolbox-12.1.0.dist-info/RECORD,,
@@ -5,7 +5,7 @@ import evotorch
5
5
  from evotorch.algorithms import SNES, CEM, GeneticAlgorithm
6
6
  from evotorch.logging import PandasLogger
7
7
  from evotorch.operators import SimulatedBinaryCrossOver, GaussianMutation
8
- from typing import Literal, Union, Tuple, List, Optional, Any, Callable
8
+ from typing import Literal, Union, Tuple, List, Optional, Any, Callable, Dict
9
9
  from pathlib import Path
10
10
  from tqdm.auto import trange
11
11
  from contextlib import nullcontext
@@ -19,7 +19,7 @@ from .keys import PyTorchInferenceKeys
19
19
  from .SQL import DatabaseManager
20
20
  from .optimization_tools import _save_result
21
21
  from .utilities import save_dataframe
22
- from .math_utilities import threshold_binary_values
22
+ from .math_utilities import discretize_categorical_values
23
23
 
24
24
 
25
25
  __all__ = [
@@ -39,15 +39,23 @@ class MLOptimizer:
39
39
  SNES and CEM algorithms do not accept bounds, the given bounds will be used as an initial starting point.
40
40
 
41
41
  Example:
42
- >>> # 1. Initialize the optimizer with model and search parameters
42
+ >>> # 1. Get categorical info from preprocessing steps
43
+ >>> # e.g., from data_exploration.encode_categorical_features
44
+ >>> cat_mappings = {'feature_C': {'A': 0, 'B': 1}, 'feature_D': {'X': 0, 'Y': 1}}
45
+ >>> # e.g., from data_exploration.create_transformer_categorical_map
46
+ >>> # Assumes feature_C is at index 2 (cardinality 2) and feature_D is at index 3 (cardinality 2)
47
+ >>> cat_index_map = {2: 2, 3: 2}
48
+ >>>
49
+ >>> # 2. Initialize the optimizer
43
50
  >>> optimizer = MLOptimizer(
44
51
  ... inference_handler=my_handler,
45
- ... bounds=(lower_bounds, upper_bounds),
46
- ... number_binary_features=2,
52
+ ... bounds=(lower_bounds, upper_bounds), # Bounds for ALL features
47
53
  ... task="max",
48
- ... algorithm="Genetic"
54
+ ... algorithm="Genetic",
55
+ ... categorical_index_map=cat_index_map,
56
+ ... categorical_mappings=cat_mappings,
49
57
  ... )
50
- >>> # 2. Run the optimization and save the results
58
+ >>> # 3. Run the optimization
51
59
  >>> best_result = optimizer.run(
52
60
  ... num_generations=100,
53
61
  ... target_name="my_target",
@@ -59,35 +67,43 @@ class MLOptimizer:
59
67
  def __init__(self,
60
68
  inference_handler: PyTorchInferenceHandler,
61
69
  bounds: Tuple[List[float], List[float]],
62
- number_binary_features: int,
63
70
  task: Literal["min", "max"],
64
71
  algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
65
72
  population_size: int = 200,
73
+ categorical_index_map: Optional[Dict[int, int]] = None,
74
+ categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None,
75
+ discretize_start_at_zero: bool = True,
66
76
  **searcher_kwargs):
67
77
  """
68
78
  Initializes the optimizer by creating the EvoTorch problem and searcher.
69
79
 
70
80
  Args:
71
81
  inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
72
- bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
73
- number_binary_features (int): Number of binary features located at the END of the feature vector.
82
+ bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for ALL solution features.
83
+ Use the `optimization_tools.create_optimization_bounds()` helper to easily generate this and ensure unbiased categorical bounds.
74
84
  task (str): The optimization goal, either "min" or "max".
75
85
  algorithm (str): The search algorithm to use ("SNES", "CEM", "Genetic").
76
86
  population_size (int): Population size for CEM and GeneticAlgorithm.
87
+ categorical_index_map (Dict[int, int] | None): Used to discretize values after optimization. Maps {column_index: cardinality}.
88
+ categorical_mappings (Dict[str, Dict[str, int]] | None): Maps {feature_name: {category_label: integer_code}} (e.g., {'feature_C': {'A': 0, 'B': 1}}). Used to map discrete integer values back to their string labels before saving.
89
+ discretize_start_at_zero (bool):
90
+ True if the discrete encoding starts at 0 (e.g., [0, 1, 2]).
91
+ False if it starts at 1 (e.g., [1, 2, 3]).
77
92
  **searcher_kwargs: Additional keyword arguments for the selected search algorithm's constructor.
78
93
  """
79
94
  # Call the existing factory function to get the problem and searcher factory
80
95
  self.problem, self.searcher_factory = create_pytorch_problem(
81
96
  inference_handler=inference_handler,
82
97
  bounds=bounds,
83
- binary_features=number_binary_features,
84
98
  task=task,
85
99
  algorithm=algorithm,
86
100
  population_size=population_size,
87
101
  **searcher_kwargs
88
102
  )
89
- # Store binary_features count to pass it to the run function later
90
- self._binary_features = number_binary_features
103
+ # Store categorical info to pass to the run function
104
+ self.categorical_map = categorical_index_map
105
+ self.categorical_mappings = categorical_mappings
106
+ self.discretize_start_at_zero = discretize_start_at_zero
91
107
 
92
108
  def run(self,
93
109
  num_generations: int,
@@ -104,7 +120,8 @@ class MLOptimizer:
104
120
  num_generations (int): The total number of generations for each repetition.
105
121
  target_name (str): Target name used for the CSV filename and/or SQL table.
106
122
  save_dir (str | Path): The directory where result files will be saved.
107
- feature_names (List[str] | None): Names of the solution features for labeling output. If None, generic names like 'feature_0', 'feature_1', ... , will be created.
123
+ feature_names (List[str] | None): Names of the solution features for labeling output.
124
+ If None, generic names like 'feature_0', 'feature_1', ... , will be created.
108
125
  save_format (Literal['csv', 'sqlite', 'both']): The format for saving results.
109
126
  repetitions (int): The number of independent times to run the optimization.
110
127
  verbose (bool): If True, enables detailed logging.
@@ -112,25 +129,26 @@ class MLOptimizer:
112
129
  Returns:
113
130
  Optional[dict]: A dictionary with the best result if repetitions is 1, otherwise None.
114
131
  """
115
- # Call the existing run function with the stored problem, searcher, and binary feature count
132
+ # Call the existing run function with the stored problem, searcher, and categorical info
116
133
  return run_optimization(
117
134
  problem=self.problem,
118
135
  searcher_factory=self.searcher_factory,
119
136
  num_generations=num_generations,
120
137
  target_name=target_name,
121
- binary_features=self._binary_features,
122
138
  save_dir=save_dir,
123
139
  save_format=save_format,
124
140
  feature_names=feature_names,
125
141
  repetitions=repetitions,
126
- verbose=verbose
142
+ verbose=verbose,
143
+ categorical_map=self.categorical_map,
144
+ categorical_mappings=self.categorical_mappings,
145
+ discretize_start_at_zero=self.discretize_start_at_zero
127
146
  )
128
147
 
129
148
 
130
149
  def create_pytorch_problem(
131
150
  inference_handler: PyTorchInferenceHandler,
132
151
  bounds: Tuple[List[float], List[float]],
133
- binary_features: int,
134
152
  task: Literal["min", "max"],
135
153
  algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
136
154
  population_size: int = 200,
@@ -146,7 +164,7 @@ def create_pytorch_problem(
146
164
  Args:
147
165
  inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
148
166
  bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
149
- binary_features (int): Number of binary features located at the END of the feature vector. Will be automatically added to the bounds.
167
+ Use the `optimization_tools.create_optimization_bounds()` helper to easily generate this and ensure unbiased categorical bounds.
150
168
  task (str): The optimization goal, either "min" or "max".
151
169
  algorithm (str): The search algorithm to use.
152
170
  population_size (int): Used for CEM and GeneticAlgorithm.
@@ -161,11 +179,6 @@ def create_pytorch_problem(
161
179
  lower_bounds = list(bounds[0])
162
180
  upper_bounds = list(bounds[1])
163
181
 
164
- # add binary bounds
165
- if binary_features > 0:
166
- lower_bounds.extend([0.48] * binary_features)
167
- upper_bounds.extend([0.52] * binary_features)
168
-
169
182
  solution_length = len(lower_bounds)
170
183
  device = inference_handler.device
171
184
 
@@ -242,12 +255,14 @@ def run_optimization(
242
255
  searcher_factory: Callable[[],Any],
243
256
  num_generations: int,
244
257
  target_name: str,
245
- binary_features: int,
246
258
  save_dir: Union[str, Path],
247
259
  save_format: Literal['csv', 'sqlite', 'both'],
248
260
  feature_names: Optional[List[str]],
249
261
  repetitions: int = 1,
250
- verbose: bool = True
262
+ verbose: bool = True,
263
+ categorical_map: Optional[Dict[int, int]] = None,
264
+ categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None,
265
+ discretize_start_at_zero: bool = True
251
266
  ) -> Optional[dict]:
252
267
  """
253
268
  Runs the evolutionary optimization process, with support for multiple repetitions.
@@ -270,7 +285,6 @@ def run_optimization(
270
285
  searcher_factory (Callable): The searcher factory to generate fresh evolutionary algorithms.
271
286
  num_generations (int): The total number of generations to run the search algorithm for in each repetition.
272
287
  target_name (str): Target name that will also be used for the CSV filename and SQL table.
273
- binary_features (int): Number of binary features located at the END of the feature vector.
274
288
  save_dir (str | Path): The directory where the result file(s) will be saved.
275
289
  save_format (Literal['csv', 'sqlite', 'both'], optional): The format for
276
290
  saving results during iterative analysis.
@@ -280,13 +294,18 @@ def run_optimization(
280
294
  repetitions (int, optional): The number of independent times to run the
281
295
  entire optimization process.
282
296
  verbose (bool): Add an Evotorch Pandas logger saved as a csv. Only for the first repetition.
297
+ categorical_map (Dict[int, int] | None): Used to discretize values after optimization. Maps {column_index: cardinality}.
298
+ categorical_mappings (Dict[str, Dict[str, int]] | None): Maps {feature_name: {category_label: integer_code}} (e.g., {'feature_C': {'A': 0, 'B': 1}}). Used to map discrete integer values back to their string labels before saving.
299
+ discretize_start_at_zero (bool):
300
+ True if the discrete encoding starts at 0 (e.g., [0, 1, 2]).
301
+ False if it starts at 1 (e.g., [1, 2, 3]).
283
302
 
284
303
  Returns:
285
304
  Optional[dict]: A dictionary containing the best feature values and the
286
305
  fitness score if `repetitions` is 1. Returns `None` if `repetitions`
287
306
  is greater than 1, as results are streamed to files instead.
288
307
  """
289
- # preprocess paths
308
+ # --- 1. Setup Paths and Feature Names ---
290
309
  save_path = make_fullpath(save_dir, make=True, enforce="directory")
291
310
 
292
311
  sanitized_target_name = sanitize_filename(target_name)
@@ -294,54 +313,38 @@ def run_optimization(
294
313
  sanitized_target_name = sanitized_target_name + ".csv"
295
314
 
296
315
  csv_path = save_path / sanitized_target_name
297
-
298
316
  db_path = save_path / "Optimization.db"
299
317
  db_table_name = target_name
300
318
 
301
- # preprocess feature names
319
+ # Use problem's solution_length to create default names if none provided
302
320
  if feature_names is None:
303
- feature_names = [f"feature_{i}" for i in range(problem.solution_length)] # type: ignore
321
+ feat_len = problem.solution_length
322
+ feature_names = [f"feature_{i}" for i in range(feat_len)] # type: ignore
304
323
 
324
+ # --- 2. Run Optimization ---
305
325
  # --- SINGLE RUN LOGIC ---
306
326
  if repetitions <= 1:
307
- searcher = searcher_factory()
308
- _LOGGER.info(f"🤖 Starting optimization with {searcher.__class__.__name__} Algorithm for {num_generations} generations...")
309
- # for _ in trange(num_generations, desc="Optimizing"):
310
- # searcher.step()
311
-
312
- # Attach logger if requested
313
- if verbose:
314
- pandas_logger = PandasLogger(searcher)
315
-
316
- searcher.run(num_generations) # Use the built-in run method for simplicity
317
-
318
- # # DEBUG new searcher objects
319
- # for status_key in searcher.iter_status_keys():
320
- # print("===", status_key, "===")
321
- # print(searcher.status[status_key])
322
- # print()
327
+ _LOGGER.info(f"🤖 Starting optimization for {num_generations} generations...")
323
328
 
324
- # Get results from the .status dictionary
325
- # SNES and CEM use the key 'center' to get mean values if needed best_solution_tensor = searcher.status["center"]
326
- best_solution_container = searcher.status["pop_best"]
327
- best_solution_tensor = best_solution_container.values
328
- best_fitness = best_solution_container.evals
329
-
330
- best_solution_np = best_solution_tensor.cpu().numpy()
331
-
332
- # threshold binary features
333
- if binary_features > 0:
334
- best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
335
- else:
336
- best_solution_thresholded = best_solution_np
337
-
338
- result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
339
- result_dict[target_name] = best_fitness.item()
329
+ result_dict, pandas_logger = _run_single_optimization_rep(
330
+ searcher_factory=searcher_factory,
331
+ num_generations=num_generations,
332
+ feature_names=feature_names,
333
+ target_name=target_name,
334
+ categorical_map=categorical_map,
335
+ discretize_start_at_zero=discretize_start_at_zero,
336
+ attach_logger=verbose
337
+ )
340
338
 
341
- _save_result(result_dict, 'csv', csv_path) # Single run defaults to CSV
339
+ # Single run defaults to CSV, pass mappings for reverse mapping
340
+ _save_result(
341
+ result_dict=result_dict,
342
+ save_format='csv',
343
+ csv_path=csv_path,
344
+ categorical_mappings=categorical_mappings
345
+ )
342
346
 
343
- # Process logger
344
- if verbose:
347
+ if pandas_logger:
345
348
  _handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)
346
349
 
347
350
  _LOGGER.info(f"Optimization complete. Best solution saved to '{csv_path.name}'")
@@ -350,57 +353,106 @@ def run_optimization(
350
353
  # --- MULTIPLE REPETITIONS LOGIC ---
351
354
  else:
352
355
  _LOGGER.info(f"🏁 Starting optimal solution space analysis with {repetitions} repetitions...")
353
-
356
+
357
+ first_run_logger = None # To store the logger from the first rep
354
358
  db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
355
359
 
356
360
  with db_context as db_manager:
361
+ # --- Setup Database Schema (if applicable) ---
357
362
  if db_manager:
358
- schema = {name: "REAL" for name in feature_names}
363
+ schema = {}
364
+ categorical_cols = set(categorical_mappings.keys()) if categorical_mappings else set()
365
+
366
+ for name in feature_names:
367
+ schema[name] = "TEXT" if name in categorical_cols else "REAL"
359
368
  schema[target_name] = "REAL"
369
+
360
370
  db_manager.create_table(db_table_name, schema)
361
371
 
372
+ # --- Repetitions Loop ---
362
373
  print("")
363
- # Repetitions loop
364
- pandas_logger = None
365
374
  for i in trange(repetitions, desc="Repetitions"):
366
- # CRITICAL: Create a fresh searcher for each run using the factory
367
- searcher = searcher_factory()
368
-
369
- # Attach logger if requested
370
- if verbose and i==0:
371
- pandas_logger = PandasLogger(searcher)
372
375
 
373
- searcher.run(num_generations) # Use the built-in run method for simplicity
376
+ # Only attach a logger for the first repetition if verbose
377
+ attach_logger = verbose and (i == 0)
374
378
 
375
- # Get results from the .status dictionary
376
- # SNES and CEM use the key 'center' to get mean values if needed best_solution_tensor = searcher.status["center"]
377
- best_solution_container = searcher.status["pop_best"]
378
- best_solution_tensor = best_solution_container.values
379
- best_fitness = best_solution_container.evals
380
-
381
- best_solution_np = best_solution_tensor.cpu().numpy()
379
+ result_dict, pandas_logger = _run_single_optimization_rep(
380
+ searcher_factory=searcher_factory,
381
+ num_generations=num_generations,
382
+ feature_names=feature_names,
383
+ target_name=target_name,
384
+ categorical_map=categorical_map,
385
+ discretize_start_at_zero=discretize_start_at_zero,
386
+ attach_logger=attach_logger
387
+ )
382
388
 
383
- # threshold binary features
384
- if binary_features > 0:
385
- best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
386
- else:
387
- best_solution_thresholded = best_solution_np
388
-
389
- # make results dictionary
390
- result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
391
- result_dict[target_name] = best_fitness.item()
389
+ if pandas_logger:
390
+ first_run_logger = pandas_logger
392
391
 
393
392
  # Save each result incrementally
394
- _save_result(result_dict, save_format, csv_path, db_manager, db_table_name)
393
+ _save_result(
394
+ result_dict=result_dict,
395
+ save_format=save_format,
396
+ csv_path=csv_path,
397
+ db_manager=db_manager,
398
+ db_table_name=db_table_name,
399
+ categorical_mappings=categorical_mappings
400
+ )
395
401
 
396
- # Process logger
397
- if pandas_logger is not None:
398
- _handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)
402
+ if first_run_logger:
403
+ _handle_pandas_log(first_run_logger, save_path=save_path, target_name=target_name)
399
404
 
400
405
  _LOGGER.info(f"Optimal solution space complete. Results saved to '{save_path}'")
401
406
  return None
402
407
 
403
408
 
409
+ def _run_single_optimization_rep(
410
+ searcher_factory: Callable[[],Any],
411
+ num_generations: int,
412
+ feature_names: List[str],
413
+ target_name: str,
414
+ categorical_map: Optional[Dict[int, int]],
415
+ discretize_start_at_zero: bool,
416
+ attach_logger: bool
417
+ ) -> Tuple[dict, Optional[PandasLogger]]:
418
+ """
419
+ Internal helper to run one full optimization repetition.
420
+
421
+ Handles searcher creation, logging, running, and result post-processing.
422
+ """
423
+ # CRITICAL: Create a fresh searcher for each run using the factory
424
+ searcher = searcher_factory()
425
+
426
+ # Attach logger if requested
427
+ pandas_logger = PandasLogger(searcher) if attach_logger else None
428
+
429
+ # Run the optimization
430
+ searcher.run(num_generations)
431
+
432
+ # Get the best result
433
+ best_solution_container = searcher.status["pop_best"]
434
+ best_solution_tensor = best_solution_container.values
435
+ best_fitness = best_solution_container.evals
436
+
437
+ best_solution_np = best_solution_tensor.cpu().numpy()
438
+
439
+ # Discretize categorical/binary features
440
+ if categorical_map:
441
+ best_solution_thresholded = discretize_categorical_values(
442
+ input_array=best_solution_np,
443
+ categorical_info=categorical_map,
444
+ start_at_zero=discretize_start_at_zero
445
+ )
446
+ else:
447
+ best_solution_thresholded = best_solution_np
448
+
449
+ # Format results into a dictionary
450
+ result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
451
+ result_dict[target_name] = best_fitness.item()
452
+
453
+ return result_dict, pandas_logger
454
+
455
+
404
456
  def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
405
457
  log_dataframe = logger.to_dataframe()
406
458
  save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)