dragon-ml-toolbox 12.0.1__py3-none-any.whl → 12.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-12.0.1.dist-info → dragon_ml_toolbox-12.1.0.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-12.0.1.dist-info → dragon_ml_toolbox-12.1.0.dist-info}/RECORD +11 -10
- ml_tools/ML_optimization.py +149 -97
- ml_tools/ML_simple_optimization.py +413 -0
- ml_tools/data_exploration.py +96 -3
- ml_tools/math_utilities.py +30 -6
- ml_tools/optimization_tools.py +219 -24
- {dragon_ml_toolbox-12.0.1.dist-info → dragon_ml_toolbox-12.1.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-12.0.1.dist-info → dragon_ml_toolbox-12.1.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-12.0.1.dist-info → dragon_ml_toolbox-12.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-12.0.1.dist-info → dragon_ml_toolbox-12.1.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
dragon_ml_toolbox-12.0.
|
|
2
|
-
dragon_ml_toolbox-12.0.
|
|
1
|
+
dragon_ml_toolbox-12.1.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
|
|
2
|
+
dragon_ml_toolbox-12.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
|
|
3
3
|
ml_tools/ETL_cleaning.py,sha256=PLRSR-VYnt1nNT9XrcWq40SE0VzHCw7DQ8v9czfSQsU,20366
|
|
4
4
|
ml_tools/ETL_engineering.py,sha256=l0I6Og9o4s6EODdk0kZXjbbC-a3vVPYy1FopP2BkQSQ,54909
|
|
5
5
|
ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
|
|
@@ -10,8 +10,9 @@ ml_tools/ML_evaluation.py,sha256=tLswOPgH4G1KExSMn0876YtNkbxPh-W3J4MYOjomMWA,162
|
|
|
10
10
|
ml_tools/ML_evaluation_multi.py,sha256=6OZyQ4SM9ALh38mOABmiHgIQDWcovsD_iOo7Bg9YZCE,12516
|
|
11
11
|
ml_tools/ML_inference.py,sha256=ymFvncFsU10PExq87xnEj541DKV5ck0nMuK8ToJHzVQ,23067
|
|
12
12
|
ml_tools/ML_models.py,sha256=pSCV6KbmVnPZr49Kbyg7g25CYaWBWJr6IinBHKgVKGw,28042
|
|
13
|
-
ml_tools/ML_optimization.py,sha256=
|
|
13
|
+
ml_tools/ML_optimization.py,sha256=TfVccKfZ_W6BgraZZ01-SNcNgGuViPozWLezBY8mBIg,20466
|
|
14
14
|
ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
|
|
15
|
+
ml_tools/ML_simple_optimization.py,sha256=X96zX6XPu3ggrcOapuG69jsiZJczJNihS1rcwi9OsBI,18159
|
|
15
16
|
ml_tools/ML_trainer.py,sha256=_g48w5Ak-wQr5fGHdJqlcpnzv3gWyL1ghkOhy9VOZbo,23930
|
|
16
17
|
ml_tools/ML_utilities.py,sha256=35DfZzAwfDwVwfRECD8X_2ynsU2NCpTdNJSmza6oAzQ,8712
|
|
17
18
|
ml_tools/PSO_optimization.py,sha256=fVHeemqilBS0zrGV25E5yKwDlGdd2ZKa18d8CZ6Q6Fk,22961
|
|
@@ -23,18 +24,18 @@ ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
|
|
|
23
24
|
ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
|
|
24
25
|
ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
|
|
25
26
|
ml_tools/custom_logger.py,sha256=OZqG7FR_UE6byzY3RDmlj08a336ZU-4DzNBMPLr_d5c,5881
|
|
26
|
-
ml_tools/data_exploration.py,sha256=
|
|
27
|
+
ml_tools/data_exploration.py,sha256=is9P4c4orIKW6gRhTeScZlCGYH9ODguxMtVlrVubb4E,42515
|
|
27
28
|
ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
|
|
28
29
|
ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
|
|
29
30
|
ml_tools/ensemble_learning.py,sha256=aTPeKthO4zRWBEaQJOUj8jEqVHiHjjOMXuiEWjI9NxM,21946
|
|
30
31
|
ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
|
|
31
32
|
ml_tools/keys.py,sha256=FDpbS3Jb0pjrVvvp2_8nZi919mbob_-xwuy5OOtKM_A,1848
|
|
32
|
-
ml_tools/math_utilities.py,sha256=
|
|
33
|
-
ml_tools/optimization_tools.py,sha256=
|
|
33
|
+
ml_tools/math_utilities.py,sha256=PxoOrnuj6Ntp7_TJqyDWi0JX03WpAO5iaFNK2Oeq5I4,8800
|
|
34
|
+
ml_tools/optimization_tools.py,sha256=bkKrTjukNOpxgVDMW5mUX5vQ72ckBcS5VA4eG8uZsOI,13515
|
|
34
35
|
ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
|
|
35
36
|
ml_tools/serde.py,sha256=k0qAwfMf13lVBQSgq5u9MSXEoo31iOA2-Ncm8XgMCMI,3974
|
|
36
37
|
ml_tools/utilities.py,sha256=gef62GLK7ev5BWkkQekeJoVZqwf2mIuOlOfyCw6WdtE,13882
|
|
37
|
-
dragon_ml_toolbox-12.0.
|
|
38
|
-
dragon_ml_toolbox-12.0.
|
|
39
|
-
dragon_ml_toolbox-12.0.
|
|
40
|
-
dragon_ml_toolbox-12.0.
|
|
38
|
+
dragon_ml_toolbox-12.1.0.dist-info/METADATA,sha256=PJbBSG9h6juu_srL07VVhgOIGqebQwn_rlI1RgZdTwo,6166
|
|
39
|
+
dragon_ml_toolbox-12.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
dragon_ml_toolbox-12.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
41
|
+
dragon_ml_toolbox-12.1.0.dist-info/RECORD,,
|
ml_tools/ML_optimization.py
CHANGED
|
@@ -5,7 +5,7 @@ import evotorch
|
|
|
5
5
|
from evotorch.algorithms import SNES, CEM, GeneticAlgorithm
|
|
6
6
|
from evotorch.logging import PandasLogger
|
|
7
7
|
from evotorch.operators import SimulatedBinaryCrossOver, GaussianMutation
|
|
8
|
-
from typing import Literal, Union, Tuple, List, Optional, Any, Callable
|
|
8
|
+
from typing import Literal, Union, Tuple, List, Optional, Any, Callable, Dict
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from tqdm.auto import trange
|
|
11
11
|
from contextlib import nullcontext
|
|
@@ -19,7 +19,7 @@ from .keys import PyTorchInferenceKeys
|
|
|
19
19
|
from .SQL import DatabaseManager
|
|
20
20
|
from .optimization_tools import _save_result
|
|
21
21
|
from .utilities import save_dataframe
|
|
22
|
-
from .math_utilities import
|
|
22
|
+
from .math_utilities import discretize_categorical_values
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
__all__ = [
|
|
@@ -39,15 +39,23 @@ class MLOptimizer:
|
|
|
39
39
|
SNES and CEM algorithms do not accept bounds; for these algorithms the given bounds are used as an initial starting point.
|
|
40
40
|
|
|
41
41
|
Example:
|
|
42
|
-
>>> # 1.
|
|
42
|
+
>>> # 1. Get categorical info from preprocessing steps
|
|
43
|
+
>>> # e.g., from data_exploration.encode_categorical_features
|
|
44
|
+
>>> cat_mappings = {'feature_C': {'A': 0, 'B': 1}, 'feature_D': {'X': 0, 'Y': 1}}
|
|
45
|
+
>>> # e.g., from data_exploration.create_transformer_categorical_map
|
|
46
|
+
>>> # Assumes feature_C is at index 2 (cardinality 2) and feature_D is at index 3 (cardinality 2)
|
|
47
|
+
>>> cat_index_map = {2: 2, 3: 2}
|
|
48
|
+
>>>
|
|
49
|
+
>>> # 2. Initialize the optimizer
|
|
43
50
|
>>> optimizer = MLOptimizer(
|
|
44
51
|
... inference_handler=my_handler,
|
|
45
|
-
... bounds=(lower_bounds, upper_bounds),
|
|
46
|
-
... number_binary_features=2,
|
|
52
|
+
... bounds=(lower_bounds, upper_bounds), # Bounds for ALL features
|
|
47
53
|
... task="max",
|
|
48
|
-
... algorithm="Genetic"
|
|
54
|
+
... algorithm="Genetic",
|
|
55
|
+
... categorical_index_map=cat_index_map,
|
|
56
|
+
... categorical_mappings=cat_mappings,
|
|
49
57
|
... )
|
|
50
|
-
>>> #
|
|
58
|
+
>>> # 3. Run the optimization
|
|
51
59
|
>>> best_result = optimizer.run(
|
|
52
60
|
... num_generations=100,
|
|
53
61
|
... target_name="my_target",
|
|
@@ -59,35 +67,43 @@ class MLOptimizer:
|
|
|
59
67
|
def __init__(self,
|
|
60
68
|
inference_handler: PyTorchInferenceHandler,
|
|
61
69
|
bounds: Tuple[List[float], List[float]],
|
|
62
|
-
number_binary_features: int,
|
|
63
70
|
task: Literal["min", "max"],
|
|
64
71
|
algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
|
|
65
72
|
population_size: int = 200,
|
|
73
|
+
categorical_index_map: Optional[Dict[int, int]] = None,
|
|
74
|
+
categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None,
|
|
75
|
+
discretize_start_at_zero: bool = True,
|
|
66
76
|
**searcher_kwargs):
|
|
67
77
|
"""
|
|
68
78
|
Initializes the optimizer by creating the EvoTorch problem and searcher.
|
|
69
79
|
|
|
70
80
|
Args:
|
|
71
81
|
inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
|
|
72
|
-
bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for
|
|
73
|
-
|
|
82
|
+
bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for ALL solution features.
|
|
83
|
+
Use the `optimization_tools.create_optimization_bounds()` helper to easily generate this and ensure unbiased categorical bounds.
|
|
74
84
|
task (str): The optimization goal, either "min" or "max".
|
|
75
85
|
algorithm (str): The search algorithm to use ("SNES", "CEM", "Genetic").
|
|
76
86
|
population_size (int): Population size for CEM and GeneticAlgorithm.
|
|
87
|
+
categorical_index_map (Dict[int, int] | None): Used to discretize values after optimization. Maps {column_index: cardinality}.
|
|
88
|
+
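categorical_mappings (Dict[str, Dict[str, int]] | None): Maps each categorical feature name to its {category_string: integer_code} dictionary (e.g., {'feature_C': {'A': 0, 'B': 1}}). Used to map discrete integer values back to their string categories before saving.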
|
|
89
|
+
discretize_start_at_zero (bool):
|
|
90
|
+
True if the discrete encoding starts at 0 (e.g., [0, 1, 2]).
|
|
91
|
+
False if it starts at 1 (e.g., [1, 2, 3]).
|
|
77
92
|
**searcher_kwargs: Additional keyword arguments for the selected search algorithm's constructor.
|
|
78
93
|
"""
|
|
79
94
|
# Call the existing factory function to get the problem and searcher factory
|
|
80
95
|
self.problem, self.searcher_factory = create_pytorch_problem(
|
|
81
96
|
inference_handler=inference_handler,
|
|
82
97
|
bounds=bounds,
|
|
83
|
-
binary_features=number_binary_features,
|
|
84
98
|
task=task,
|
|
85
99
|
algorithm=algorithm,
|
|
86
100
|
population_size=population_size,
|
|
87
101
|
**searcher_kwargs
|
|
88
102
|
)
|
|
89
|
-
# Store
|
|
90
|
-
self.
|
|
103
|
+
# Store categorical info to pass to the run function
|
|
104
|
+
self.categorical_map = categorical_index_map
|
|
105
|
+
self.categorical_mappings = categorical_mappings
|
|
106
|
+
self.discretize_start_at_zero = discretize_start_at_zero
|
|
91
107
|
|
|
92
108
|
def run(self,
|
|
93
109
|
num_generations: int,
|
|
@@ -104,7 +120,8 @@ class MLOptimizer:
|
|
|
104
120
|
num_generations (int): The total number of generations for each repetition.
|
|
105
121
|
target_name (str): Target name used for the CSV filename and/or SQL table.
|
|
106
122
|
save_dir (str | Path): The directory where result files will be saved.
|
|
107
|
-
feature_names (List[str] | None): Names of the solution features for labeling output.
|
|
123
|
+
feature_names (List[str] | None): Names of the solution features for labeling output.
|
|
124
|
+
If None, generic names like 'feature_0', 'feature_1', ... , will be created.
|
|
108
125
|
save_format (Literal['csv', 'sqlite', 'both']): The format for saving results.
|
|
109
126
|
repetitions (int): The number of independent times to run the optimization.
|
|
110
127
|
verbose (bool): If True, enables detailed logging.
|
|
@@ -112,25 +129,26 @@ class MLOptimizer:
|
|
|
112
129
|
Returns:
|
|
113
130
|
Optional[dict]: A dictionary with the best result if repetitions is 1, otherwise None.
|
|
114
131
|
"""
|
|
115
|
-
# Call the existing run function with the stored problem, searcher, and
|
|
132
|
+
# Call the existing run function with the stored problem, searcher, and categorical info
|
|
116
133
|
return run_optimization(
|
|
117
134
|
problem=self.problem,
|
|
118
135
|
searcher_factory=self.searcher_factory,
|
|
119
136
|
num_generations=num_generations,
|
|
120
137
|
target_name=target_name,
|
|
121
|
-
binary_features=self._binary_features,
|
|
122
138
|
save_dir=save_dir,
|
|
123
139
|
save_format=save_format,
|
|
124
140
|
feature_names=feature_names,
|
|
125
141
|
repetitions=repetitions,
|
|
126
|
-
verbose=verbose
|
|
142
|
+
verbose=verbose,
|
|
143
|
+
categorical_map=self.categorical_map,
|
|
144
|
+
categorical_mappings=self.categorical_mappings,
|
|
145
|
+
discretize_start_at_zero=self.discretize_start_at_zero
|
|
127
146
|
)
|
|
128
147
|
|
|
129
148
|
|
|
130
149
|
def create_pytorch_problem(
|
|
131
150
|
inference_handler: PyTorchInferenceHandler,
|
|
132
151
|
bounds: Tuple[List[float], List[float]],
|
|
133
|
-
binary_features: int,
|
|
134
152
|
task: Literal["min", "max"],
|
|
135
153
|
algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
|
|
136
154
|
population_size: int = 200,
|
|
@@ -146,7 +164,7 @@ def create_pytorch_problem(
|
|
|
146
164
|
Args:
|
|
147
165
|
inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
|
|
148
166
|
bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
|
|
149
|
-
|
|
167
|
+
Use the `optimization_tools.create_optimization_bounds()` helper to easily generate this and ensure unbiased categorical bounds.
|
|
150
168
|
task (str): The optimization goal, either "min" or "max".
|
|
151
169
|
algorithm (str): The search algorithm to use.
|
|
152
170
|
population_size (int): Used for CEM and GeneticAlgorithm.
|
|
@@ -161,11 +179,6 @@ def create_pytorch_problem(
|
|
|
161
179
|
lower_bounds = list(bounds[0])
|
|
162
180
|
upper_bounds = list(bounds[1])
|
|
163
181
|
|
|
164
|
-
# add binary bounds
|
|
165
|
-
if binary_features > 0:
|
|
166
|
-
lower_bounds.extend([0.48] * binary_features)
|
|
167
|
-
upper_bounds.extend([0.52] * binary_features)
|
|
168
|
-
|
|
169
182
|
solution_length = len(lower_bounds)
|
|
170
183
|
device = inference_handler.device
|
|
171
184
|
|
|
@@ -242,12 +255,14 @@ def run_optimization(
|
|
|
242
255
|
searcher_factory: Callable[[],Any],
|
|
243
256
|
num_generations: int,
|
|
244
257
|
target_name: str,
|
|
245
|
-
binary_features: int,
|
|
246
258
|
save_dir: Union[str, Path],
|
|
247
259
|
save_format: Literal['csv', 'sqlite', 'both'],
|
|
248
260
|
feature_names: Optional[List[str]],
|
|
249
261
|
repetitions: int = 1,
|
|
250
|
-
verbose: bool = True
|
|
262
|
+
verbose: bool = True,
|
|
263
|
+
categorical_map: Optional[Dict[int, int]] = None,
|
|
264
|
+
categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None,
|
|
265
|
+
discretize_start_at_zero: bool = True
|
|
251
266
|
) -> Optional[dict]:
|
|
252
267
|
"""
|
|
253
268
|
Runs the evolutionary optimization process, with support for multiple repetitions.
|
|
@@ -270,7 +285,6 @@ def run_optimization(
|
|
|
270
285
|
searcher_factory (Callable): The searcher factory to generate fresh evolutionary algorithms.
|
|
271
286
|
num_generations (int): The total number of generations to run the search algorithm for in each repetition.
|
|
272
287
|
target_name (str): Target name that will also be used for the CSV filename and SQL table.
|
|
273
|
-
binary_features (int): Number of binary features located at the END of the feature vector.
|
|
274
288
|
save_dir (str | Path): The directory where the result file(s) will be saved.
|
|
275
289
|
save_format (Literal['csv', 'sqlite', 'both'], optional): The format for
|
|
276
290
|
saving results during iterative analysis.
|
|
@@ -280,13 +294,18 @@ def run_optimization(
|
|
|
280
294
|
repetitions (int, optional): The number of independent times to run the
|
|
281
295
|
entire optimization process.
|
|
282
296
|
verbose (bool): Add an Evotorch Pandas logger saved as a csv. Only for the first repetition.
|
|
297
|
+
categorical_map (Dict[int, int] | None): Used to discretize values after optimization. Maps {column_index: cardinality}.
|
|
298
|
+
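categorical_mappings (Dict[str, Dict[str, int]] | None): Maps each categorical feature name to its {category_string: integer_code} dictionary (e.g., {'feature_C': {'A': 0, 'B': 1}}). Used to map discrete integer values back to their string categories before saving.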
|
|
299
|
+
discretize_start_at_zero (bool):
|
|
300
|
+
True if the discrete encoding starts at 0 (e.g., [0, 1, 2]).
|
|
301
|
+
False if it starts at 1 (e.g., [1, 2, 3]).
|
|
283
302
|
|
|
284
303
|
Returns:
|
|
285
304
|
Optional[dict]: A dictionary containing the best feature values and the
|
|
286
305
|
fitness score if `repetitions` is 1. Returns `None` if `repetitions`
|
|
287
306
|
is greater than 1, as results are streamed to files instead.
|
|
288
307
|
"""
|
|
289
|
-
#
|
|
308
|
+
# --- 1. Setup Paths and Feature Names ---
|
|
290
309
|
save_path = make_fullpath(save_dir, make=True, enforce="directory")
|
|
291
310
|
|
|
292
311
|
sanitized_target_name = sanitize_filename(target_name)
|
|
@@ -294,54 +313,38 @@ def run_optimization(
|
|
|
294
313
|
sanitized_target_name = sanitized_target_name + ".csv"
|
|
295
314
|
|
|
296
315
|
csv_path = save_path / sanitized_target_name
|
|
297
|
-
|
|
298
316
|
db_path = save_path / "Optimization.db"
|
|
299
317
|
db_table_name = target_name
|
|
300
318
|
|
|
301
|
-
#
|
|
319
|
+
# Use problem's solution_length to create default names if none provided
|
|
302
320
|
if feature_names is None:
|
|
303
|
-
|
|
321
|
+
feat_len = problem.solution_length
|
|
322
|
+
feature_names = [f"feature_{i}" for i in range(feat_len)] # type: ignore
|
|
304
323
|
|
|
324
|
+
# --- 2. Run Optimization ---
|
|
305
325
|
# --- SINGLE RUN LOGIC ---
|
|
306
326
|
if repetitions <= 1:
|
|
307
|
-
|
|
308
|
-
_LOGGER.info(f"🤖 Starting optimization with {searcher.__class__.__name__} Algorithm for {num_generations} generations...")
|
|
309
|
-
# for _ in trange(num_generations, desc="Optimizing"):
|
|
310
|
-
# searcher.step()
|
|
311
|
-
|
|
312
|
-
# Attach logger if requested
|
|
313
|
-
if verbose:
|
|
314
|
-
pandas_logger = PandasLogger(searcher)
|
|
315
|
-
|
|
316
|
-
searcher.run(num_generations) # Use the built-in run method for simplicity
|
|
317
|
-
|
|
318
|
-
# # DEBUG new searcher objects
|
|
319
|
-
# for status_key in searcher.iter_status_keys():
|
|
320
|
-
# print("===", status_key, "===")
|
|
321
|
-
# print(searcher.status[status_key])
|
|
322
|
-
# print()
|
|
327
|
+
_LOGGER.info(f"🤖 Starting optimization for {num_generations} generations...")
|
|
323
328
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
if binary_features > 0:
|
|
334
|
-
best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
|
|
335
|
-
else:
|
|
336
|
-
best_solution_thresholded = best_solution_np
|
|
337
|
-
|
|
338
|
-
result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
|
|
339
|
-
result_dict[target_name] = best_fitness.item()
|
|
329
|
+
result_dict, pandas_logger = _run_single_optimization_rep(
|
|
330
|
+
searcher_factory=searcher_factory,
|
|
331
|
+
num_generations=num_generations,
|
|
332
|
+
feature_names=feature_names,
|
|
333
|
+
target_name=target_name,
|
|
334
|
+
categorical_map=categorical_map,
|
|
335
|
+
discretize_start_at_zero=discretize_start_at_zero,
|
|
336
|
+
attach_logger=verbose
|
|
337
|
+
)
|
|
340
338
|
|
|
341
|
-
|
|
339
|
+
# Single run defaults to CSV, pass mappings for reverse mapping
|
|
340
|
+
_save_result(
|
|
341
|
+
result_dict=result_dict,
|
|
342
|
+
save_format='csv',
|
|
343
|
+
csv_path=csv_path,
|
|
344
|
+
categorical_mappings=categorical_mappings
|
|
345
|
+
)
|
|
342
346
|
|
|
343
|
-
|
|
344
|
-
if verbose:
|
|
347
|
+
if pandas_logger:
|
|
345
348
|
_handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)
|
|
346
349
|
|
|
347
350
|
_LOGGER.info(f"Optimization complete. Best solution saved to '{csv_path.name}'")
|
|
@@ -350,57 +353,106 @@ def run_optimization(
|
|
|
350
353
|
# --- MULTIPLE REPETITIONS LOGIC ---
|
|
351
354
|
else:
|
|
352
355
|
_LOGGER.info(f"🏁 Starting optimal solution space analysis with {repetitions} repetitions...")
|
|
353
|
-
|
|
356
|
+
|
|
357
|
+
first_run_logger = None # To store the logger from the first rep
|
|
354
358
|
db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
|
|
355
359
|
|
|
356
360
|
with db_context as db_manager:
|
|
361
|
+
# --- Setup Database Schema (if applicable) ---
|
|
357
362
|
if db_manager:
|
|
358
|
-
schema = {
|
|
363
|
+
schema = {}
|
|
364
|
+
categorical_cols = set(categorical_mappings.keys()) if categorical_mappings else set()
|
|
365
|
+
|
|
366
|
+
for name in feature_names:
|
|
367
|
+
schema[name] = "TEXT" if name in categorical_cols else "REAL"
|
|
359
368
|
schema[target_name] = "REAL"
|
|
369
|
+
|
|
360
370
|
db_manager.create_table(db_table_name, schema)
|
|
361
371
|
|
|
372
|
+
# --- Repetitions Loop ---
|
|
362
373
|
print("")
|
|
363
|
-
# Repetitions loop
|
|
364
|
-
pandas_logger = None
|
|
365
374
|
for i in trange(repetitions, desc="Repetitions"):
|
|
366
|
-
# CRITICAL: Create a fresh searcher for each run using the factory
|
|
367
|
-
searcher = searcher_factory()
|
|
368
|
-
|
|
369
|
-
# Attach logger if requested
|
|
370
|
-
if verbose and i==0:
|
|
371
|
-
pandas_logger = PandasLogger(searcher)
|
|
372
375
|
|
|
373
|
-
|
|
376
|
+
# Only attach a logger for the first repetition if verbose
|
|
377
|
+
attach_logger = verbose and (i == 0)
|
|
374
378
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
379
|
+
result_dict, pandas_logger = _run_single_optimization_rep(
|
|
380
|
+
searcher_factory=searcher_factory,
|
|
381
|
+
num_generations=num_generations,
|
|
382
|
+
feature_names=feature_names,
|
|
383
|
+
target_name=target_name,
|
|
384
|
+
categorical_map=categorical_map,
|
|
385
|
+
discretize_start_at_zero=discretize_start_at_zero,
|
|
386
|
+
attach_logger=attach_logger
|
|
387
|
+
)
|
|
382
388
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
|
|
386
|
-
else:
|
|
387
|
-
best_solution_thresholded = best_solution_np
|
|
388
|
-
|
|
389
|
-
# make results dictionary
|
|
390
|
-
result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
|
|
391
|
-
result_dict[target_name] = best_fitness.item()
|
|
389
|
+
if pandas_logger:
|
|
390
|
+
first_run_logger = pandas_logger
|
|
392
391
|
|
|
393
392
|
# Save each result incrementally
|
|
394
|
-
_save_result(
|
|
393
|
+
_save_result(
|
|
394
|
+
result_dict=result_dict,
|
|
395
|
+
save_format=save_format,
|
|
396
|
+
csv_path=csv_path,
|
|
397
|
+
db_manager=db_manager,
|
|
398
|
+
db_table_name=db_table_name,
|
|
399
|
+
categorical_mappings=categorical_mappings
|
|
400
|
+
)
|
|
395
401
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
_handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)
|
|
402
|
+
if first_run_logger:
|
|
403
|
+
_handle_pandas_log(first_run_logger, save_path=save_path, target_name=target_name)
|
|
399
404
|
|
|
400
405
|
_LOGGER.info(f"Optimal solution space complete. Results saved to '{save_path}'")
|
|
401
406
|
return None
|
|
402
407
|
|
|
403
408
|
|
|
409
|
+
def _run_single_optimization_rep(
    searcher_factory: Callable[[], Any],
    num_generations: int,
    feature_names: List[str],
    target_name: str,
    categorical_map: Optional[Dict[int, int]],
    discretize_start_at_zero: bool,
    attach_logger: bool
) -> Tuple[dict, Optional[PandasLogger]]:
    """
    Internal helper to run one full optimization repetition.

    Handles searcher creation, logging, running, and result post-processing.

    Args:
        searcher_factory (Callable): Zero-argument callable that returns a fresh searcher.
        num_generations (int): Number of generations passed to `searcher.run()`.
        feature_names (List[str]): One name per solution value, in solution order.
        target_name (str): Key under which the best fitness is stored in the result.
        categorical_map (Dict[int, int] | None): If truthy, forwarded to
            `discretize_categorical_values` as `categorical_info`; otherwise the
            raw solution values are kept.
        discretize_start_at_zero (bool): Forwarded to `discretize_categorical_values`
            as `start_at_zero`.
        attach_logger (bool): If True, a `PandasLogger` is attached to the searcher
            before it runs.

    Returns:
        Tuple[dict, Optional[PandasLogger]]: The `{feature_name: value, ..., target_name: fitness}`
        dictionary and the attached logger (None when `attach_logger` is False).

    Raises:
        ValueError: If `feature_names` does not have exactly one name per solution value.
    """
    # CRITICAL: Create a fresh searcher for each run using the factory
    searcher = searcher_factory()

    # The logger must be attached before running so it can record the generations
    pandas_logger = PandasLogger(searcher) if attach_logger else None

    # Run the optimization
    searcher.run(num_generations)

    # Get the best result reported by the searcher
    best_solution_container = searcher.status["pop_best"]
    best_fitness = best_solution_container.evals
    best_solution_np = best_solution_container.values.cpu().numpy()

    # Discretize categorical/binary features
    if categorical_map:
        best_solution_thresholded = discretize_categorical_values(
            input_array=best_solution_np,
            categorical_info=categorical_map,
            start_at_zero=discretize_start_at_zero
        )
    else:
        best_solution_thresholded = best_solution_np

    # zip() would silently drop the surplus names/values on a mismatch, yielding a
    # result whose columns no longer line up with `feature_names`; fail loudly instead.
    solution_length = len(best_solution_thresholded)
    if len(feature_names) != solution_length:
        raise ValueError(
            f"'feature_names' has {len(feature_names)} entries but the solution has {solution_length} values."
        )

    # Format results into a dictionary
    result_dict = dict(zip(feature_names, best_solution_thresholded))
    result_dict[target_name] = best_fitness.item()

    return result_dict, pandas_logger
|
|
454
|
+
|
|
455
|
+
|
|
404
456
|
def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
|
|
405
457
|
log_dataframe = logger.to_dataframe()
|
|
406
458
|
save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
|