dragon-ml-toolbox 13.0.0__py3-none-any.whl → 14.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-13.0.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/METADATA +12 -2
- dragon_ml_toolbox-14.7.0.dist-info/RECORD +49 -0
- {dragon_ml_toolbox-13.0.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +10 -0
- ml_tools/MICE_imputation.py +207 -5
- ml_tools/ML_configuration.py +108 -0
- ml_tools/ML_datasetmaster.py +241 -260
- ml_tools/ML_evaluation.py +229 -76
- ml_tools/ML_evaluation_multi.py +45 -16
- ml_tools/ML_inference.py +0 -1
- ml_tools/ML_models.py +135 -55
- ml_tools/ML_models_advanced.py +323 -0
- ml_tools/ML_optimization.py +49 -36
- ml_tools/ML_trainer.py +498 -29
- ml_tools/ML_utilities.py +351 -4
- ml_tools/ML_vision_datasetmaster.py +1492 -0
- ml_tools/ML_vision_evaluation.py +260 -0
- ml_tools/ML_vision_inference.py +428 -0
- ml_tools/ML_vision_models.py +641 -0
- ml_tools/ML_vision_transformers.py +203 -0
- ml_tools/PSO_optimization.py +5 -1
- ml_tools/_ML_vision_recipe.py +88 -0
- ml_tools/__init__.py +1 -0
- ml_tools/_schema.py +96 -0
- ml_tools/custom_logger.py +37 -14
- ml_tools/data_exploration.py +576 -138
- ml_tools/ensemble_evaluation.py +53 -10
- ml_tools/keys.py +43 -1
- ml_tools/math_utilities.py +1 -1
- ml_tools/optimization_tools.py +65 -86
- ml_tools/serde.py +78 -17
- ml_tools/utilities.py +192 -3
- dragon_ml_toolbox-13.0.0.dist-info/RECORD +0 -41
- ml_tools/ML_simple_optimization.py +0 -413
- {dragon_ml_toolbox-13.0.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-13.0.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-13.0.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_optimization.py
CHANGED
|
@@ -17,9 +17,10 @@ from ._script_info import _script_info
|
|
|
17
17
|
from .ML_inference import PyTorchInferenceHandler
|
|
18
18
|
from .keys import PyTorchInferenceKeys
|
|
19
19
|
from .SQL import DatabaseManager
|
|
20
|
-
from .optimization_tools import _save_result
|
|
20
|
+
from .optimization_tools import _save_result, create_optimization_bounds
|
|
21
21
|
from .utilities import save_dataframe_filename
|
|
22
22
|
from .math_utilities import discretize_categorical_values
|
|
23
|
+
from ._schema import FeatureSchema
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
__all__ = [
|
|
@@ -40,66 +41,76 @@ class MLOptimizer:
|
|
|
40
41
|
SNES and CEM algorithms do not accept bounds; the given bounds will be used as an initial starting point.
|
|
41
42
|
|
|
42
43
|
Example:
|
|
43
|
-
>>> # 1. Get
|
|
44
|
-
>>>
|
|
45
|
-
>>>
|
|
46
|
-
>>>
|
|
47
|
-
>>> # Assumes feature_C is at index 2 (cardinality 2) and feature_D is at index 3 (cardinality 2)
|
|
48
|
-
>>> cat_index_map = {2: 2, 3: 2}
|
|
44
|
+
>>> # 1. Get the final schema from data exploration
|
|
45
|
+
>>> schema = data_exploration.finalize_feature_schema(...)
|
|
46
|
+
>>> # 2. Define bounds for continuous features
|
|
47
|
+
>>> cont_bounds = {'feature_A': (0, 100), 'feature_B': (-10, 10)}
|
|
49
48
|
>>>
|
|
50
|
-
>>> #
|
|
49
|
+
>>> # 3. Initialize the optimizer
|
|
51
50
|
>>> optimizer = MLOptimizer(
|
|
52
51
|
... inference_handler=my_handler,
|
|
53
|
-
...
|
|
52
|
+
... schema=schema,
|
|
53
|
+
... continuous_bounds_map=cont_bounds,
|
|
54
54
|
... task="max",
|
|
55
55
|
... algorithm="Genetic",
|
|
56
|
-
... categorical_index_map=cat_index_map,
|
|
57
|
-
... categorical_mappings=cat_mappings,
|
|
58
56
|
... )
|
|
59
|
-
>>> #
|
|
57
|
+
>>> # 4. Run the optimization
|
|
60
58
|
>>> best_result = optimizer.run(
|
|
61
59
|
... num_generations=100,
|
|
62
60
|
... target_name="my_target",
|
|
63
|
-
... feature_names=my_feature_names,
|
|
64
61
|
... save_dir="/path/to/results",
|
|
65
62
|
... save_format="csv"
|
|
66
63
|
... )
|
|
67
64
|
"""
|
|
68
65
|
def __init__(self,
|
|
69
66
|
inference_handler: PyTorchInferenceHandler,
|
|
70
|
-
|
|
67
|
+
schema: FeatureSchema,
|
|
68
|
+
continuous_bounds_map: Dict[str, Tuple[float, float]],
|
|
71
69
|
task: Literal["min", "max"],
|
|
72
70
|
algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
|
|
73
71
|
population_size: int = 200,
|
|
74
|
-
categorical_index_map: Optional[Dict[int, int]] = None,
|
|
75
|
-
categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None,
|
|
76
72
|
discretize_start_at_zero: bool = True,
|
|
77
73
|
**searcher_kwargs):
|
|
78
74
|
"""
|
|
79
75
|
Initializes the optimizer by creating the EvoTorch problem and searcher.
|
|
80
76
|
|
|
81
77
|
Args:
|
|
82
|
-
inference_handler (PyTorchInferenceHandler):
|
|
83
|
-
|
|
84
|
-
|
|
78
|
+
inference_handler (PyTorchInferenceHandler):
|
|
79
|
+
An initialized inference handler containing the model.
|
|
80
|
+
schema (FeatureSchema):
|
|
81
|
+
The definitive schema object from data_exploration.
|
|
82
|
+
continuous_bounds_map (Dict[str, Tuple[float, float]]):
|
|
83
|
+
A dictionary mapping the *name* of each **continuous** feature
|
|
84
|
+
to its (min_bound, max_bound) tuple.
|
|
85
85
|
task (str): The optimization goal, either "min" or "max".
|
|
86
86
|
algorithm (str): The search algorithm to use ("SNES", "CEM", "Genetic").
|
|
87
87
|
population_size (int): Population size for CEM and GeneticAlgorithm.
|
|
88
|
-
categorical_index_map (Dict[int, int] | None): Used to discretize values after optimization. Maps {column_index: cardinality}.
|
|
89
|
-
categorical_mappings (Dict[str, Dict[str, int]] | None): Used to map discrete integer values back to strings (e.g., {0: 'Category_A'}) before saving.
|
|
90
88
|
discretize_start_at_zero (bool):
|
|
91
89
|
True if the discrete encoding starts at 0 (e.g., [0, 1, 2]).
|
|
92
90
|
False if it starts at 1 (e.g., [1, 2, 3]).
|
|
93
|
-
**searcher_kwargs: Additional keyword arguments for the selected
|
|
91
|
+
**searcher_kwargs: Additional keyword arguments for the selected
|
|
92
|
+
search algorithm's constructor.
|
|
94
93
|
"""
|
|
95
|
-
#
|
|
94
|
+
# --- Store schema ---
|
|
95
|
+
self.schema = schema
|
|
96
|
+
|
|
97
|
+
# --- 1. Create bounds from schema ---
|
|
98
|
+
# This is the new, robust way to get bounds
|
|
99
|
+
bounds = create_optimization_bounds(
|
|
100
|
+
schema=schema,
|
|
101
|
+
continuous_bounds_map=continuous_bounds_map,
|
|
102
|
+
start_at_zero=discretize_start_at_zero
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# --- 2. Make a fitness function ---
|
|
96
106
|
self.evaluator = FitnessEvaluator(
|
|
97
107
|
inference_handler=inference_handler,
|
|
98
|
-
|
|
108
|
+
# Get categorical info from the schema
|
|
109
|
+
categorical_index_map=schema.categorical_index_map,
|
|
99
110
|
discretize_start_at_zero=discretize_start_at_zero
|
|
100
111
|
)
|
|
101
112
|
|
|
102
|
-
#
|
|
113
|
+
# --- 3. Create the problem and searcher factory ---
|
|
103
114
|
self.problem, self.searcher_factory = create_pytorch_problem(
|
|
104
115
|
evaluator=self.evaluator,
|
|
105
116
|
bounds=bounds,
|
|
@@ -108,36 +119,36 @@ class MLOptimizer:
|
|
|
108
119
|
population_size=population_size,
|
|
109
120
|
**searcher_kwargs
|
|
110
121
|
)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
self.categorical_mappings = categorical_mappings
|
|
122
|
+
|
|
123
|
+
# --- 4. Store other info needed by run() ---
|
|
114
124
|
self.discretize_start_at_zero = discretize_start_at_zero
|
|
115
125
|
|
|
116
126
|
def run(self,
|
|
117
127
|
num_generations: int,
|
|
118
128
|
target_name: str,
|
|
119
129
|
save_dir: Union[str, Path],
|
|
120
|
-
feature_names: Optional[List[str]],
|
|
121
130
|
save_format: Literal['csv', 'sqlite', 'both'],
|
|
122
131
|
repetitions: int = 1,
|
|
123
132
|
verbose: bool = True) -> Optional[dict]:
|
|
124
133
|
"""
|
|
125
134
|
Runs the evolutionary optimization process using the pre-configured settings.
|
|
126
135
|
|
|
136
|
+
The `feature_names` are automatically pulled from the `FeatureSchema`
|
|
137
|
+
provided during initialization.
|
|
138
|
+
|
|
127
139
|
Args:
|
|
128
140
|
num_generations (int): The total number of generations for each repetition.
|
|
129
141
|
target_name (str): Target name used for the CSV filename and/or SQL table.
|
|
130
142
|
save_dir (str | Path): The directory where result files will be saved.
|
|
131
|
-
feature_names (List[str] | None): Names of the solution features for labeling output.
|
|
132
|
-
If None, generic names like 'feature_0', 'feature_1', ... , will be created.
|
|
133
143
|
save_format (Literal['csv', 'sqlite', 'both']): The format for saving results.
|
|
134
144
|
repetitions (int): The number of independent times to run the optimization.
|
|
135
145
|
verbose (bool): If True, enables detailed logging.
|
|
136
146
|
|
|
137
147
|
Returns:
|
|
138
|
-
Optional[dict]: A dictionary with the best result if repetitions is 1,
|
|
148
|
+
Optional[dict]: A dictionary with the best result if repetitions is 1,
|
|
149
|
+
otherwise None.
|
|
139
150
|
"""
|
|
140
|
-
# Call the existing run function
|
|
151
|
+
# Call the existing run function, passing info from the schema
|
|
141
152
|
return run_optimization(
|
|
142
153
|
problem=self.problem,
|
|
143
154
|
searcher_factory=self.searcher_factory,
|
|
@@ -145,11 +156,13 @@ class MLOptimizer:
|
|
|
145
156
|
target_name=target_name,
|
|
146
157
|
save_dir=save_dir,
|
|
147
158
|
save_format=save_format,
|
|
148
|
-
|
|
159
|
+
# Get the definitive feature names (as a list) from the schema
|
|
160
|
+
feature_names=list(self.schema.feature_names),
|
|
161
|
+
# Get categorical info from the schema
|
|
162
|
+
categorical_map=self.schema.categorical_index_map,
|
|
163
|
+
categorical_mappings=self.schema.categorical_mappings,
|
|
149
164
|
repetitions=repetitions,
|
|
150
165
|
verbose=verbose,
|
|
151
|
-
categorical_map=self.categorical_map,
|
|
152
|
-
categorical_mappings=self.categorical_mappings,
|
|
153
166
|
discretize_start_at_zero=self.discretize_start_at_zero
|
|
154
167
|
)
|
|
155
168
|
|