dragon-ml-toolbox 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (34) hide show
  1. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/METADATA +5 -1
  2. dragon_ml_toolbox-9.0.0.dist-info/RECORD +35 -0
  3. ml_tools/ETL_engineering.py +177 -79
  4. ml_tools/GUI_tools.py +5 -5
  5. ml_tools/MICE_imputation.py +12 -8
  6. ml_tools/ML_callbacks.py +6 -3
  7. ml_tools/ML_datasetmaster.py +37 -20
  8. ml_tools/ML_evaluation.py +4 -4
  9. ml_tools/ML_evaluation_multi.py +26 -17
  10. ml_tools/ML_inference.py +30 -23
  11. ml_tools/ML_models.py +14 -14
  12. ml_tools/ML_optimization.py +4 -3
  13. ml_tools/ML_scaler.py +7 -7
  14. ml_tools/ML_trainer.py +17 -15
  15. ml_tools/PSO_optimization.py +16 -8
  16. ml_tools/RNN_forecast.py +1 -1
  17. ml_tools/SQL.py +22 -13
  18. ml_tools/VIF_factor.py +7 -6
  19. ml_tools/_logger.py +105 -7
  20. ml_tools/custom_logger.py +12 -8
  21. ml_tools/data_exploration.py +20 -15
  22. ml_tools/ensemble_evaluation.py +10 -6
  23. ml_tools/ensemble_inference.py +18 -18
  24. ml_tools/ensemble_learning.py +8 -5
  25. ml_tools/handle_excel.py +15 -11
  26. ml_tools/optimization_tools.py +3 -4
  27. ml_tools/path_manager.py +21 -15
  28. ml_tools/utilities.py +35 -26
  29. dragon_ml_toolbox-8.2.0.dist-info/RECORD +0 -36
  30. ml_tools/_ML_optimization_multi.py +0 -231
  31. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/WHEEL +0 -0
  32. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE +0 -0
  33. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  34. {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/top_level.txt +0 -0
@@ -1,231 +0,0 @@
1
- import pandas as pd
2
- import torch
3
- import numpy as np
4
- import evotorch
5
- from evotorch.algorithms import NSGA2
6
- from evotorch.logging import PandasLogger
7
- from typing import Literal, Union, Tuple, List, Optional, Any, Callable
8
- from pathlib import Path
9
- from tqdm.auto import trange
10
- from functools import partial
11
- from contextlib import nullcontext
12
- import matplotlib.pyplot as plt
13
- import seaborn as sns
14
-
15
- from .path_manager import make_fullpath, sanitize_filename
16
- from ._logger import _LOGGER
17
- from ._script_info import _script_info
18
- from .ML_inference import PyTorchInferenceHandlerMulti # Using the multi-target handler
19
- from .keys import PyTorchInferenceKeys
20
- from .utilities import threshold_binary_values, save_dataframe
21
- from .SQL import DatabaseManager # Added for SQL saving
22
-
23
# Public API of this module: the names exported by a star-import.
__all__ = [
    "create_multi_objective_problem",
    "run_multi_objective_optimization",
    "plot_pareto_front"
]
28
-
29
-
30
def create_multi_objective_problem(
    inference_handler: PyTorchInferenceHandlerMulti,
    bounds: Tuple[List[float], List[float]],
    binary_features: int,
    objective_senses: Tuple[Literal["min", "max"], ...],
    algorithm: Literal["NSGA2"] = "NSGA2",
    population_size: int = 200,
    **searcher_kwargs
) -> Tuple[evotorch.Problem, Callable[[], Any]]:
    """
    Creates and configures an EvoTorch Problem and a Searcher factory for multi-objective optimization.

    The problem's fitness function forwards each batch of candidate solutions to the
    inference handler and returns the model predictions as the objective values.

    Args:
        inference_handler (PyTorchInferenceHandlerMulti): An initialized handler for the multi-target model.
        bounds (tuple[list[float], list[float]]): Lower and upper bounds for the solution features.
            Both sequences must have the same length.
        binary_features (int): Number of binary features at the end of the feature vector.
            Values <= 0 add no extra dimensions.
        objective_senses (Tuple[Literal["min", "max"], ...]): A tuple specifying the optimization
            goal for each target (e.g., ("max", "min", "max")). The length of this tuple
            must match the number of outputs from the model.
        algorithm (str): The multi-objective search algorithm to use. Currently supports "NSGA2".
        population_size (int): The number of solutions in each generation. Ignored when
            `popsize` is given explicitly in `searcher_kwargs`.
        **searcher_kwargs: Additional keyword arguments for the search algorithm's constructor.

    Returns:
        A tuple containing the configured multi-objective Problem and a zero-argument
        factory that builds a fresh Searcher bound to that Problem.

    Raises:
        ValueError: If `algorithm` is not supported, or if the lower and upper bounds
            have different lengths.
    """
    # Fail fast: reject unsupported algorithms before touching the handler or building anything.
    if algorithm != "NSGA2":
        raise ValueError(f"Unknown multi-objective algorithm '{algorithm}'.")

    # Copy the bounds so the caller's lists are never mutated by the `extend` calls below.
    lower_bounds, upper_bounds = list(bounds[0]), list(bounds[1])

    if len(lower_bounds) != len(upper_bounds):
        raise ValueError(
            f"Lower and upper bounds must have the same length, "
            f"got {len(lower_bounds)} and {len(upper_bounds)}."
        )

    if binary_features > 0:
        # Each binary feature gets one extra dimension searched in the band [0.45, 0.55].
        # NOTE(review): presumably these values are thresholded to 0/1 after the search - confirm.
        lower_bounds.extend([0.45] * binary_features)
        upper_bounds.extend([0.55] * binary_features)

    solution_length = len(lower_bounds)
    device = inference_handler.device

    def fitness_func(solution_tensor: torch.Tensor) -> torch.Tensor:
        """
        The fitness function for a multi-objective problem.
        It returns the entire output tensor from the model. EvoTorch handles the rest.
        """
        # The handler returns a tensor of shape [batch_size, num_targets]
        return inference_handler.predict_batch(solution_tensor)[PyTorchInferenceKeys.PREDICTIONS]

    problem = evotorch.Problem(
        objective_sense=objective_senses,
        objective_func=fitness_func,
        solution_length=solution_length,
        bounds=(lower_bounds, upper_bounds),
        device=device,
        vectorized=True,
        num_actors='max'  # Use available CPU cores
    )

    # An explicit `popsize` from the caller wins over `population_size`.
    searcher_kwargs.setdefault('popsize', population_size)

    searcher_factory = partial(NSGA2, problem, **searcher_kwargs)
    return problem, searcher_factory
95
-
96
-
97
def run_multi_objective_optimization(
    problem: evotorch.Problem,
    searcher_factory: Callable[[], Any],
    num_generations: int,
    run_name: str,
    binary_features: int,
    save_dir: Union[str, Path],
    feature_names: List[str],
    target_names: List[str],
    save_format: Literal['csv', 'sqlite', 'both'] = 'csv',
    verbose: bool = True
):
    """
    Runs the multi-objective evolutionary optimization process to find the Pareto front.

    This function executes a multi-objective algorithm (like NSGA2) and saves the
    entire set of non-dominated solutions (the Pareto front) to the specified format(s).
    It also generates and saves a plot of the Pareto front.

    Args:
        problem (evotorch.Problem): The configured multi-objective problem.
        searcher_factory (Callable): A factory function to generate a fresh searcher instance.
        num_generations (int): The number of generations to run the algorithm.
        run_name (str): A name for this optimization run, used for filenames/table names.
        binary_features (int): Number of binary features in the solution vector.
        save_dir (str | Path): The directory where the result files will be saved.
        feature_names (List[str]): Names of the solution features for labeling columns.
        target_names (List[str]): Names of the target objectives for labeling columns.
        save_format (str): The format to save results in ('csv', 'sqlite', or 'both').
        verbose (bool): If True, attaches a logger and saves the evolution history.

    Raises:
        ValueError: If `save_format` is not one of the supported values, if the number of
            `target_names` does not match the number of objectives, or if the number of
            `feature_names` does not match the problem's solution length.
    """
    # --- Validation: do all cheap checks before creating directories or running the search ---
    valid_formats = ('csv', 'sqlite', 'both')
    if save_format not in valid_formats:
        # Otherwise the whole optimization would run and then silently save nothing.
        raise ValueError(f"Invalid `save_format` '{save_format}'. Expected one of {valid_formats}.")

    if len(target_names) != problem.num_objectives:
        raise ValueError("The number of `target_names` must match the number of objectives in the problem.")

    if len(feature_names) != problem.solution_length:
        # Without this check the mismatch only surfaces as a pandas error after the full run.
        raise ValueError(
            f"The number of `feature_names` ({len(feature_names)}) must match the "
            f"solution length of the problem ({problem.solution_length})."
        )

    save_path = make_fullpath(save_dir, make=True, enforce="directory")
    sanitized_run_name = sanitize_filename(run_name)

    searcher = searcher_factory()
    _LOGGER.info(f"🤖 Starting multi-objective optimization with {searcher.__class__.__name__} for {num_generations} generations...")

    # The logger must be attached before `run` so that it records every generation.
    logger = PandasLogger(searcher) if verbose else None
    searcher.run(num_generations)

    pareto_front = searcher.status["pareto_front"]
    _LOGGER.info(f"✅ Optimization complete. Found {len(pareto_front)} non-dominated solutions.")

    solutions_np = pareto_front.values.cpu().numpy()
    objectives_np = pareto_front.evals.cpu().numpy()

    if binary_features > 0:
        # NOTE(review): `solutions_np` is a 2D batch here; confirm that
        # `threshold_binary_values` accepts 2D input and preserves the column count.
        solutions_np = threshold_binary_values(input_array=solutions_np, binary_values=binary_features)

    # One row per Pareto-optimal solution: feature columns first, then one
    # `predicted_<target>` column per objective.
    results_df = pd.DataFrame(solutions_np, columns=feature_names)
    objective_cols = [f"predicted_{name}" for name in target_names]
    for i, col_name in enumerate(objective_cols):
        results_df[col_name] = objectives_np[:, i]

    # --- Saving Logic ---
    if save_format in ('csv', 'both'):
        csv_path = save_path / f"pareto_front_{sanitized_run_name}.csv"
        results_df.to_csv(csv_path, index=False)
        _LOGGER.info(f"📄 Pareto front data saved to '{csv_path.name}'")

    if save_format in ('sqlite', 'both'):
        db_path = save_path / "Optimization_Multi.db"
        with DatabaseManager(db_path) as db:
            db.insert_from_dataframe(
                table_name=sanitized_run_name,
                df=results_df,
                if_exists='replace'
            )
        _LOGGER.info(f"🗃️ Pareto front data saved to table '{sanitized_run_name}' in '{db_path.name}'")

    # --- Plotting Logic ---
    plot_pareto_front(
        results_df,
        objective_cols=objective_cols,
        save_path=save_path / f"pareto_plot_{sanitized_run_name}.svg"
    )

    # Explicit None check: do not depend on the truthiness of the logger object.
    if logger is not None:
        log_df = logger.to_dataframe()
        save_dataframe(df=log_df, save_dir=save_path / "EvolutionLogs", filename=f"log_{sanitized_run_name}")
182
-
183
-
184
def plot_pareto_front(results_df: pd.DataFrame, objective_cols: List[str], save_path: Path):
    """
    Generates and saves a plot of the Pareto front.

    - For 2 objectives, it creates a 2D scatter plot.
    - For 3 objectives, it creates a 3D scatter plot.
    - For any other number of objectives (1, or more than 3), it creates a
      scatter plot matrix (pairs plot).

    The plotting style is applied only while this function draws; the global
    matplotlib style is left untouched.

    Args:
        results_df (pd.DataFrame): DataFrame containing the optimization results.
        objective_cols (List[str]): The names of the columns that hold the objective values.
        save_path (Path): The full path (including filename) to save the SVG plot.

    Raises:
        ValueError: If `objective_cols` is empty.
    """
    num_objectives = len(objective_cols)
    if num_objectives == 0:
        raise ValueError("`objective_cols` must contain at least one column name.")

    _LOGGER.info(f"🎨 Generating Pareto front plot for {num_objectives} objectives...")

    # Scope the style to this function so the caller's matplotlib settings are not changed.
    with plt.style.context('seaborn-v0_8-whitegrid'):
        save_kwargs = {}

        if num_objectives == 2:
            x_col, y_col = objective_cols
            fig, ax = plt.subplots(figsize=(8, 6), dpi=120)
            ax.scatter(results_df[x_col], results_df[y_col], alpha=0.7, edgecolors='k')
            ax.set_xlabel(x_col)
            ax.set_ylabel(y_col)
            ax.set_title("Pareto Front (2D)")
            fig.tight_layout()

        elif num_objectives == 3:
            x_col, y_col, z_col = objective_cols
            fig = plt.figure(figsize=(9, 7), dpi=120)
            ax = fig.add_subplot(111, projection='3d')
            ax.scatter(results_df[x_col], results_df[y_col], results_df[z_col], alpha=0.7, depthshade=True)
            ax.set_xlabel(x_col)
            ax.set_ylabel(y_col)
            ax.set_zlabel(z_col)
            ax.set_title("Pareto Front (3D)")
            fig.tight_layout()

        else:
            # Only claim "more than 3" when that is actually the case (a single objective also lands here).
            if num_objectives > 3:
                _LOGGER.info(" -> More than 3 objectives found, generating a scatter plot matrix.")
            g = sns.pairplot(results_df[objective_cols], diag_kind="kde", plot_kws={'alpha': 0.6})
            fig = g.figure
            fig.suptitle("Pareto Front (Pairs Plot)", y=1.02)
            # The title sits above the axes (y > 1), so a tight bounding box is needed to keep it.
            save_kwargs['bbox_inches'] = 'tight'

        # Save and close the explicit figure handle rather than the implicit "current figure",
        # and always release the figure even if saving fails.
        try:
            fig.savefig(save_path, **save_kwargs)
        finally:
            plt.close(fig)

    _LOGGER.info(f"📊 Pareto plot saved to '{save_path.name}'")
231
-