dragon-ml-toolbox 12.2.0.tar.gz → 12.4.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (46)
  1. {dragon_ml_toolbox-12.2.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.4.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_optimization.py +73 -15
  4. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/custom_logger.py +1 -1
  5. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/data_exploration.py +11 -1
  6. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/optimization_tools.py +1 -1
  7. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/pyproject.toml +1 -1
  8. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/LICENSE +0 -0
  9. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/LICENSE-THIRD-PARTY.md +0 -0
  10. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/README.md +0 -0
  11. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  12. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  13. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  14. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  15. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ETL_cleaning.py +0 -0
  16. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ETL_engineering.py +0 -0
  17. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/GUI_tools.py +0 -0
  18. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/MICE_imputation.py +0 -0
  19. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_callbacks.py +0 -0
  20. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_datasetmaster.py +0 -0
  21. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_evaluation.py +0 -0
  22. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_evaluation_multi.py +0 -0
  23. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_inference.py +0 -0
  24. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_models.py +0 -0
  25. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_scaler.py +0 -0
  26. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_simple_optimization.py +0 -0
  27. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_trainer.py +0 -0
  28. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_utilities.py +0 -0
  29. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/PSO_optimization.py +0 -0
  30. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/RNN_forecast.py +0 -0
  31. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/SQL.py +0 -0
  32. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/VIF_factor.py +0 -0
  33. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/__init__.py +0 -0
  34. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/_logger.py +0 -0
  35. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/_script_info.py +0 -0
  36. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/constants.py +0 -0
  37. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ensemble_evaluation.py +0 -0
  38. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ensemble_inference.py +0 -0
  39. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ensemble_learning.py +0 -0
  40. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/handle_excel.py +0 -0
  41. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/keys.py +0 -0
  42. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/math_utilities.py +0 -0
  43. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/path_manager.py +0 -0
  44. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/serde.py +0 -0
  45. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/utilities.py +0 -0
  46. {dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.2.0
3
+ Version: 12.4.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.2.0
3
+ Version: 12.4.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -24,6 +24,7 @@ from .math_utilities import discretize_categorical_values
24
24
 
25
25
  __all__ = [
26
26
  "MLOptimizer",
27
+ "FitnessEvaluator",
27
28
  "create_pytorch_problem",
28
29
  "run_optimization"
29
30
  ]
@@ -33,8 +34,8 @@ class MLOptimizer:
33
34
  """
34
35
  A wrapper class for setting up and running EvoTorch optimization tasks.
35
36
 
36
- This class combines the functionality of `create_pytorch_problem` and
37
- `run_optimization` functions into a single, streamlined workflow.
37
+ This class combines the functionality of `FitnessEvaluator`, `create_pytorch_problem`, and
38
+ `run_optimization` into a single, streamlined workflow.
38
39
 
39
40
  SNES and CEM algorithms do not accept bounds, the given bounds will be used as an initial starting point.
40
41
 
@@ -91,9 +92,16 @@ class MLOptimizer:
91
92
  False if it starts at 1 (e.g., [1, 2, 3]).
92
93
  **searcher_kwargs: Additional keyword arguments for the selected search algorithm's constructor.
93
94
  """
95
+ # Make a fitness function
96
+ self.evaluator = FitnessEvaluator(
97
+ inference_handler=inference_handler,
98
+ categorical_index_map=categorical_index_map,
99
+ discretize_start_at_zero=discretize_start_at_zero
100
+ )
101
+
94
102
  # Call the existing factory function to get the problem and searcher factory
95
103
  self.problem, self.searcher_factory = create_pytorch_problem(
96
- inference_handler=inference_handler,
104
+ evaluator=self.evaluator,
97
105
  bounds=bounds,
98
106
  task=task,
99
107
  algorithm=algorithm,
@@ -144,10 +152,67 @@ class MLOptimizer:
144
152
  categorical_mappings=self.categorical_mappings,
145
153
  discretize_start_at_zero=self.discretize_start_at_zero
146
154
  )
155
+
156
+
157
+ class FitnessEvaluator:
158
+ """
159
+ A callable class that wraps the PyTorch model inference handler and performs
160
+ on-the-fly discretization for the EvoTorch fitness function.
161
+
162
+ This class is automatically instantiated by MLOptimizer and passed to
163
+ create_pytorch_problem, encapsulating the evaluation logic.
164
+ """
165
+ def __init__(self,
166
+ inference_handler: PyTorchInferenceHandler,
167
+ categorical_index_map: Optional[Dict[int, int]] = None,
168
+ discretize_start_at_zero: bool = True):
169
+ """
170
+ Initializes the fitness evaluator.
171
+
172
+ Args:
173
+ inference_handler (PyTorchInferenceHandler):
174
+ An initialized inference handler containing the model.
175
+ categorical_index_map (Dict[int, int] | None):
176
+ Maps {column_index: cardinality} for discretization.
177
+ discretize_start_at_zero (bool):
178
+ True if discrete encoding starts at 0.
179
+ """
180
+ self.inference_handler = inference_handler
181
+ self.categorical_index_map = categorical_index_map
182
+ self.discretize_start_at_zero = discretize_start_at_zero
183
+
184
+ # Expose the device
185
+ self.device = self.inference_handler.device
186
+
187
+ def __call__(self, solution_tensor: torch.Tensor) -> torch.Tensor:
188
+ """
189
+ This is the fitness function EvoTorch will call.
190
+
191
+ It receives a batch of continuous solutions, discretizes the
192
+ categorical ones, and returns the model's predictions.
193
+ """
194
+ # Clone to avoid modifying the optimizer's internal state (SNES, CEM, GA)
195
+ processed_tensor = solution_tensor.clone()
196
+
197
+ if self.categorical_index_map:
198
+ for col_idx, cardinality in self.categorical_index_map.items():
199
+ # 1. Round (using torch.floor(x + 0.5) for "round half up" behavior)
200
+ rounded_col = torch.floor(processed_tensor[:, col_idx] + 0.5)
201
+
202
+ # 2. Determine clamping bounds
203
+ min_bound = 0 if self.discretize_start_at_zero else 1
204
+ max_bound = cardinality - 1 if self.discretize_start_at_zero else cardinality
205
+
206
+ # 3. Clamp the values and update the processed tensor
207
+ processed_tensor[:, col_idx] = torch.clamp(rounded_col, min_bound, max_bound)
208
+
209
+ # Use the *processed_tensor* for prediction
210
+ predictions = self.inference_handler.predict_batch(processed_tensor)[PyTorchInferenceKeys.PREDICTIONS]
211
+ return predictions.flatten()
147
212
 
148
213
 
149
214
  def create_pytorch_problem(
150
- inference_handler: PyTorchInferenceHandler,
215
+ evaluator: FitnessEvaluator,
151
216
  bounds: Tuple[List[float], List[float]],
152
217
  task: Literal["min", "max"],
153
218
  algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
@@ -162,7 +227,7 @@ def create_pytorch_problem(
162
227
  The Genetic Algorithm works directly with the bounds, and operators such as SimulatedBinaryCrossOver and GaussianMutation.
163
228
 
164
229
  Args:
165
- inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
230
+ evaluator (FitnessEvaluator): A callable class that wraps the model inference and handles on-the-fly discretization.
166
231
  bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
167
232
  Use the `optimization_tools.create_optimization_bounds()` helper to easily generate this and ensure unbiased categorical bounds.
168
233
  task (str): The optimization goal, either "minimize" or "maximize".
@@ -180,20 +245,13 @@ def create_pytorch_problem(
180
245
  upper_bounds = list(bounds[1])
181
246
 
182
247
  solution_length = len(lower_bounds)
183
- device = inference_handler.device
248
+ device = evaluator.device
184
249
 
185
- # Define the fitness function that EvoTorch will call.
186
- def fitness_func(solution_tensor: torch.Tensor) -> torch.Tensor:
187
- # Directly use the continuous-valued tensor from the optimizer for prediction
188
- predictions = inference_handler.predict_batch(solution_tensor)[PyTorchInferenceKeys.PREDICTIONS]
189
- return predictions.flatten()
190
-
191
-
192
250
  # Create the Problem instance.
193
251
  if algorithm == "CEM" or algorithm == "SNES":
194
252
  problem = evotorch.Problem(
195
253
  objective_sense=task,
196
- objective_func=fitness_func,
254
+ objective_func=evaluator,
197
255
  solution_length=solution_length,
198
256
  initial_bounds=(lower_bounds, upper_bounds),
199
257
  device=device,
@@ -219,7 +277,7 @@ def create_pytorch_problem(
219
277
  elif algorithm == "Genetic":
220
278
  problem = evotorch.Problem(
221
279
  objective_sense=task,
222
- objective_func=fitness_func,
280
+ objective_func=evaluator,
223
281
  solution_length=solution_length,
224
282
  bounds=(lower_bounds, upper_bounds),
225
283
  device=device,
@@ -172,7 +172,7 @@ def load_list_strings(text_file: Union[str,Path], verbose: bool=True) -> list[st
172
172
  raise ValueError()
173
173
 
174
174
  if verbose:
175
- _LOGGER.info(f"Text file loaded as list of strings.")
175
+ _LOGGER.info(f"Loaded '{target_path.name}' as list of strings.")
176
176
 
177
177
  return loaded_strings
178
178
 
@@ -891,7 +891,8 @@ def standardize_percentages(
891
891
  df: pd.DataFrame,
892
892
  columns: list[str],
893
893
  treat_one_as_proportion: bool = True,
894
- round_digits: int = 2
894
+ round_digits: int = 2,
895
+ verbose: bool=True
895
896
  ) -> pd.DataFrame:
896
897
  """
897
898
  Standardizes numeric columns containing mixed-format percentages.
@@ -932,6 +933,8 @@ def standardize_percentages(
932
933
 
933
934
  # Otherwise, the value is assumed to be a correctly formatted percentage
934
935
  return x
936
+
937
+ fixed_columns: list[str] = list()
935
938
 
936
939
  for col in columns:
937
940
  # --- Robustness Checks ---
@@ -949,6 +952,13 @@ def standardize_percentages(
949
952
 
950
953
  # Round the result
951
954
  df_copy[col] = df_copy[col].round(round_digits)
955
+
956
+ fixed_columns.append(col)
957
+
958
+ if verbose:
959
+ _LOGGER.info(f"Columns standardized:")
960
+ for fixed_col in fixed_columns:
961
+ print(f" '{fixed_col}'")
952
962
 
953
963
  return df_copy
954
964
 
@@ -66,7 +66,7 @@ def create_optimization_bounds(
66
66
  # 1. Read header and determine feature names
67
67
  full_csv_path = make_fullpath(csv_path, enforce="file")
68
68
  try:
69
- df_header = pd.read_csv(full_csv_path, nrows=0)
69
+ df_header = pd.read_csv(full_csv_path, nrows=0, encoding="utf-8")
70
70
  except Exception as e:
71
71
  _LOGGER.error(f"Failed to read header from CSV: {e}")
72
72
  raise
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "12.2.0"
3
+ version = "12.4.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }