PyPI - dragon-ml-toolbox - Versions diffs - 12.2.0__tar.gz → 12.4.0__tar.gz - Mend

dragon-ml-toolbox 12.2.0.tar.gz → 12.4.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (46)

{dragon_ml_toolbox-12.2.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.2.0
+Version: 12.4.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0/dragon_ml_toolbox.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.2.0
+Version: 12.4.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/ML_optimization.py RENAMED Viewed

@@ -24,6 +24,7 @@ from .math_utilities import discretize_categorical_values
 __all__ = [
     "MLOptimizer",
+    "FitnessEvaluator",
     "create_pytorch_problem",
     "run_optimization"
 ]
@@ -33,8 +34,8 @@ class MLOptimizer:
     """
     A wrapper class for setting up and running EvoTorch optimization tasks.
-    This class combines the functionality of `create_pytorch_problem` and
-    `run_optimization` functions into a single, streamlined workflow.
+    This class combines the functionality of `FitnessEvaluator`, `create_pytorch_problem`, and
+    `run_optimization` into a single, streamlined workflow.
     SNES and CEM algorithms do not accept bounds, the given bounds will be used as an initial starting point.
@@ -91,9 +92,16 @@ class MLOptimizer:
                 False if it starts at 1 (e.g., [1, 2, 3]).
             **searcher_kwargs: Additional keyword arguments for the selected search algorithm's constructor.
         """
+        # Make a fitness function
+        self.evaluator = FitnessEvaluator(
+            inference_handler=inference_handler,
+            categorical_index_map=categorical_index_map,
+            discretize_start_at_zero=discretize_start_at_zero
+        )
         # Call the existing factory function to get the problem and searcher factory
         self.problem, self.searcher_factory = create_pytorch_problem(
-            inference_handler=inference_handler,
+            evaluator=self.evaluator,
             bounds=bounds,
             task=task,
             algorithm=algorithm,
@@ -144,10 +152,67 @@ class MLOptimizer:
             categorical_mappings=self.categorical_mappings,
             discretize_start_at_zero=self.discretize_start_at_zero
         )
+class FitnessEvaluator:
+    """
+    A callable class that wraps the PyTorch model inference handler and performs
+    on-the-fly discretization for the EvoTorch fitness function.
+    This class is automatically instantiated by MLOptimizer and passed to
+    create_pytorch_problem, encapsulating the evaluation logic.
+    """
+    def __init__(self,
+                 inference_handler: PyTorchInferenceHandler,
+                 categorical_index_map: Optional[Dict[int, int]] = None,
+                 discretize_start_at_zero: bool = True):
+        """
+        Initializes the fitness evaluator.
+        Args:
+            inference_handler (PyTorchInferenceHandler):
+                An initialized inference handler containing the model.
+            categorical_index_map (Dict[int, int] | None):
+                Maps {column_index: cardinality} for discretization.
+            discretize_start_at_zero (bool):
+                True if discrete encoding starts at 0.
+        """
+        self.inference_handler = inference_handler
+        self.categorical_index_map = categorical_index_map
+        self.discretize_start_at_zero = discretize_start_at_zero
+        # Expose the device
+        self.device = self.inference_handler.device
+    def __call__(self, solution_tensor: torch.Tensor) -> torch.Tensor:
+        """
+        This is the fitness function EvoTorch will call.
+        It receives a batch of continuous solutions, discretizes the
+        categorical ones, and returns the model's predictions.
+        """
+        # Clone to avoid modifying the optimizer's internal state (SNES, CEM, GA)
+        processed_tensor = solution_tensor.clone()
+        if self.categorical_index_map:
+            for col_idx, cardinality in self.categorical_index_map.items():
+                # 1. Round (using torch.floor(x + 0.5) for "round half up" behavior)
+                rounded_col = torch.floor(processed_tensor[:, col_idx] + 0.5)
+                # 2. Determine clamping bounds
+                min_bound = 0 if self.discretize_start_at_zero else 1
+                max_bound = cardinality - 1 if self.discretize_start_at_zero else cardinality
+                # 3. Clamp the values and update the processed tensor
+                processed_tensor[:, col_idx] = torch.clamp(rounded_col, min_bound, max_bound)
+        # Use the *processed_tensor* for prediction
+        predictions = self.inference_handler.predict_batch(processed_tensor)[PyTorchInferenceKeys.PREDICTIONS]
+        return predictions.flatten()
 def create_pytorch_problem(
-    inference_handler: PyTorchInferenceHandler,
+    evaluator: FitnessEvaluator,
     bounds: Tuple[List[float], List[float]],
     task: Literal["min", "max"],
     algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
@@ -162,7 +227,7 @@ def create_pytorch_problem(
     The Genetic Algorithm works directly with the bounds, and operators such as SimulatedBinaryCrossOver and GaussianMutation.
     Args:
-        inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
+        evaluator (FitnessEvaluator): A callable class that wraps the model inference and handles on-the-fly discretization.
         bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
             Use the `optimization_tools.create_optimization_bounds()` helper to easily generate this and ensure unbiased categorical bounds.
         task (str): The optimization goal, either "minimize" or "maximize".
@@ -180,20 +245,13 @@ def create_pytorch_problem(
     upper_bounds = list(bounds[1])
     solution_length = len(lower_bounds)
-    device = inference_handler.device
+    device = evaluator.device
-    # Define the fitness function that EvoTorch will call.
-    def fitness_func(solution_tensor: torch.Tensor) -> torch.Tensor:
-        # Directly use the continuous-valued tensor from the optimizer for prediction
-        predictions = inference_handler.predict_batch(solution_tensor)[PyTorchInferenceKeys.PREDICTIONS]
-        return predictions.flatten()
     # Create the Problem instance.
     if algorithm == "CEM" or algorithm == "SNES":
         problem = evotorch.Problem(
             objective_sense=task,
-            objective_func=fitness_func,
+            objective_func=evaluator,
             solution_length=solution_length,
             initial_bounds=(lower_bounds, upper_bounds),
             device=device,
@@ -219,7 +277,7 @@ def create_pytorch_problem(
     elif algorithm == "Genetic":
         problem = evotorch.Problem(
             objective_sense=task,
-            objective_func=fitness_func,
+            objective_func=evaluator,
             solution_length=solution_length,
             bounds=(lower_bounds, upper_bounds),
             device=device,

{dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/custom_logger.py RENAMED Viewed

@@ -172,7 +172,7 @@ def load_list_strings(text_file: Union[str,Path], verbose: bool=True) -> list[st
         raise ValueError()
     if verbose:
-        _LOGGER.info(f"Text file loaded as list of strings.")
+        _LOGGER.info(f"Loaded '{target_path.name}' as list of strings.")
     return loaded_strings

{dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/data_exploration.py RENAMED Viewed

@@ -891,7 +891,8 @@ def standardize_percentages(
     df: pd.DataFrame,
     columns: list[str],
     treat_one_as_proportion: bool = True,
-    round_digits: int = 2
+    round_digits: int = 2,
+    verbose: bool=True
 ) -> pd.DataFrame:
     """
     Standardizes numeric columns containing mixed-format percentages.
@@ -932,6 +933,8 @@ def standardize_percentages(
         # Otherwise, the value is assumed to be a correctly formatted percentage
         return x
+    fixed_columns: list[str] = list()
     for col in columns:
         # --- Robustness Checks ---
@@ -949,6 +952,13 @@ def standardize_percentages(
         # Round the result
         df_copy[col] = df_copy[col].round(round_digits)
+        fixed_columns.append(col)
+    if verbose:
+        _LOGGER.info(f"Columns standardized:")
+        for fixed_col in fixed_columns:
+            print(f"  '{fixed_col}'")
     return df_copy

{dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/ml_tools/optimization_tools.py RENAMED Viewed

@@ -66,7 +66,7 @@ def create_optimization_bounds(
     # 1. Read header and determine feature names
     full_csv_path = make_fullpath(csv_path, enforce="file")
     try:
-        df_header = pd.read_csv(full_csv_path, nrows=0)
+        df_header = pd.read_csv(full_csv_path, nrows=0, encoding="utf-8")
     except Exception as e:
         _LOGGER.error(f"Failed to read header from CSV: {e}")
         raise

{dragon_ml_toolbox-12.2.0 → dragon_ml_toolbox-12.4.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "12.2.0"
+version = "12.4.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }