PyPI - dragon-ml-toolbox - Versions diffs - 12.0.1__py3-none-any.whl → 12.1.0__py3-none-any.whl - Mend

dragon-ml-toolbox 12.0.1__py3-none-any.whl → 12.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (11) hide show

{dragon_ml_toolbox-12.0.1.dist-info → dragon_ml_toolbox-12.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.0.1
+Version: 12.1.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-12.0.1.dist-info → dragon_ml_toolbox-12.1.0.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-dragon_ml_toolbox-12.0.1.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-12.0.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
+dragon_ml_toolbox-12.1.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-12.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
 ml_tools/ETL_cleaning.py,sha256=PLRSR-VYnt1nNT9XrcWq40SE0VzHCw7DQ8v9czfSQsU,20366
 ml_tools/ETL_engineering.py,sha256=l0I6Og9o4s6EODdk0kZXjbbC-a3vVPYy1FopP2BkQSQ,54909
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
@@ -10,8 +10,9 @@ ml_tools/ML_evaluation.py,sha256=tLswOPgH4G1KExSMn0876YtNkbxPh-W3J4MYOjomMWA,162
 ml_tools/ML_evaluation_multi.py,sha256=6OZyQ4SM9ALh38mOABmiHgIQDWcovsD_iOo7Bg9YZCE,12516
 ml_tools/ML_inference.py,sha256=ymFvncFsU10PExq87xnEj541DKV5ck0nMuK8ToJHzVQ,23067
 ml_tools/ML_models.py,sha256=pSCV6KbmVnPZr49Kbyg7g25CYaWBWJr6IinBHKgVKGw,28042
-ml_tools/ML_optimization.py,sha256=r1lAQiztTtRuh13rWj1iqbXvWO0LCqbzlkRdy3gEWo4,18124
+ml_tools/ML_optimization.py,sha256=TfVccKfZ_W6BgraZZ01-SNcNgGuViPozWLezBY8mBIg,20466
 ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
+ml_tools/ML_simple_optimization.py,sha256=X96zX6XPu3ggrcOapuG69jsiZJczJNihS1rcwi9OsBI,18159
 ml_tools/ML_trainer.py,sha256=_g48w5Ak-wQr5fGHdJqlcpnzv3gWyL1ghkOhy9VOZbo,23930
 ml_tools/ML_utilities.py,sha256=35DfZzAwfDwVwfRECD8X_2ynsU2NCpTdNJSmza6oAzQ,8712
 ml_tools/PSO_optimization.py,sha256=fVHeemqilBS0zrGV25E5yKwDlGdd2ZKa18d8CZ6Q6Fk,22961
@@ -23,18 +24,18 @@ ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
 ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/custom_logger.py,sha256=OZqG7FR_UE6byzY3RDmlj08a336ZU-4DzNBMPLr_d5c,5881
-ml_tools/data_exploration.py,sha256=qpRUCQEVUmkxjx7DAztT6yIdI___xNV5NVPMBqCp3Mk,38870
+ml_tools/data_exploration.py,sha256=is9P4c4orIKW6gRhTeScZlCGYH9ODguxMtVlrVubb4E,42515
 ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
 ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
 ml_tools/ensemble_learning.py,sha256=aTPeKthO4zRWBEaQJOUj8jEqVHiHjjOMXuiEWjI9NxM,21946
 ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
 ml_tools/keys.py,sha256=FDpbS3Jb0pjrVvvp2_8nZi919mbob_-xwuy5OOtKM_A,1848
-ml_tools/math_utilities.py,sha256=CUkyBuExFOnEHp9J1Xsh6H4xILwYOBilwFccM9J_Dxo,7870
-ml_tools/optimization_tools.py,sha256=P3I6lIpvZ8Xf2kX5FvvBKBmrK2pB6idBpkTzfUJxTeE,5073
+ml_tools/math_utilities.py,sha256=PxoOrnuj6Ntp7_TJqyDWi0JX03WpAO5iaFNK2Oeq5I4,8800
+ml_tools/optimization_tools.py,sha256=bkKrTjukNOpxgVDMW5mUX5vQ72ckBcS5VA4eG8uZsOI,13515
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
 ml_tools/serde.py,sha256=k0qAwfMf13lVBQSgq5u9MSXEoo31iOA2-Ncm8XgMCMI,3974
 ml_tools/utilities.py,sha256=gef62GLK7ev5BWkkQekeJoVZqwf2mIuOlOfyCw6WdtE,13882
-dragon_ml_toolbox-12.0.1.dist-info/METADATA,sha256=hFWAnmb8qKkNtj4wekSG86EN1H7M9oOzEcCvjDyJjHI,6166
-dragon_ml_toolbox-12.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-12.0.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-12.0.1.dist-info/RECORD,,
+dragon_ml_toolbox-12.1.0.dist-info/METADATA,sha256=PJbBSG9h6juu_srL07VVhgOIGqebQwn_rlI1RgZdTwo,6166
+dragon_ml_toolbox-12.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-12.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-12.1.0.dist-info/RECORD,,

ml_tools/ML_optimization.py CHANGED Viewed

@@ -5,7 +5,7 @@ import evotorch
 from evotorch.algorithms import SNES, CEM, GeneticAlgorithm
 from evotorch.logging import PandasLogger
 from evotorch.operators import SimulatedBinaryCrossOver, GaussianMutation
-from typing import Literal, Union, Tuple, List, Optional, Any, Callable
+from typing import Literal, Union, Tuple, List, Optional, Any, Callable, Dict
 from pathlib import Path
 from tqdm.auto import trange
 from contextlib import nullcontext
@@ -19,7 +19,7 @@ from .keys import PyTorchInferenceKeys
 from .SQL import DatabaseManager
 from .optimization_tools import _save_result
 from .utilities import save_dataframe
-from .math_utilities import threshold_binary_values
+from .math_utilities import discretize_categorical_values
 __all__ = [
@@ -39,15 +39,23 @@ class MLOptimizer:
     SNES and CEM algorithms do not accept bounds, the given bounds will be used as an initial starting point.
     Example:
-        >>> # 1. Initialize the optimizer with model and search parameters
+        >>> # 1. Get categorical info from preprocessing steps
+        >>> # e.g., from data_exploration.encode_categorical_features
+        >>> cat_mappings = {'feature_C': {'A': 0, 'B': 1}, 'feature_D': {'X': 0, 'Y': 1}}
+        >>> # e.g., from data_exploration.create_transformer_categorical_map
+        >>> # Assumes feature_C is at index 2 (cardinality 2) and feature_D is at index 3 (cardinality 2)
+        >>> cat_index_map = {2: 2, 3: 2}
+        >>>
+        >>> # 2. Initialize the optimizer
         >>> optimizer = MLOptimizer(
         ...     inference_handler=my_handler,
-        ...     bounds=(lower_bounds, upper_bounds),
-        ...     number_binary_features=2,
+        ...     bounds=(lower_bounds, upper_bounds), # Bounds for ALL features
         ...     task="max",
-        ...     algorithm="Genetic"
+        ...     algorithm="Genetic",
+        ...     categorical_index_map=cat_index_map,
+        ...     categorical_mappings=cat_mappings,
         ... )
-        >>> # 2. Run the optimization and save the results
+        >>> # 3. Run the optimization
         >>> best_result = optimizer.run(
         ...     num_generations=100,
         ...     target_name="my_target",
@@ -59,35 +67,43 @@ class MLOptimizer:
     def __init__(self,
                  inference_handler: PyTorchInferenceHandler,
                  bounds: Tuple[List[float], List[float]],
-                 number_binary_features: int,
                  task: Literal["min", "max"],
                  algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
                  population_size: int = 200,
+                 categorical_index_map: Optional[Dict[int, int]] = None,
+                 categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None,
+                 discretize_start_at_zero: bool = True,
                  **searcher_kwargs):
         """
         Initializes the optimizer by creating the EvoTorch problem and searcher.
         Args:
             inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
-            bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
-            number_binary_features (int): Number of binary features located at the END of the feature vector.
+            bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for ALL solution features.
+                Use the `optimization_tools.create_optimization_bounds()` helper to easily generate this and ensure unbiased categorical bounds.
             task (str): The optimization goal, either "min" or "max".
             algorithm (str): The search algorithm to use ("SNES", "CEM", "Genetic").
             population_size (int): Population size for CEM and GeneticAlgorithm.
+            categorical_index_map (Dict[int, int] | None): Used to discretize values after optimization. Maps {column_index: cardinality}.
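+            categorical_mappings (Dict[str, Dict[str, int]] | None): Maps {feature_name: {category_string: integer_code}}. Used to map discrete integer values back to strings before saving (e.g., with {'feature_C': {'A': 0, 'B': 1}}, the value 0 in 'feature_C' is mapped back to 'A').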
+            discretize_start_at_zero (bool):
+                True if the discrete encoding starts at 0 (e.g., [0, 1, 2]).
+                False if it starts at 1 (e.g., [1, 2, 3]).
             **searcher_kwargs: Additional keyword arguments for the selected search algorithm's constructor.
         """
         # Call the existing factory function to get the problem and searcher factory
         self.problem, self.searcher_factory = create_pytorch_problem(
             inference_handler=inference_handler,
             bounds=bounds,
-            binary_features=number_binary_features,
             task=task,
             algorithm=algorithm,
             population_size=population_size,
             **searcher_kwargs
         )
-        # Store binary_features count to pass it to the run function later
-        self._binary_features = number_binary_features
+        # Store categorical info to pass to the run function
+        self.categorical_map = categorical_index_map
+        self.categorical_mappings = categorical_mappings
+        self.discretize_start_at_zero = discretize_start_at_zero
     def run(self,
             num_generations: int,
@@ -104,7 +120,8 @@ class MLOptimizer:
             num_generations (int): The total number of generations for each repetition.
             target_name (str): Target name used for the CSV filename and/or SQL table.
             save_dir (str | Path): The directory where result files will be saved.
-            feature_names (List[str] | None): Names of the solution features for labeling output. If None, generic names like 'feature_0', 'feature_1', ... , will be created.
+            feature_names (List[str] | None): Names of the solution features for labeling output.
+                If None, generic names like 'feature_0', 'feature_1', ... , will be created.
             save_format (Literal['csv', 'sqlite', 'both']): The format for saving results.
             repetitions (int): The number of independent times to run the optimization.
             verbose (bool): If True, enables detailed logging.
@@ -112,25 +129,26 @@ class MLOptimizer:
         Returns:
             Optional[dict]: A dictionary with the best result if repetitions is 1, otherwise None.
         """
-        # Call the existing run function with the stored problem, searcher, and binary feature count
+        # Call the existing run function with the stored problem, searcher, and categorical info
         return run_optimization(
             problem=self.problem,
             searcher_factory=self.searcher_factory,
             num_generations=num_generations,
             target_name=target_name,
-            binary_features=self._binary_features,
             save_dir=save_dir,
             save_format=save_format,
             feature_names=feature_names,
             repetitions=repetitions,
-            verbose=verbose
+            verbose=verbose,
+            categorical_map=self.categorical_map,
+            categorical_mappings=self.categorical_mappings,
+            discretize_start_at_zero=self.discretize_start_at_zero
         )
 def create_pytorch_problem(
     inference_handler: PyTorchInferenceHandler,
     bounds: Tuple[List[float], List[float]],
-    binary_features: int,
     task: Literal["min", "max"],
     algorithm: Literal["SNES", "CEM", "Genetic"] = "Genetic",
     population_size: int = 200,
@@ -146,7 +164,7 @@ def create_pytorch_problem(
     Args:
         inference_handler (PyTorchInferenceHandler): An initialized inference handler containing the model and weights.
         bounds (tuple[list[float], list[float]]): A tuple containing the lower and upper bounds for the solution features.
-        binary_features (int): Number of binary features located at the END of the feature vector. Will be automatically added to the bounds.
+            Use the `optimization_tools.create_optimization_bounds()` helper to easily generate this and ensure unbiased categorical bounds.
         task (str): The optimization goal, either "minimize" or "maximize".
         algorithm (str): The search algorithm to use.
         population_size (int): Used for CEM and GeneticAlgorithm.
@@ -161,11 +179,6 @@ def create_pytorch_problem(
     lower_bounds = list(bounds[0])
     upper_bounds = list(bounds[1])
-    # add binary bounds
-    if binary_features > 0:
-        lower_bounds.extend([0.48] * binary_features)
-        upper_bounds.extend([0.52] * binary_features)
     solution_length = len(lower_bounds)
     device = inference_handler.device
@@ -242,12 +255,14 @@ def run_optimization(
     searcher_factory: Callable[[],Any],
     num_generations: int,
     target_name: str,
-    binary_features: int,
     save_dir: Union[str, Path],
     save_format: Literal['csv', 'sqlite', 'both'],
     feature_names: Optional[List[str]],
     repetitions: int = 1,
-    verbose: bool = True
+    verbose: bool = True,
+    categorical_map: Optional[Dict[int, int]] = None,
+    categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None,
+    discretize_start_at_zero: bool = True
 ) -> Optional[dict]:
     """
     Runs the evolutionary optimization process, with support for multiple repetitions.
@@ -270,7 +285,6 @@ def run_optimization(
         searcher_factory (Callable): The searcher factory to generate fresh evolutionary algorithms.
         num_generations (int): The total number of generations to run the search algorithm for in each repetition.
         target_name (str): Target name that will also be used for the CSV filename and SQL table.
-        binary_features (int): Number of binary features located at the END of the feature vector.
         save_dir (str | Path): The directory where the result file(s) will be saved.
         save_format (Literal['csv', 'sqlite', 'both'], optional): The format for
             saving results during iterative analysis.
@@ -280,13 +294,18 @@ def run_optimization(
         repetitions (int, optional): The number of independent times to run the
             entire optimization process.
         verbose (bool): Add an Evotorch Pandas logger saved as a csv. Only for the first repetition.
+        categorical_map (Dict[int, int] | None): Used to discretize values after optimization. Maps {column_index: cardinality}.
+        categorical_mappings (Dict[str, Dict[str, int]] | None): Maps {feature_name: {category_string: integer_code}}. Used to map discrete integer values back to strings before saving (e.g., with {'feature_C': {'A': 0, 'B': 1}}, the value 0 in 'feature_C' is mapped back to 'A').
+        discretize_start_at_zero (bool):
+            True if the discrete encoding starts at 0 (e.g., [0, 1, 2]).
+            False if it starts at 1 (e.g., [1, 2, 3]).
     Returns:
         Optional[dict]: A dictionary containing the best feature values and the
         fitness score if `repetitions` is 1. Returns `None` if `repetitions`
         is greater than 1, as results are streamed to files instead.
     """
-    # preprocess paths
+    # --- 1. Setup Paths and Feature Names ---
     save_path = make_fullpath(save_dir, make=True, enforce="directory")
     sanitized_target_name = sanitize_filename(target_name)
@@ -294,54 +313,38 @@ def run_optimization(
         sanitized_target_name = sanitized_target_name + ".csv"
     csv_path = save_path / sanitized_target_name
     db_path = save_path / "Optimization.db"
     db_table_name = target_name
-    # preprocess feature names
+    # Use problem's solution_length to create default names if none provided
     if feature_names is None:
-        feature_names = [f"feature_{i}" for i in range(problem.solution_length)] # type: ignore
+        feat_len = problem.solution_length
+        feature_names = [f"feature_{i}" for i in range(feat_len)] # type: ignore
+    # --- 2. Run Optimization ---
     # --- SINGLE RUN LOGIC ---
     if repetitions <= 1:
-        searcher = searcher_factory()
-        _LOGGER.info(f"🤖 Starting optimization with {searcher.__class__.__name__} Algorithm for {num_generations} generations...")
-        # for _ in trange(num_generations, desc="Optimizing"):
-        #     searcher.step()
-        # Attach logger if requested
-        if verbose:
-            pandas_logger = PandasLogger(searcher)
-        searcher.run(num_generations) # Use the built-in run method for simplicity
-        # # DEBUG new searcher objects
-        # for status_key in searcher.iter_status_keys():
-        #     print("===", status_key, "===")
-        #     print(searcher.status[status_key])
-        #     print()
+        _LOGGER.info(f"🤖 Starting optimization for {num_generations} generations...")
-        # Get results from the .status dictionary
-        # SNES and CEM use the key 'center' to get mean values if needed    best_solution_tensor = searcher.status["center"]
-        best_solution_container = searcher.status["pop_best"]
-        best_solution_tensor = best_solution_container.values
-        best_fitness = best_solution_container.evals
-        best_solution_np = best_solution_tensor.cpu().numpy()
-        # threshold binary features
-        if binary_features > 0:
-            best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
-        else:
-            best_solution_thresholded = best_solution_np
-        result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
-        result_dict[target_name] = best_fitness.item()
+        result_dict, pandas_logger = _run_single_optimization_rep(
+            searcher_factory=searcher_factory,
+            num_generations=num_generations,
+            feature_names=feature_names,
+            target_name=target_name,
+            categorical_map=categorical_map,
+            discretize_start_at_zero=discretize_start_at_zero,
+            attach_logger=verbose
+        )
-        _save_result(result_dict, 'csv', csv_path) # Single run defaults to CSV
+        # Single run defaults to CSV, pass mappings for reverse mapping
+        _save_result(
+            result_dict=result_dict,
+            save_format='csv',
+            csv_path=csv_path,
+            categorical_mappings=categorical_mappings
+        )
-        # Process logger
-        if verbose:
+        if pandas_logger:
             _handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)
         _LOGGER.info(f"Optimization complete. Best solution saved to '{csv_path.name}'")
@@ -350,57 +353,106 @@ def run_optimization(
     # --- MULTIPLE REPETITIONS LOGIC ---
     else:
         _LOGGER.info(f"🏁 Starting optimal solution space analysis with {repetitions} repetitions...")
+        first_run_logger = None # To store the logger from the first rep
         db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
         with db_context as db_manager:
+            # --- Setup Database Schema (if applicable) ---
             if db_manager:
-                schema = {name: "REAL" for name in feature_names}
+                schema = {}
+                categorical_cols = set(categorical_mappings.keys()) if categorical_mappings else set()
+                for name in feature_names:
+                    schema[name] = "TEXT" if name in categorical_cols else "REAL"
                 schema[target_name] = "REAL"
                 db_manager.create_table(db_table_name, schema)
+            # --- Repetitions Loop ---
             print("")
-            # Repetitions loop
-            pandas_logger = None
             for i in trange(repetitions, desc="Repetitions"):
-                # CRITICAL: Create a fresh searcher for each run using the factory
-                searcher = searcher_factory()
-                # Attach logger if requested
-                if verbose and i==0:
-                    pandas_logger = PandasLogger(searcher)
-                searcher.run(num_generations) # Use the built-in run method for simplicity
+                # Only attach a logger for the first repetition if verbose
+                attach_logger = verbose and (i == 0)
-                # Get results from the .status dictionary
-                # SNES and CEM use the key 'center' to get mean values if needed    best_solution_tensor = searcher.status["center"]
-                best_solution_container = searcher.status["pop_best"]
-                best_solution_tensor = best_solution_container.values
-                best_fitness = best_solution_container.evals
-                best_solution_np = best_solution_tensor.cpu().numpy()
+                result_dict, pandas_logger = _run_single_optimization_rep(
+                    searcher_factory=searcher_factory,
+                    num_generations=num_generations,
+                    feature_names=feature_names,
+                    target_name=target_name,
+                    categorical_map=categorical_map,
+                    discretize_start_at_zero=discretize_start_at_zero,
+                    attach_logger=attach_logger
+                )
-                # threshold binary features
-                if binary_features > 0:
-                    best_solution_thresholded = threshold_binary_values(input_array=best_solution_np, binary_values=binary_features)
-                else:
-                    best_solution_thresholded = best_solution_np
-                # make results dictionary
-                result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
-                result_dict[target_name] = best_fitness.item()
+                if pandas_logger:
+                    first_run_logger = pandas_logger
                 # Save each result incrementally
-                _save_result(result_dict, save_format, csv_path, db_manager, db_table_name)
+                _save_result(
+                    result_dict=result_dict,
+                    save_format=save_format,
+                    csv_path=csv_path,
+                    db_manager=db_manager,
+                    db_table_name=db_table_name,
+                    categorical_mappings=categorical_mappings
+                )
-        # Process logger
-        if pandas_logger is not None:
-            _handle_pandas_log(pandas_logger, save_path=save_path, target_name=target_name)
+        if first_run_logger:
+            _handle_pandas_log(first_run_logger, save_path=save_path, target_name=target_name)
         _LOGGER.info(f"Optimal solution space complete. Results saved to '{save_path}'")
         return None
+def _run_single_optimization_rep(
+    searcher_factory: Callable[[],Any],
+    num_generations: int,
+    feature_names: List[str],
+    target_name: str,
+    categorical_map: Optional[Dict[int, int]],
+    discretize_start_at_zero: bool,
+    attach_logger: bool
+) -> Tuple[dict, Optional[PandasLogger]]:
+    """
+    Internal helper to run one full optimization repetition.
+    Handles searcher creation, logging, running, and result post-processing.
+    """
+    # CRITICAL: Create a fresh searcher for each run using the factory
+    searcher = searcher_factory()
+    # Attach logger if requested
+    pandas_logger = PandasLogger(searcher) if attach_logger else None
+    # Run the optimization
+    searcher.run(num_generations)
+    # Get the best result
+    best_solution_container = searcher.status["pop_best"]
+    best_solution_tensor = best_solution_container.values
+    best_fitness = best_solution_container.evals
+    best_solution_np = best_solution_tensor.cpu().numpy()
+    # Discretize categorical/binary features
+    if categorical_map:
+        best_solution_thresholded = discretize_categorical_values(
+            input_array=best_solution_np,
+            categorical_info=categorical_map,
+            start_at_zero=discretize_start_at_zero
+        )
+    else:
+        best_solution_thresholded = best_solution_np
+    # Format results into a dictionary
+    result_dict = {name: value for name, value in zip(feature_names, best_solution_thresholded)}
+    result_dict[target_name] = best_fitness.item()
+    return result_dict, pandas_logger
 def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
     log_dataframe = logger.to_dataframe()
     save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)

dragon-ml-toolbox 12.0.1__py3-none-any.whl → 12.1.0__py3-none-any.whl

Potentially problematic release.

dragon-ml-toolbox 12.0.1__py3-none-any.whl → 12.1.0__py3-none-any.whl