dragon-ml-toolbox 19.9.0__py3-none-any.whl → 19.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dragon-ml-toolbox
- Version: 19.9.0
+ Version: 19.11.0
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
  License-Expression: MIT
@@ -90,12 +90,6 @@ Provides-Extra: gui-torch
  Requires-Dist: numpy<2.0; extra == "gui-torch"
  Requires-Dist: torch; extra == "gui-torch"
  Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-torch"
- Provides-Extra: pyinstaller
- Requires-Dist: pyinstaller; extra == "pyinstaller"
- Provides-Extra: nuitka
- Requires-Dist: nuitka; extra == "nuitka"
- Requires-Dist: zstandard; extra == "nuitka"
- Requires-Dist: ordered-set; extra == "nuitka"
  Dynamic: license-file

  # dragon-ml-toolbox
@@ -137,7 +131,7 @@ conda install -c conda-forge dragon-ml-toolbox

  ## Modular Installation

- This toolbox is designed as a collection of mutually exclusive environments due to conflicting core dependencies, except APP bundlers (PyInstaller/Nuitka).
+ This toolbox is designed as a collection of mutually exclusive environments due to conflicting core dependencies.

  - Rule: Create a fresh virtual environment for each module to use.

@@ -330,18 +324,6 @@ schema

  ---

- ### ⚒️ APP bundlers
-
- Dependencies required to compile applications, inference scripts, or GUIs into standalone executables (`.exe` or binary) for distribution. Choose your preferred backend:
-
- ```Bash
- pip install "dragon-ml-toolbox[pyinstaller]"
- ```
-
- ```Bash
- pip install "dragon-ml-toolbox[nuitka]"
- ```
-
  ## Usage

  After installation, import modules like this:
@@ -1,5 +1,5 @@
- dragon_ml_toolbox-19.9.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
- dragon_ml_toolbox-19.9.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=XBLtvGjvBf-q93a5iylHj94Lm78UzInC-3Cii01jc6I,3127
+ dragon_ml_toolbox-19.11.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+ dragon_ml_toolbox-19.11.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
  ml_tools/ETL_cleaning.py,sha256=cKXyRFaaFs_beAGDnQM54xnML671kq-yJEGjHafW-20,351
  ml_tools/ETL_engineering.py,sha256=cwh1FhtNdUHllUDvho-x3SIVj4KwG_rFQR6VYzWUg0U,898
  ml_tools/GUI_tools.py,sha256=O89rG8WQv6GY1DiphQjIsPzXFCQID6te7q_Sgt1iTkQ,294
@@ -45,7 +45,7 @@ ml_tools/ensemble_learning.py,sha256=BLPnpfJWCly-D75mkRP1FE5TExoWAAlAHR89KAzW9iU
  ml_tools/excel_handler.py,sha256=h35HMNnO44btxsTSfZXj2HiJtpRS4fdrJLbzru4heMs,453
  ml_tools/keys.py,sha256=s9HEIAJCRw4DO7ll0yjc8u5rrSI9MOmfkR_1fKpkfy8,263
  ml_tools/math_utilities.py,sha256=53nOXlhb5taUHj4CDHsXliArEfPkOlJD7G_dJa3_iOU,321
- ml_tools/optimization_tools.py,sha256=YQZiXT86kP28NEcyLDbowGeNXHgJDm9hzl-ClNgGQXU,389
+ ml_tools/optimization_tools.py,sha256=rPG2VJ7hk9hv5wfKPq4zPJDXFWKioROOiJWmzXlXzVA,541
  ml_tools/path_manager.py,sha256=ion-x2W_rQjra3ChuOHwVtgXhv7LkpXP0lkBef730tk,350
  ml_tools/plot_fonts.py,sha256=6-WevfhDjbyWbSrFM6bqW-h5NC_mAO4XzdFR-oQ3DPE,110
  ml_tools/schema.py,sha256=AddXOa4P9HinlJ6SnICksHzBqRyi7MaichwVn-z_oVE,219
@@ -55,11 +55,11 @@ ml_tools/_core/_ETL_cleaning.py,sha256=_pTNKuapNHgWErmxvsXW-2YzCm4BaTshKV627A38R
  ml_tools/_core/_ETL_engineering.py,sha256=JgIWrQGyNjmLrbyv5Kh0EHKBLmYlyrGKSnKRxGzxSco,57930
  ml_tools/_core/_GUI_tools.py,sha256=kpvk18Eb4vdLzo-I5mBV1yuwPXs-NJJ01rn-iCXHvIY,49079
  ml_tools/_core/_IO_tools.py,sha256=oWaYa_OVO-8ANVt_a9F1QPMvyOcI2yLbtq7LoVHlqek,16625
- ml_tools/_core/_MICE_imputation.py,sha256=_juIymUnNDRWjSLepL8Ee_PncoShbxjR7YtqTtYbteU,21107
+ ml_tools/_core/_MICE_imputation.py,sha256=64l20duGWt93Q2MbqcWqrA1s99JPRf5AJACb1CZi2xI,21149
  ml_tools/_core/_ML_callbacks.py,sha256=qtCrVFHTq-nk4NIsAdwIkfkKwFXX6I-6PoCgqZELp70,16734
  ml_tools/_core/_ML_chaining_inference.py,sha256=vXUPZzuQ2yKU71kkvUsE0xPo0hN-Yu6gfnL0JbXoRjI,7783
  ml_tools/_core/_ML_chaining_utilities.py,sha256=nsYowgRbkIYuzRiHlqsM3tnC3c-8O73CY8DHUF14XL0,19248
- ml_tools/_core/_ML_configuration.py,sha256=6lKod_NuXSj0ElYmkkwnRxZEiZctMlX1x4b0ByRKKhg,52281
+ ml_tools/_core/_ML_configuration.py,sha256=hwnDCo9URsFqRCgLuFJhGTtoOqbE1XJreNY8B_3spTg,52693
  ml_tools/_core/_ML_configuration_pytab.py,sha256=C3e4iScqdRePVDoqnic6xXMOW7DNYqpgTCeaFDyMdL4,3286
  ml_tools/_core/_ML_datasetmaster.py,sha256=yU1BMtzz6XumMWCetVACrRLk7WJQwmYhaQ-VAWu9Ots,32043
  ml_tools/_core/_ML_evaluation.py,sha256=bu8qlYzhWSC1B7wNfCC5TSF-oed-uP8EF7TV45VTiBM,37325
@@ -70,27 +70,27 @@ ml_tools/_core/_ML_inference.py,sha256=5swm2lnsrDLalBnCm7gZPlDucX4yNCq5vn7ck3SW_
  ml_tools/_core/_ML_models.py,sha256=8FUx4-TVghlBF9srh1_5UxovrWPU7YEZ6XXLqwJei88,27974
  ml_tools/_core/_ML_models_advanced.py,sha256=oU6M5FEBMQ9yPp32cziWh3bz8SXRho07vFMC8ZDVcuU,45002
  ml_tools/_core/_ML_models_pytab.py,sha256=EHHnDG02ghcJORy2gipm3NcrlzL0qygD44o7QGmT1Zs,26297
- ml_tools/_core/_ML_optimization.py,sha256=b1qfHiGyvVoj-ENqDbHTf1jNx55niUWE9KEZJv3vg80,28253
- ml_tools/_core/_ML_optimization_pareto.py,sha256=fad4UjW5TDbCgIsVFk1qmkq8DnU5sahFFuC2DgKAQ3I,36889
+ ml_tools/_core/_ML_optimization.py,sha256=mvG1joVS3U67lmSwzMgLgNGzh4H3Py3ttKeaTM3EUnU,28126
+ ml_tools/_core/_ML_optimization_pareto.py,sha256=1PA8o5qbI13x5QusWhRIJMiPz3cMA2dUT1ZwU9NIZQM,37609
  ml_tools/_core/_ML_scaler.py,sha256=Nhu6qli_QezHQi5NKhRb8Z51bBJgzk2nEp_yW4B9H4U,8134
  ml_tools/_core/_ML_sequence_datasetmaster.py,sha256=0YVOPf-y4ZNdgUxropXUWrmInNyGYaUYprYvXf31n9U,17811
  ml_tools/_core/_ML_sequence_evaluation.py,sha256=AiPHtZ9DRpE6zL9n3Tp5eGGD9vrYRkLbZ0Nc274mL7I,8069
  ml_tools/_core/_ML_sequence_inference.py,sha256=zd3hBwOtLmjAV4JtdB2qFY9GxhysajFufATdy8fjGTE,16316
  ml_tools/_core/_ML_sequence_models.py,sha256=5qcEYLU6wDePBITnikBrj_H9mCvyJmElKa3HiWGXhZs,5639
  ml_tools/_core/_ML_trainer.py,sha256=hSsudWrlYWpi53DXIlKI6ovVhz7xLrQ8oKIDJOXf4Eg,117747
- ml_tools/_core/_ML_utilities.py,sha256=yXVKow-bgpahMChpp7iUlSxAEtgityXwC54FPReeNNA,30487
+ ml_tools/_core/_ML_utilities.py,sha256=elLGD0QYh148_9iNLlqGe1vz-wCFspJa6CWtWTfA3jY,35594
  ml_tools/_core/_ML_vision_datasetmaster.py,sha256=8EsE7luzphVlwBXdOsOwsFfz1D4UIUSEQtqHlM0Vf-o,67084
  ml_tools/_core/_ML_vision_evaluation.py,sha256=BSLf9xrGpaR02Dhkf-fAbgxSpwRjf7DruNIcQadl7qg,11631
  ml_tools/_core/_ML_vision_inference.py,sha256=6K9gMFjAAZKfLAIQlOkm_I9hvCPmO--9-1vnskQRk0I,20190
  ml_tools/_core/_ML_vision_models.py,sha256=oUik-RLxFvZFZCtFztjkSfFYgJuRx4QzfwHVY1ny4Sc,26217
  ml_tools/_core/_ML_vision_transformers.py,sha256=imjL9h5kwpfuRn9rBelNpgtrdU-EecBEcHMFZMXTeZA,15303
- ml_tools/_core/_PSO_optimization.py,sha256=Dg76d7t2ixPCXqQ-KceG9nzuLajHGN0s5RiawRGzsT4,22970
+ ml_tools/_core/_PSO_optimization.py,sha256=W3g5xw2v2eOUQadv8KHFkt5HNm9AiY3ZUk-TeyVuZjw,22991
  ml_tools/_core/_SQL.py,sha256=zX_8EgYfmLmvvrnL851KMkI4w9kdkjHJ997BTvS5aig,11556
  ml_tools/_core/_VIF_factor.py,sha256=BM0mTowBqt45PXFy9oJLhT9C-CTWWo0TQhgCyWYLHtQ,10457
  ml_tools/_core/__init__.py,sha256=d4IG0OxUXj2HffepzQcYixHlZeuuuDMAFa09H_6LtmU,12
  ml_tools/_core/_data_exploration.py,sha256=-g_e4Lox4LN8c2AfhpcPmnI9TNIZGl84O8hWEVH5asA,77438
  ml_tools/_core/_ensemble_evaluation.py,sha256=17lWl4bWLT1BAMv_fhGf2D3wy-F4jx0HgnJ79lYkRuE,28419
- ml_tools/_core/_ensemble_inference.py,sha256=PfZG-r65Vw3IAmBJZg9W0zYGEe-QbhfUh_rd2ho-rr8,8610
+ ml_tools/_core/_ensemble_inference.py,sha256=9UpARSETzmqPdQmxqizD768tjkqldxHw1ER_hM9Kx9M,8631
  ml_tools/_core/_ensemble_learning.py,sha256=X8ghbjDOLMENCWdISXLhDlHQtR3C6SW1tkTBAcfRRPY,22016
  ml_tools/_core/_excel_handler.py,sha256=gV4rSIsiowb0xllpEJxzUKaYDDVpmP_lxs9wZA76-cc,14050
  ml_tools/_core/_keys.py,sha256=pOqxhEFcDuAeuQveJNykdQfB6gVEg8ZY7L7MYQmtY_o,7551
@@ -98,14 +98,14 @@ ml_tools/_core/_logger.py,sha256=86Ge0sDE_WgwsZBglQRYPyFYX3lcsIo0NzszNPzlxuk,525
  ml_tools/_core/_math_utilities.py,sha256=IlXAiZgTcLtus03jJOBOyF9ZCQDf8qLGjrCHu9Mrgak,9091
  ml_tools/_core/_models_advanced_base.py,sha256=ceW0V_CcfOnSFqHlxUhVU8-5mtQq4tFyo8TX-xVexrY,4982
  ml_tools/_core/_models_advanced_helpers.py,sha256=yrAVgYdBsNYD6Vy-pYL5__wI9Z7inOvNUngMgyuypjo,38973
- ml_tools/_core/_optimization_tools.py,sha256=2LkntNRc19uGur9u0yI-KnNX56tc63sxaRNj6W440Og,20077
- ml_tools/_core/_path_manager.py,sha256=z4zqYqppKhgOj3ArfkKo4tieO8oNaHWUoshCQRNbd1w,20284
+ ml_tools/_core/_optimization_tools.py,sha256=WdQkkknbErk4p1cCj2l5CLImK2oRAzhmR3QFR50Hbzk,20098
+ ml_tools/_core/_path_manager.py,sha256=-gJ5qoEzpXsQT7gfxV_6kA7mk33iDsMXRmtOoVPl1JA,20845
  ml_tools/_core/_plot_fonts.py,sha256=CjYXW2gZ9AUaGkyX8_WOXXNYs6d1PTK-nEJBrv_Zb2o,2287
  ml_tools/_core/_schema.py,sha256=TM5WVVMoKOvr_Bc2z34sU_gzKlM465PRKTgdZaEOkGY,14076
  ml_tools/_core/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
  ml_tools/_core/_serde.py,sha256=tsI4EO2Y7jrBMmbQ1pinDsPOrOg-SaPuB-Dt40q0taE,5609
- ml_tools/_core/_utilities.py,sha256=iA8fLWdhsIx4ut2Dp8M_OyU0Y3PPLgGdIklyl17x6xk,22560
- dragon_ml_toolbox-19.9.0.dist-info/METADATA,sha256=_EtgLq25qcjnIMmFvPVOfa-xWTp176hHC_VbxFLdWno,8774
- dragon_ml_toolbox-19.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dragon_ml_toolbox-19.9.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
- dragon_ml_toolbox-19.9.0.dist-info/RECORD,,
+ ml_tools/_core/_utilities.py,sha256=D7FGyEszcMHxGkMW4aqN7JUwabTICCcQz9qsGtOj97o,22787
+ dragon_ml_toolbox-19.11.0.dist-info/METADATA,sha256=HUeAsHLQTdaopzM0YVyRgaofMaHXoOZUan456E5M1JU,8193
+ dragon_ml_toolbox-19.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dragon_ml_toolbox-19.11.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+ dragon_ml_toolbox-19.11.0.dist-info/RECORD,,
@@ -31,13 +31,9 @@ This project depends on the following third-party packages. Each is governed by
  - [colorlog](https://github.com/borntyping/python-colorlog/blob/main/LICENSE)
  - [evotorch](https://github.com/nnaisense/evotorch/blob/master/LICENSE)
  - [FreeSimpleGUI](https://github.com/spyoungtech/FreeSimpleGUI/blob/main/license.txt)
- - [nuitka](https://github.com/Nuitka/Nuitka/blob/main/LICENSE.txt)
  - [omegaconf](https://github.com/omry/omegaconf/blob/master/LICENSE)
- - [ordered-set](https://github.com/rspeer/ordered-set/blob/master/MIT-LICENSE)
- - [pyinstaller](https://github.com/pyinstaller/pyinstaller/blob/develop/COPYING.txt)
  - [pytorch_tabular](https://github.com/manujosephv/pytorch_tabular/blob/main/LICENSE)
  - [torchmetrics](https://github.com/Lightning-AI/torchmetrics/blob/master/LICENSE)
- - [zstandard](https://github.com/indygreg/python-zstandard/blob/main/LICENSE)
  - [captum](https://github.com/meta-pytorch/captum/blob/master/LICENSE)
  - [node](https://github.com/Qwicen/node/blob/master/LICENSE.md)
  - [pytorch-widedeep](https://github.com/jrzaurin/pytorch-widedeep?tab=readme-ov-file#license)
@@ -256,7 +256,7 @@ def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str]
  if input_path.is_file():
  all_file_paths = [input_path]
  else:
- all_file_paths = list(list_csv_paths(input_path).values())
+ all_file_paths = list(list_csv_paths(input_path, raise_on_empty=True).values())

  for df_path in all_file_paths:
  df: pd.DataFrame
@@ -461,7 +461,7 @@ class DragonMICE:
  if input_path.is_file():
  all_file_paths = [input_path]
  else:
- all_file_paths = list(list_csv_paths(input_path).values())
+ all_file_paths = list(list_csv_paths(input_path, raise_on_empty=True).values())

  for df_path in all_file_paths:

@@ -6,7 +6,7 @@ import numpy as np
  from ._schema import FeatureSchema
  from ._script_info import _script_info
  from ._logger import get_logger
- from ._path_manager import sanitize_filename
+ from ._path_manager import sanitize_filename, make_fullpath
  from ._keys import MLTaskKeys


@@ -701,11 +701,11 @@ class DragonParetoConfig(_BaseModelParams):
  def __init__(self,
  save_directory: Union[str, Path],
  target_objectives: Dict[str, Literal["min", "max"]],
- continuous_bounds_map: Union[Dict[str, Tuple[float, float]], Dict[str, List[float]]],
+ continuous_bounds_map: Union[Dict[str, Tuple[float, float]], Dict[str, List[float]], str, Path],
  columns_to_round: Optional[List[str]] = None,
- population_size: int = 400,
+ population_size: int = 500,
  generations: int = 1000,
- solutions_filename: str = "ParetoSolutions",
+ solutions_filename: str = "NonDominatedSolutions",
  float_precision: int = 4,
  log_interval: int = 10,
  plot_size: Tuple[int, int] = (10, 7),
@@ -718,7 +718,7 @@ class DragonParetoConfig(_BaseModelParams):
  save_directory (str | Path): Directory to save artifacts.
  target_objectives (Dict[str, "min"|"max"]): Dictionary mapping target names to optimization direction.
  Example: {"price": "max", "error": "min"}
- continuous_bounds_map (Dict): Bounds for continuous features {name: (min, max)}.
+ continuous_bounds_map (Dict): Bounds for continuous features {name: (min, max)}. Or a path/str to a directory containing the "optimization_bounds.json" file.
  columns_to_round (List[str] | None): List of continuous column names that should be rounded to the nearest integer.
  population_size (int): Size of the genetic population.
  generations (int): Number of generations to run.
@@ -729,7 +729,13 @@ class DragonParetoConfig(_BaseModelParams):
  plot_font_size (int): Font size for plot text.
  discretize_start_at_zero (bool): Categorical encoding start index. True=0, False=1.
  """
- self.save_directory = save_directory
+ # Validate string or Path
+ valid_save_dir = make_fullpath(save_directory, make=True, enforce="directory")
+
+ if isinstance(continuous_bounds_map, (str, Path)):
+ continuous_bounds_map = make_fullpath(continuous_bounds_map, make=False, enforce="directory")
+
+ self.save_directory = valid_save_dir
  self.target_objectives = target_objectives
  self.continuous_bounds_map = continuous_bounds_map
  self.columns_to_round = columns_to_round
@@ -742,7 +748,6 @@ class DragonParetoConfig(_BaseModelParams):
  self.plot_font_size = plot_font_size
  self.discretize_start_at_zero = discretize_start_at_zero

-
  # ----------------------------
  # Metrics Configurations
  # ----------------------------
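
The `DragonParetoConfig` hunks above widen `continuous_bounds_map` so it also accepts a `str`/`Path` pointing at a directory that holds an `optimization_bounds.json` file, and they validate `save_directory` through `make_fullpath`. A minimal construction sketch follows, assuming the class is importable from the public `ml_tools` namespace; the import path, target names, and feature names are illustrative, not taken from the package docs.

```python
# Hypothetical sketch of the two accepted forms of `continuous_bounds_map`.
from pathlib import Path
from ml_tools.ML_configuration import DragonParetoConfig  # assumed import path

# Form 1: explicit {feature: (min, max)} bounds for continuous features.
config = DragonParetoConfig(
    save_directory="results/pareto",
    target_objectives={"strength": "max", "cost": "min"},   # illustrative targets
    continuous_bounds_map={"feature_A": (0.0, 100.0), "feature_B": (-10.0, 10.0)},
    columns_to_round=["feature_A"],
)

# Form 2 (new in 19.11.0): a directory expected to contain "optimization_bounds.json".
config_from_dir = DragonParetoConfig(
    save_directory="results/pareto",
    target_objectives={"strength": "max", "cost": "min"},
    continuous_bounds_map=Path("results/bounds_dir"),
)
```

The defaults changed in the same hunks (`population_size=500`, `solutions_filename="NonDominatedSolutions"`) apply whenever those arguments are omitted.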
@@ -44,12 +44,10 @@ class DragonOptimizer:
  SNES and CEM algorithms do not accept bounds, the given bounds will be used as an initial starting point.

  Example:
- >>> # 1. Get the final schema from data exploration
- >>> schema = data_exploration.finalize_feature_schema(...)
- >>> # 2. Define bounds for continuous features
+ >>> # 1. Define bounds for continuous features
  >>> cont_bounds = {'feature_A': (0, 100), 'feature_B': (-10, 10)}
  >>>
- >>> # 3. Initialize the optimizer
+ >>> # 2. Initialize the optimizer
  >>> optimizer = DragonOptimizer(
  ... inference_handler=my_handler,
  ... schema=schema,
@@ -58,7 +56,7 @@ class DragonOptimizer:
  ... task="max",
  ... algorithm="Genetic",
  ... )
- >>> # 4. Run the optimization
+ >>> # 3. Run the optimization
  >>> best_result = optimizer.run(
  ... num_generations=100,
  ... save_dir="/path/to/results",
@@ -7,7 +7,7 @@ import matplotlib.cm as cm
  from matplotlib.collections import LineCollection
  import seaborn as sns
  from pathlib import Path
- from typing import Literal, Union, Tuple, List, Optional, Dict
+ from typing import Literal, Union, List, Optional, Dict
  from tqdm import tqdm
  import plotly.express as px
  import plotly.graph_objects as go
@@ -21,7 +21,7 @@ from ._SQL import DragonSQL
  from ._ML_inference import DragonInferenceHandler
  from ._ML_chaining_inference import DragonChainInference
  from ._ML_configuration import DragonParetoConfig
- from ._optimization_tools import create_optimization_bounds, plot_optimal_feature_distributions_from_dataframe
+ from ._optimization_tools import create_optimization_bounds, plot_optimal_feature_distributions_from_dataframe, load_continuous_bounds_template
  from ._math_utilities import discretize_categorical_values
  from ._utilities import save_dataframe_filename
  from ._IO_tools import save_json
@@ -107,6 +107,10 @@ class DragonParetoOptimizer:
  _LOGGER.error(f"Target '{name}' not found in model targets: {available_targets}")
  raise ValueError()

+ if direction not in ["min" , "max"]:
+ _LOGGER.error(f"Invalid optimization direction '{direction}' for target '{name}'. Use 'min' or 'max'.")
+ raise ValueError()
+
  # For standard handlers, we need indices to slice the output tensor.
  # For chain handlers, we just rely on name matching, but we track index for consistency.
  idx = available_targets.index(name)
@@ -117,10 +121,20 @@ class DragonParetoOptimizer:
  _LOGGER.info(f"Pareto Optimization setup for: {self.ordered_target_names}")

  # --- 2. Bounds Setup ---
+ # check type
+ raw_bounds_map = config.continuous_bounds_map
+ if isinstance(raw_bounds_map, (str, Path)):
+ continuous_bounds = load_continuous_bounds_template(raw_bounds_map)
+ elif isinstance(raw_bounds_map, dict):
+ continuous_bounds = raw_bounds_map
+ else:
+ _LOGGER.error(f"Invalid type for 'continuous_bounds_map' in config. Expected dict or Path. Got {type(raw_bounds_map)}.")
+ raise ValueError()
+
  # Uses the external tool which reads the schema to set correct bounds for both continuous and categorical
  bounds = create_optimization_bounds(
  schema=schema,
- continuous_bounds_map=config.continuous_bounds_map,
+ continuous_bounds_map=continuous_bounds,
  start_at_zero=self.discretize_start_at_zero
  )
  self.lower_bounds = list(bounds[0])
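
When the config carries a path instead of a dict, the bounds-setup hunk above resolves it through the new `load_continuous_bounds_template` helper before calling `create_optimization_bounds`. A rough sketch of that path-based flow, assuming the helper is re-exported from `ml_tools.optimization_tools` (the re-export appears in the `__init__` diff at the end of this changeset); only the single positional argument visible in the diff is used, and any further keyword options are unknown here.

```python
# Hypothetical sketch: loading continuous bounds from a saved template directory.
from ml_tools.optimization_tools import load_continuous_bounds_template  # assumed import path

# Directory assumed to contain "optimization_bounds.json" (per the DragonParetoConfig docstring).
continuous_bounds = load_continuous_bounds_template("results/bounds_dir")
# Expected shape: {"feature_A": (0.0, 100.0), "feature_B": (-10.0, 10.0)}
```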
@@ -46,17 +46,24 @@ class ArtifactFinder:
  └── FeatureSchema.json (Required if `load_schema` is True)
  ```
  """
- def __init__(self, directory: Union[str, Path], load_scaler: bool, load_schema: bool) -> None:
+ def __init__(self,
+ directory: Union[str, Path],
+ load_scaler: bool,
+ load_schema: bool,
+ strict: bool=False,
+ verbose: bool=True) -> None:
  """
  Args:
  directory (str | Path): The path to the directory that contains training artifacts.
  load_scaler (bool): If True, requires and searches for a scaler file `scaler_*.pth`.
  load_schema (bool): If True, requires and searches for a FeatureSchema file `FeatureSchema.json`.
+ strict (bool): If True, raises an error if any artifact is missing. If False, returns None for missing artifacts silently.
+ verbose (bool): Displays the missing artifacts in the directory or a success message.
  """
  # validate directory
  dir_path = make_fullpath(directory, enforce="directory")

- parsing_dict = _find_model_artifacts(target_directory=dir_path, load_scaler=load_scaler, verbose=False)
+ parsing_dict = _find_model_artifacts(target_directory=dir_path, load_scaler=load_scaler, verbose=False, strict=strict)
  self._weights_path = parsing_dict[PytorchArtifactPathKeys.WEIGHTS_PATH]
  self._feature_names_path = parsing_dict[PytorchArtifactPathKeys.FEATURES_PATH]
  self._model_architecture_path = parsing_dict[PytorchArtifactPathKeys.ARCHITECTURE_PATH]
@@ -64,71 +71,121 @@ class ArtifactFinder:
  self._model_architecture_path = parsing_dict[PytorchArtifactPathKeys.ARCHITECTURE_PATH]
  self._scaler_path = None
  self._schema = None
+ self._strict = strict

  if load_scaler:
  self._scaler_path = parsing_dict[PytorchArtifactPathKeys.SCALER_PATH]

  if load_schema:
- self._schema = FeatureSchema.from_json(directory=dir_path)
+ try:
+ self._schema = FeatureSchema.from_json(directory=dir_path)
+ except Exception:
+ if strict:
+ # FeatureSchema logs its own error details
+ # _LOGGER.error(f"Failed to load FeatureSchema from '{dir_path.name}': {e}")
+ raise FileNotFoundError()
+ else:
+ # _LOGGER.warning(f"Could not load FeatureSchema from '{dir_path.name}': {e}")
+ self._schema = None
+
+ # Process feature names
+ if self._feature_names_path is not None:
+ self._feature_names = self._process_text(self._feature_names_path)
+ else:
+ self._feature_names = None
+ # Process target names
+ if self._target_names_path is not None:
+ self._target_names = self._process_text(self._target_names_path)
+ else:
+ self._target_names = None
+
+ if verbose:
+ # log missing artifacts
+ missing_artifacts = []
+ if self._feature_names is None:
+ missing_artifacts.append("Feature Names")
+ if self._target_names is None:
+ missing_artifacts.append("Target Names")
+ if self._weights_path is None:
+ missing_artifacts.append("Weights File")
+ if self._model_architecture_path is None:
+ missing_artifacts.append("Model Architecture File")
+ if load_scaler and self._scaler_path is None:
+ missing_artifacts.append("Scaler File")
+ if load_schema and self._schema is None:
+ missing_artifacts.append("FeatureSchema File")
+
+ if missing_artifacts:
+ _LOGGER.warning(f"Missing artifacts in '{dir_path.name}': {', '.join(missing_artifacts)}.")
+ else:
+ _LOGGER.info(f"All artifacts successfully loaded from '{dir_path.name}'.")

- # Process text files
- self._feature_names = self._process_text(self._feature_names_path)
- self._target_names = self._process_text(self._target_names_path)
-
  def _process_text(self, text_file_path: Path):
  list_strings = load_list_strings(text_file=text_file_path, verbose=False)
  return list_strings

  @property
- def feature_names(self) -> list[str]:
+ def feature_names(self) -> Union[list[str], None]:
  """Returns the feature names as a list of strings."""
+ if self._strict and not self._feature_names:
+ _LOGGER.error("No feature names loaded for Strict mode.")
+ raise ValueError()
  return self._feature_names

  @property
- def target_names(self) -> list[str]:
+ def target_names(self) -> Union[list[str], None]:
  """Returns the target names as a list of strings."""
+ if self._strict and not self._target_names:
+ _LOGGER.error("No target names loaded for Strict mode.")
+ raise ValueError()
  return self._target_names

  @property
- def weights_path(self) -> Path:
+ def weights_path(self) -> Union[Path, None]:
  """Returns the path to the state dictionary pth file."""
+ if self._strict and self._weights_path is None:
+ _LOGGER.error("No weights file loaded for Strict mode.")
+ raise ValueError()
  return self._weights_path

  @property
- def model_architecture_path(self) -> Path:
+ def model_architecture_path(self) -> Union[Path, None]:
  """Returns the path to the model architecture json file."""
+ if self._strict and self._model_architecture_path is None:
+ _LOGGER.error("No model architecture file loaded for Strict mode.")
+ raise ValueError()
  return self._model_architecture_path

  @property
- def scaler_path(self) -> Path:
+ def scaler_path(self) -> Union[Path, None]:
  """Returns the path to the scaler file."""
- if self._scaler_path is None:
- _LOGGER.error("No scaler file loaded. Set 'load_scaler=True'.")
+ if self._strict and self._scaler_path is None:
+ _LOGGER.error("No scaler file loaded for Strict mode.")
  raise ValueError()
  else:
  return self._scaler_path

  @property
- def feature_schema(self) -> FeatureSchema:
+ def feature_schema(self) -> Union[FeatureSchema, None]:
  """Returns the FeatureSchema object."""
- if self._schema is None:
- _LOGGER.error("No FeatureSchema loaded. Set 'load_schema=True'.")
+ if self._strict and self._schema is None:
+ _LOGGER.error("No FeatureSchema loaded for Strict mode.")
  raise ValueError()
  else:
  return self._schema

  def __repr__(self) -> str:
- dir_name = self._weights_path.parent.name
- n_features = len(self._feature_names)
- n_targets = len(self._target_names)
+ dir_name = self._weights_path.parent.name if self._weights_path else "Unknown"
+ n_features = len(self._feature_names) if self._feature_names else "None"
+ n_targets = len(self._target_names) if self._target_names else "None"
  scaler_status = self._scaler_path.name if self._scaler_path else "None"
  schema_status = "Loaded" if self._schema else "None"

  return (
  f"{self.__class__.__name__}\n"
  f" directory='{dir_name}'\n"
- f" weights='{self._weights_path.name}'\n"
- f" architecture='{self._model_architecture_path.name}'\n"
+ f" weights='{self._weights_path.name if self._weights_path else 'None'}'\n"
+ f" architecture='{self._model_architecture_path.name if self._model_architecture_path else 'None'}'\n"
  f" scaler='{scaler_status}'\n"
  f" schema='{schema_status}'\n"
  f" features={n_features}\n"
@@ -136,7 +193,7 @@ class ArtifactFinder:
  )


- def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, verbose: bool=False) -> dict[str, Path]:
+ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, verbose: bool=True, strict:bool=True) -> dict[str, Union[Path, None]]:
  """
  Scans a directory to find paths to model weights, target names, feature names, and model architecture. Optionally an scaler path if `load_scaler` is True.

@@ -155,41 +212,70 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
  target_directory (str | Path): The path to the directory that contains training artifacts.
  load_scaler (bool): If True, the function requires and searches for a scaler file `scaler_*.pth`.
  verbose (bool): If True, enables detailed logging during the search process.
+ strict (bool): If True, raises errors on missing files. If False, returns None for missing files.
  """
  # validate directory
  dir_path = make_fullpath(target_directory, enforce="directory")
  dir_name = dir_path.name

  # find files
- model_pth_dict = list_files_by_extension(directory=dir_path, extension="pth", verbose=verbose)
+ model_pth_dict = list_files_by_extension(directory=dir_path, extension="pth", verbose=False, raise_on_empty=False)

- # restriction
- if load_scaler:
- if len(model_pth_dict) != 2:
- _LOGGER.error(f"Directory '{dir_name}' should contain exactly 2 '.pth' files: scaler and weights.")
- raise IOError()
- else:
- if len(model_pth_dict) != 1:
- _LOGGER.error(f"Directory '{dir_name}' should contain exactly 1 '.pth' file for weights.")
+ if not model_pth_dict:
+ pth_msg=f"No '.pth' files found in directory: {dir_name}."
+ if strict:
+ _LOGGER.error(pth_msg)
  raise IOError()
+ else:
+ if verbose:
+ _LOGGER.warning(pth_msg)
+ model_pth_dict = None
+
+ # restriction
+ if model_pth_dict is not None:
+ valid_count = False
+ msg = ""
+
+ if load_scaler:
+ if len(model_pth_dict) == 2:
+ valid_count = True
+ else:
+ msg = f"Directory '{dir_name}' should contain exactly 2 '.pth' files: scaler and weights. Found {len(model_pth_dict)}."
+ else:
+ if len(model_pth_dict) == 1:
+ valid_count = True
+ else:
+ msg = f"Directory '{dir_name}' should contain exactly 1 '.pth' file for weights. Found {len(model_pth_dict)}."
+
+ # Respect strict mode for count mismatch
+ if not valid_count:
+ if strict:
+ _LOGGER.error(msg)
+ raise IOError()
+ else:
+ if verbose:
+ _LOGGER.warning(msg)
+ # Invalidate dictionary
+ model_pth_dict = None

  ##### Scaler and Weights #####
  scaler_path = None
  weights_path = None

  # load weights and scaler if present
- for pth_filename, pth_path in model_pth_dict.items():
- if load_scaler and pth_filename.lower().startswith(DatasetKeys.SCALER_PREFIX):
- scaler_path = pth_path
- else:
- weights_path = pth_path
+ if model_pth_dict is not None:
+ for pth_filename, pth_path in model_pth_dict.items():
+ if load_scaler and pth_filename.lower().startswith(DatasetKeys.SCALER_PREFIX):
+ scaler_path = pth_path
+ else:
+ weights_path = pth_path

  # validation
- if not weights_path:
+ if not weights_path and strict:
  _LOGGER.error(f"Error parsing the model weights path from '{dir_name}'")
  raise IOError()

- if load_scaler and not scaler_path:
+ if strict and load_scaler and not scaler_path:
  _LOGGER.error(f"Error parsing the scaler path from '{dir_name}'")
  raise IOError()

@@ -198,32 +284,44 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
  feature_names_path = None

  # load feature and target names
- model_txt_dict = list_files_by_extension(directory=dir_path, extension="txt", verbose=verbose)
+ model_txt_dict = list_files_by_extension(directory=dir_path, extension="txt", verbose=False, raise_on_empty=False)

+ # if the directory has no txt files, the loop is skipped
  for txt_filename, txt_path in model_txt_dict.items():
  if txt_filename == DatasetKeys.FEATURE_NAMES:
  feature_names_path = txt_path
  elif txt_filename == DatasetKeys.TARGET_NAMES:
  target_names_path = txt_path

- # validation
- if not target_names_path or not feature_names_path:
- _LOGGER.error(f"Error parsing features path or targets path from '{dir_name}'")
+ # validation per case
+ if strict and not target_names_path:
+ _LOGGER.error(f"Error parsing the target names path from '{dir_name}'")
  raise IOError()
+ elif verbose and not target_names_path:
+ _LOGGER.warning(f"Target names file not found in '{dir_name}'.")

+ if strict and not feature_names_path:
+ _LOGGER.error(f"Error parsing the feature names path from '{dir_name}'")
+ raise IOError()
+ elif verbose and not feature_names_path:
+ _LOGGER.warning(f"Feature names file not found in '{dir_name}'.")
+
  ##### load model architecture path #####
  architecture_path = None

- model_json_dict = list_files_by_extension(directory=dir_path, extension="json", verbose=verbose)
+ model_json_dict = list_files_by_extension(directory=dir_path, extension="json", verbose=False, raise_on_empty=False)

+ # if the directory has no json files, the loop is skipped
  for json_filename, json_path in model_json_dict.items():
  if json_filename == PytorchModelArchitectureKeys.SAVENAME:
  architecture_path = json_path

  # validation
- if not architecture_path:
+ if strict and not architecture_path:
  _LOGGER.error(f"Error parsing the model architecture path from '{dir_name}'")
  raise IOError()
+ elif verbose and not architecture_path:
+ _LOGGER.warning(f"Model architecture file not found in '{dir_name}'.")

  ##### Paths dictionary #####
  parsing_dict = {
@@ -233,7 +331,7 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
  PytorchArtifactPathKeys.TARGETS_PATH: target_names_path,
  }

- if scaler_path is not None:
+ if load_scaler:
  parsing_dict[PytorchArtifactPathKeys.SCALER_PATH] = scaler_path

  return parsing_dict
@@ -246,6 +344,9 @@ def find_model_artifacts_multi(target_directory: Union[str,Path], load_scaler: b
  This function operates on a specific directory structure. It expects the
  `target_directory` to contain one or more subdirectories, where each
  subdirectory represents a single trained model result.
+
+ This function works using a strict mode, meaning that it will raise errors if
+ any required artifacts are missing in a model's subdirectory.

  The expected directory structure for each model is as follows:
  ```
@@ -278,14 +379,16 @@ def find_model_artifacts_multi(target_directory: Union[str,Path], load_scaler: b
  all_artifacts: list[dict[str, Path]] = list()

  # find model directories
- result_dirs_dict = list_subdirectories(root_dir=root_path, verbose=verbose)
+ result_dirs_dict = list_subdirectories(root_dir=root_path, verbose=verbose, raise_on_empty=True)
  for _dir_name, dir_path in result_dirs_dict.items():

  parsing_dict = _find_model_artifacts(target_directory=dir_path,
  load_scaler=load_scaler,
- verbose=verbose)
+ verbose=verbose,
+ strict=True)

- all_artifacts.append(parsing_dict)
+ # parsing_dict is guaranteed to have all required paths due to strict=True
+ all_artifacts.append(parsing_dict) # type: ignore

  return all_artifacts
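
The `ArtifactFinder` and `_find_model_artifacts` hunks above introduce a non-strict mode: with `strict=False`, missing artifacts come back as `None` (and are only logged when `verbose=True`) instead of raising. A usage sketch, assuming the class is exposed through a public `ml_tools.ML_utilities` module; the import path and directory path are illustrative.

```python
# Hypothetical sketch of the new strict/verbose behaviour (assumed import path).
from ml_tools.ML_utilities import ArtifactFinder

finder = ArtifactFinder(
    directory="path/to/model_artifacts",
    load_scaler=True,
    load_schema=True,
    strict=False,   # missing artifacts become None instead of raising
    verbose=True,   # log which artifacts are missing, or a success message
)

weights = finder.weights_path      # Path, or None when the weights file was not found
schema = finder.feature_schema     # FeatureSchema, or None when the JSON could not be loaded
```

`find_model_artifacts_multi`, by contrast, keeps calling `_find_model_artifacts` with `strict=True`, so each model subdirectory must still be complete.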
@@ -721,7 +824,7 @@ def select_features_by_shap(
  root_path = make_fullpath(root_directory, enforce="directory")

  # --- Step 2: Directory and File Discovery ---
- subdirectories = list_subdirectories(root_dir=root_path, verbose=False)
+ subdirectories = list_subdirectories(root_dir=root_path, verbose=False, raise_on_empty=True)

  shap_filename = SHAPKeys.SAVENAME + ".csv"

@@ -169,7 +169,7 @@ def multiple_objective_functions_from_dir(directory: Union[str,Path], add_noise:
  """
  objective_functions = list()
  objective_function_names = list()
- for file_name, file_path in list_files_by_extension(directory=directory, extension='joblib').items():
+ for file_name, file_path in list_files_by_extension(directory=directory, extension='joblib', raise_on_empty=True).items():
  current_objective = ObjectiveFunction(trained_model_path=file_path,
  add_noise=add_noise,
  task=task,
@@ -42,7 +42,7 @@ class DragonEnsembleInferenceHandler:
  self.verbose = verbose
  self._feature_names: Optional[List[str]] = None

- model_files = list_files_by_extension(directory=models_dir, extension="joblib")
+ model_files = list_files_by_extension(directory=models_dir, extension="joblib", raise_on_empty=True)

  for fname, fpath in model_files.items():
  try:
@@ -269,7 +269,7 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path],
  output_path = make_fullpath(results_path / "DistributionPlots", make=True)

  # Check that the directory contains csv files
- list_csv_paths(results_path, verbose=False)
+ list_csv_paths(results_path, verbose=False, raise_on_empty=True)

  # --- Data Loading and Preparation ---
  _LOGGER.debug(f"📁 Starting analysis from results in: '{results_dir}'")
@@ -436,35 +436,28 @@ def sanitize_filename(filename: str) -> str:
  return sanitized


- def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+ def list_csv_paths(directory: Union[str, Path], verbose: bool = True, raise_on_empty: bool = True) -> dict[str, Path]:
  """
  Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.

  Parameters:
  directory (str | Path): Path to the directory containing `.csv` files.
+ verbose (bool): If True, prints found files.
+ raise_on_empty (bool): If True, raises IOError if no files are found.

  Returns:
  (dict[str, Path]): Dictionary mapping {filename: filepath}.
  """
- dir_path = make_fullpath(directory)
+ # wraps the more general function
+ return list_files_by_extension(directory=directory, extension="csv", verbose=verbose, raise_on_empty=raise_on_empty)

- csv_paths = list(dir_path.glob("*.csv"))
- if not csv_paths:
- _LOGGER.error(f"No CSV files found in directory: {dir_path.name}")
- raise IOError()
-
- # make a dictionary of paths and names
- name_path_dict = {p.stem: p for p in csv_paths}
-
- if verbose:
- _LOGGER.info("🗂️ CSV files found:")
- for name in name_path_dict.keys():
- print(f"\t{name}")

- return name_path_dict
-
-
- def list_files_by_extension(directory: Union[str,Path], extension: str, verbose: bool=True) -> dict[str, Path]:
+ def list_files_by_extension(
+ directory: Union[str, Path],
+ extension: str,
+ verbose: bool = True,
+ raise_on_empty: bool = True
+ ) -> dict[str, Path]:
  """
  Lists all files with the specified extension in the given directory and returns a mapping:
  filenames (without extensions) to their absolute paths.
@@ -472,20 +465,29 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
  Parameters:
  directory (str | Path): Path to the directory to search in.
  extension (str): File extension to search for (e.g., 'json', 'txt').
+ verbose (bool): If True, logs the files found.
+ raise_on_empty (bool): If True, raises IOError if no matching files are found.

  Returns:
- (dict[str, Path]): Dictionary mapping {filename: filepath}.
+ (dict[str, Path]): Dictionary mapping {filename: filepath}. Returns empty dict if none found and raise_on_empty is False.
  """
- dir_path = make_fullpath(directory)
+ dir_path = make_fullpath(directory, enforce="directory")

  # Normalize the extension (remove leading dot if present)
  normalized_ext = extension.lstrip(".").lower()
  pattern = f"*.{normalized_ext}"

  matched_paths = list(dir_path.glob(pattern))
+
  if not matched_paths:
- _LOGGER.error(f"No '.{normalized_ext}' files found in directory: {dir_path}.")
- raise IOError()
+ msg = f"No '.{normalized_ext}' files found in directory: {dir_path}."
+ if raise_on_empty:
+ _LOGGER.error(msg)
+ raise IOError()
+ else:
+ if verbose:
+ _LOGGER.warning(msg)
+ return {}

  name_path_dict = {p.stem: p for p in matched_paths}

@@ -497,13 +499,18 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
  return name_path_dict


- def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+ def list_subdirectories(
+ root_dir: Union[str, Path],
+ verbose: bool = True,
+ raise_on_empty: bool = True
+ ) -> dict[str, Path]:
  """
  Scans a directory and returns a dictionary of its immediate subdirectories.

  Args:
  root_dir (str | Path): The path to the directory to scan.
  verbose (bool): If True, prints the number of directories found.
+ raise_on_empty (bool): If True, raises IOError if no subdirectories are found.

  Returns:
  dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.
@@ -513,8 +520,14 @@ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[s
  directories = [p.resolve() for p in root_path.iterdir() if p.is_dir()]

  if len(directories) < 1:
- _LOGGER.error(f"No subdirectories found inside '{root_path}'")
- raise IOError()
+ msg = f"No subdirectories found inside '{root_path}'"
+ if raise_on_empty:
+ _LOGGER.error(msg)
+ raise IOError()
+ else:
+ if verbose:
+ _LOGGER.warning(msg)
+ return {}

  if verbose:
  count = len(directories)
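
The `list_csv_paths`, `list_files_by_extension`, and `list_subdirectories` hunks above add a `raise_on_empty` flag: when it is `False`, an empty match now returns `{}` (with an optional warning) instead of raising `IOError`. A short sketch of both behaviours, assuming these helpers are re-exported from `ml_tools.path_manager`; the import path and directory are illustrative.

```python
# Hypothetical sketch of the new raise_on_empty flag (assumed import path and directory).
from ml_tools.path_manager import list_files_by_extension

# Default behaviour: raise IOError if nothing matches.
checkpoints = list_files_by_extension("runs/exp1", extension="pth")

# New opt-out: log a warning (when verbose) and receive an empty dict instead.
maybe_checkpoints = list_files_by_extension(
    "runs/exp1",
    extension="pth",
    raise_on_empty=False,
)
if not maybe_checkpoints:
    print("No .pth files found; continuing without checkpoints.")
```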
@@ -166,8 +166,12 @@ def load_dataframe_greedy(directory: Union[str, Path],
  dir_path = make_fullpath(directory, enforce="directory")

  # list all csv files and grab one (should be the only one)
- csv_dict = list_csv_paths(directory=dir_path, verbose=False)
+ csv_dict = list_csv_paths(directory=dir_path, verbose=False, raise_on_empty=True)

+ # explicitly check that there is only one csv file
+ if len(csv_dict) > 1:
+ _LOGGER.warning(f"Multiple CSV files found in '{dir_path}'. Only one will be loaded.")
+
  for df_path in csv_dict.values():
  df , _df_name = load_dataframe(df_path=df_path,
  use_columns=use_columns,
@@ -260,7 +264,7 @@ def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True)
  - Output is streamed via a generator to support lazy loading of multiple datasets.
  """
  datasets_path = make_fullpath(datasets_dir)
- files_dict = list_csv_paths(datasets_path, verbose=verbose)
+ files_dict = list_csv_paths(datasets_path, verbose=verbose, raise_on_empty=True)
  for df_name, df_path in files_dict.items():
  df: pd.DataFrame
  df, _ = load_dataframe(df_path, kind="pandas", verbose=verbose) # type: ignore
@@ -1,4 +1,6 @@
  from ._core._optimization_tools import (
+ make_continuous_bounds_template,
+ load_continuous_bounds_template,
  create_optimization_bounds,
  parse_lower_upper_bounds,
  plot_optimal_feature_distributions,
@@ -7,6 +9,8 @@ from ._core._optimization_tools import (
  )

  __all__ = [
+ "make_continuous_bounds_template",
+ "load_continuous_bounds_template",
  "create_optimization_bounds",
  "parse_lower_upper_bounds",
  "plot_optimal_feature_distributions",