dragon-ml-toolbox 19.9.0__py3-none-any.whl → 19.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dragon-ml-toolbox
- Version: 19.9.0
+ Version: 19.11.0
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
  License-Expression: MIT
@@ -90,12 +90,6 @@ Provides-Extra: gui-torch
  Requires-Dist: numpy<2.0; extra == "gui-torch"
  Requires-Dist: torch; extra == "gui-torch"
  Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-torch"
- Provides-Extra: pyinstaller
- Requires-Dist: pyinstaller; extra == "pyinstaller"
- Provides-Extra: nuitka
- Requires-Dist: nuitka; extra == "nuitka"
- Requires-Dist: zstandard; extra == "nuitka"
- Requires-Dist: ordered-set; extra == "nuitka"
  Dynamic: license-file

  # dragon-ml-toolbox
@@ -137,7 +131,7 @@ conda install -c conda-forge dragon-ml-toolbox

  ## Modular Installation

- This toolbox is designed as a collection of mutually exclusive environments due to conflicting core dependencies, except APP bundlers (PyInstaller/Nuitka).
+ This toolbox is designed as a collection of mutually exclusive environments due to conflicting core dependencies.

  - Rule: Create a fresh virtual environment for each module to use.

@@ -330,18 +324,6 @@ schema

  ---

- ### ⚒️ APP bundlers
-
- Dependencies required to compile applications, inference scripts, or GUIs into standalone executables (`.exe` or binary) for distribution. Choose your preferred backend:
-
- ```Bash
- pip install "dragon-ml-toolbox[pyinstaller]"
- ```
-
- ```Bash
- pip install "dragon-ml-toolbox[nuitka]"
- ```
-
  ## Usage

  After installation, import modules like this:
@@ -1,5 +1,5 @@
- dragon_ml_toolbox-19.9.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
- dragon_ml_toolbox-19.9.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=XBLtvGjvBf-q93a5iylHj94Lm78UzInC-3Cii01jc6I,3127
+ dragon_ml_toolbox-19.11.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+ dragon_ml_toolbox-19.11.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
  ml_tools/ETL_cleaning.py,sha256=cKXyRFaaFs_beAGDnQM54xnML671kq-yJEGjHafW-20,351
  ml_tools/ETL_engineering.py,sha256=cwh1FhtNdUHllUDvho-x3SIVj4KwG_rFQR6VYzWUg0U,898
  ml_tools/GUI_tools.py,sha256=O89rG8WQv6GY1DiphQjIsPzXFCQID6te7q_Sgt1iTkQ,294
@@ -45,7 +45,7 @@ ml_tools/ensemble_learning.py,sha256=BLPnpfJWCly-D75mkRP1FE5TExoWAAlAHR89KAzW9iU
  ml_tools/excel_handler.py,sha256=h35HMNnO44btxsTSfZXj2HiJtpRS4fdrJLbzru4heMs,453
  ml_tools/keys.py,sha256=s9HEIAJCRw4DO7ll0yjc8u5rrSI9MOmfkR_1fKpkfy8,263
  ml_tools/math_utilities.py,sha256=53nOXlhb5taUHj4CDHsXliArEfPkOlJD7G_dJa3_iOU,321
- ml_tools/optimization_tools.py,sha256=YQZiXT86kP28NEcyLDbowGeNXHgJDm9hzl-ClNgGQXU,389
+ ml_tools/optimization_tools.py,sha256=rPG2VJ7hk9hv5wfKPq4zPJDXFWKioROOiJWmzXlXzVA,541
  ml_tools/path_manager.py,sha256=ion-x2W_rQjra3ChuOHwVtgXhv7LkpXP0lkBef730tk,350
  ml_tools/plot_fonts.py,sha256=6-WevfhDjbyWbSrFM6bqW-h5NC_mAO4XzdFR-oQ3DPE,110
  ml_tools/schema.py,sha256=AddXOa4P9HinlJ6SnICksHzBqRyi7MaichwVn-z_oVE,219
@@ -55,11 +55,11 @@ ml_tools/_core/_ETL_cleaning.py,sha256=_pTNKuapNHgWErmxvsXW-2YzCm4BaTshKV627A38R
  ml_tools/_core/_ETL_engineering.py,sha256=JgIWrQGyNjmLrbyv5Kh0EHKBLmYlyrGKSnKRxGzxSco,57930
  ml_tools/_core/_GUI_tools.py,sha256=kpvk18Eb4vdLzo-I5mBV1yuwPXs-NJJ01rn-iCXHvIY,49079
  ml_tools/_core/_IO_tools.py,sha256=oWaYa_OVO-8ANVt_a9F1QPMvyOcI2yLbtq7LoVHlqek,16625
- ml_tools/_core/_MICE_imputation.py,sha256=_juIymUnNDRWjSLepL8Ee_PncoShbxjR7YtqTtYbteU,21107
+ ml_tools/_core/_MICE_imputation.py,sha256=64l20duGWt93Q2MbqcWqrA1s99JPRf5AJACb1CZi2xI,21149
  ml_tools/_core/_ML_callbacks.py,sha256=qtCrVFHTq-nk4NIsAdwIkfkKwFXX6I-6PoCgqZELp70,16734
  ml_tools/_core/_ML_chaining_inference.py,sha256=vXUPZzuQ2yKU71kkvUsE0xPo0hN-Yu6gfnL0JbXoRjI,7783
  ml_tools/_core/_ML_chaining_utilities.py,sha256=nsYowgRbkIYuzRiHlqsM3tnC3c-8O73CY8DHUF14XL0,19248
- ml_tools/_core/_ML_configuration.py,sha256=6lKod_NuXSj0ElYmkkwnRxZEiZctMlX1x4b0ByRKKhg,52281
+ ml_tools/_core/_ML_configuration.py,sha256=hwnDCo9URsFqRCgLuFJhGTtoOqbE1XJreNY8B_3spTg,52693
  ml_tools/_core/_ML_configuration_pytab.py,sha256=C3e4iScqdRePVDoqnic6xXMOW7DNYqpgTCeaFDyMdL4,3286
  ml_tools/_core/_ML_datasetmaster.py,sha256=yU1BMtzz6XumMWCetVACrRLk7WJQwmYhaQ-VAWu9Ots,32043
  ml_tools/_core/_ML_evaluation.py,sha256=bu8qlYzhWSC1B7wNfCC5TSF-oed-uP8EF7TV45VTiBM,37325
@@ -70,27 +70,27 @@ ml_tools/_core/_ML_inference.py,sha256=5swm2lnsrDLalBnCm7gZPlDucX4yNCq5vn7ck3SW_
  ml_tools/_core/_ML_models.py,sha256=8FUx4-TVghlBF9srh1_5UxovrWPU7YEZ6XXLqwJei88,27974
  ml_tools/_core/_ML_models_advanced.py,sha256=oU6M5FEBMQ9yPp32cziWh3bz8SXRho07vFMC8ZDVcuU,45002
  ml_tools/_core/_ML_models_pytab.py,sha256=EHHnDG02ghcJORy2gipm3NcrlzL0qygD44o7QGmT1Zs,26297
- ml_tools/_core/_ML_optimization.py,sha256=b1qfHiGyvVoj-ENqDbHTf1jNx55niUWE9KEZJv3vg80,28253
- ml_tools/_core/_ML_optimization_pareto.py,sha256=fad4UjW5TDbCgIsVFk1qmkq8DnU5sahFFuC2DgKAQ3I,36889
+ ml_tools/_core/_ML_optimization.py,sha256=mvG1joVS3U67lmSwzMgLgNGzh4H3Py3ttKeaTM3EUnU,28126
+ ml_tools/_core/_ML_optimization_pareto.py,sha256=1PA8o5qbI13x5QusWhRIJMiPz3cMA2dUT1ZwU9NIZQM,37609
  ml_tools/_core/_ML_scaler.py,sha256=Nhu6qli_QezHQi5NKhRb8Z51bBJgzk2nEp_yW4B9H4U,8134
  ml_tools/_core/_ML_sequence_datasetmaster.py,sha256=0YVOPf-y4ZNdgUxropXUWrmInNyGYaUYprYvXf31n9U,17811
  ml_tools/_core/_ML_sequence_evaluation.py,sha256=AiPHtZ9DRpE6zL9n3Tp5eGGD9vrYRkLbZ0Nc274mL7I,8069
  ml_tools/_core/_ML_sequence_inference.py,sha256=zd3hBwOtLmjAV4JtdB2qFY9GxhysajFufATdy8fjGTE,16316
  ml_tools/_core/_ML_sequence_models.py,sha256=5qcEYLU6wDePBITnikBrj_H9mCvyJmElKa3HiWGXhZs,5639
  ml_tools/_core/_ML_trainer.py,sha256=hSsudWrlYWpi53DXIlKI6ovVhz7xLrQ8oKIDJOXf4Eg,117747
- ml_tools/_core/_ML_utilities.py,sha256=yXVKow-bgpahMChpp7iUlSxAEtgityXwC54FPReeNNA,30487
+ ml_tools/_core/_ML_utilities.py,sha256=elLGD0QYh148_9iNLlqGe1vz-wCFspJa6CWtWTfA3jY,35594
  ml_tools/_core/_ML_vision_datasetmaster.py,sha256=8EsE7luzphVlwBXdOsOwsFfz1D4UIUSEQtqHlM0Vf-o,67084
  ml_tools/_core/_ML_vision_evaluation.py,sha256=BSLf9xrGpaR02Dhkf-fAbgxSpwRjf7DruNIcQadl7qg,11631
  ml_tools/_core/_ML_vision_inference.py,sha256=6K9gMFjAAZKfLAIQlOkm_I9hvCPmO--9-1vnskQRk0I,20190
  ml_tools/_core/_ML_vision_models.py,sha256=oUik-RLxFvZFZCtFztjkSfFYgJuRx4QzfwHVY1ny4Sc,26217
  ml_tools/_core/_ML_vision_transformers.py,sha256=imjL9h5kwpfuRn9rBelNpgtrdU-EecBEcHMFZMXTeZA,15303
- ml_tools/_core/_PSO_optimization.py,sha256=Dg76d7t2ixPCXqQ-KceG9nzuLajHGN0s5RiawRGzsT4,22970
+ ml_tools/_core/_PSO_optimization.py,sha256=W3g5xw2v2eOUQadv8KHFkt5HNm9AiY3ZUk-TeyVuZjw,22991
  ml_tools/_core/_SQL.py,sha256=zX_8EgYfmLmvvrnL851KMkI4w9kdkjHJ997BTvS5aig,11556
  ml_tools/_core/_VIF_factor.py,sha256=BM0mTowBqt45PXFy9oJLhT9C-CTWWo0TQhgCyWYLHtQ,10457
  ml_tools/_core/__init__.py,sha256=d4IG0OxUXj2HffepzQcYixHlZeuuuDMAFa09H_6LtmU,12
  ml_tools/_core/_data_exploration.py,sha256=-g_e4Lox4LN8c2AfhpcPmnI9TNIZGl84O8hWEVH5asA,77438
  ml_tools/_core/_ensemble_evaluation.py,sha256=17lWl4bWLT1BAMv_fhGf2D3wy-F4jx0HgnJ79lYkRuE,28419
- ml_tools/_core/_ensemble_inference.py,sha256=PfZG-r65Vw3IAmBJZg9W0zYGEe-QbhfUh_rd2ho-rr8,8610
+ ml_tools/_core/_ensemble_inference.py,sha256=9UpARSETzmqPdQmxqizD768tjkqldxHw1ER_hM9Kx9M,8631
  ml_tools/_core/_ensemble_learning.py,sha256=X8ghbjDOLMENCWdISXLhDlHQtR3C6SW1tkTBAcfRRPY,22016
  ml_tools/_core/_excel_handler.py,sha256=gV4rSIsiowb0xllpEJxzUKaYDDVpmP_lxs9wZA76-cc,14050
  ml_tools/_core/_keys.py,sha256=pOqxhEFcDuAeuQveJNykdQfB6gVEg8ZY7L7MYQmtY_o,7551
@@ -98,14 +98,14 @@ ml_tools/_core/_logger.py,sha256=86Ge0sDE_WgwsZBglQRYPyFYX3lcsIo0NzszNPzlxuk,525
  ml_tools/_core/_math_utilities.py,sha256=IlXAiZgTcLtus03jJOBOyF9ZCQDf8qLGjrCHu9Mrgak,9091
  ml_tools/_core/_models_advanced_base.py,sha256=ceW0V_CcfOnSFqHlxUhVU8-5mtQq4tFyo8TX-xVexrY,4982
  ml_tools/_core/_models_advanced_helpers.py,sha256=yrAVgYdBsNYD6Vy-pYL5__wI9Z7inOvNUngMgyuypjo,38973
- ml_tools/_core/_optimization_tools.py,sha256=2LkntNRc19uGur9u0yI-KnNX56tc63sxaRNj6W440Og,20077
- ml_tools/_core/_path_manager.py,sha256=z4zqYqppKhgOj3ArfkKo4tieO8oNaHWUoshCQRNbd1w,20284
+ ml_tools/_core/_optimization_tools.py,sha256=WdQkkknbErk4p1cCj2l5CLImK2oRAzhmR3QFR50Hbzk,20098
+ ml_tools/_core/_path_manager.py,sha256=-gJ5qoEzpXsQT7gfxV_6kA7mk33iDsMXRmtOoVPl1JA,20845
  ml_tools/_core/_plot_fonts.py,sha256=CjYXW2gZ9AUaGkyX8_WOXXNYs6d1PTK-nEJBrv_Zb2o,2287
  ml_tools/_core/_schema.py,sha256=TM5WVVMoKOvr_Bc2z34sU_gzKlM465PRKTgdZaEOkGY,14076
  ml_tools/_core/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
  ml_tools/_core/_serde.py,sha256=tsI4EO2Y7jrBMmbQ1pinDsPOrOg-SaPuB-Dt40q0taE,5609
- ml_tools/_core/_utilities.py,sha256=iA8fLWdhsIx4ut2Dp8M_OyU0Y3PPLgGdIklyl17x6xk,22560
- dragon_ml_toolbox-19.9.0.dist-info/METADATA,sha256=_EtgLq25qcjnIMmFvPVOfa-xWTp176hHC_VbxFLdWno,8774
- dragon_ml_toolbox-19.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dragon_ml_toolbox-19.9.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
- dragon_ml_toolbox-19.9.0.dist-info/RECORD,,
+ ml_tools/_core/_utilities.py,sha256=D7FGyEszcMHxGkMW4aqN7JUwabTICCcQz9qsGtOj97o,22787
+ dragon_ml_toolbox-19.11.0.dist-info/METADATA,sha256=HUeAsHLQTdaopzM0YVyRgaofMaHXoOZUan456E5M1JU,8193
+ dragon_ml_toolbox-19.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dragon_ml_toolbox-19.11.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+ dragon_ml_toolbox-19.11.0.dist-info/RECORD,,
@@ -31,13 +31,9 @@ This project depends on the following third-party packages. Each is governed by
  - [colorlog](https://github.com/borntyping/python-colorlog/blob/main/LICENSE)
  - [evotorch](https://github.com/nnaisense/evotorch/blob/master/LICENSE)
  - [FreeSimpleGUI](https://github.com/spyoungtech/FreeSimpleGUI/blob/main/license.txt)
- - [nuitka](https://github.com/Nuitka/Nuitka/blob/main/LICENSE.txt)
  - [omegaconf](https://github.com/omry/omegaconf/blob/master/LICENSE)
- - [ordered-set](https://github.com/rspeer/ordered-set/blob/master/MIT-LICENSE)
- - [pyinstaller](https://github.com/pyinstaller/pyinstaller/blob/develop/COPYING.txt)
  - [pytorch_tabular](https://github.com/manujosephv/pytorch_tabular/blob/main/LICENSE)
  - [torchmetrics](https://github.com/Lightning-AI/torchmetrics/blob/master/LICENSE)
- - [zstandard](https://github.com/indygreg/python-zstandard/blob/main/LICENSE)
  - [captum](https://github.com/meta-pytorch/captum/blob/master/LICENSE)
  - [node](https://github.com/Qwicen/node/blob/master/LICENSE.md)
  - [pytorch-widedeep](https://github.com/jrzaurin/pytorch-widedeep?tab=readme-ov-file#license)
@@ -256,7 +256,7 @@ def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str]
  if input_path.is_file():
  all_file_paths = [input_path]
  else:
- all_file_paths = list(list_csv_paths(input_path).values())
+ all_file_paths = list(list_csv_paths(input_path, raise_on_empty=True).values())

  for df_path in all_file_paths:
  df: pd.DataFrame
@@ -461,7 +461,7 @@ class DragonMICE:
  if input_path.is_file():
  all_file_paths = [input_path]
  else:
- all_file_paths = list(list_csv_paths(input_path).values())
+ all_file_paths = list(list_csv_paths(input_path, raise_on_empty=True).values())

  for df_path in all_file_paths:

@@ -6,7 +6,7 @@ import numpy as np
  from ._schema import FeatureSchema
  from ._script_info import _script_info
  from ._logger import get_logger
- from ._path_manager import sanitize_filename
+ from ._path_manager import sanitize_filename, make_fullpath
  from ._keys import MLTaskKeys


@@ -701,11 +701,11 @@ class DragonParetoConfig(_BaseModelParams):
  def __init__(self,
  save_directory: Union[str, Path],
  target_objectives: Dict[str, Literal["min", "max"]],
- continuous_bounds_map: Union[Dict[str, Tuple[float, float]], Dict[str, List[float]]],
+ continuous_bounds_map: Union[Dict[str, Tuple[float, float]], Dict[str, List[float]], str, Path],
  columns_to_round: Optional[List[str]] = None,
- population_size: int = 400,
+ population_size: int = 500,
  generations: int = 1000,
- solutions_filename: str = "ParetoSolutions",
+ solutions_filename: str = "NonDominatedSolutions",
  float_precision: int = 4,
  log_interval: int = 10,
  plot_size: Tuple[int, int] = (10, 7),
@@ -718,7 +718,7 @@ class DragonParetoConfig(_BaseModelParams):
  save_directory (str | Path): Directory to save artifacts.
  target_objectives (Dict[str, "min"|"max"]): Dictionary mapping target names to optimization direction.
  Example: {"price": "max", "error": "min"}
- continuous_bounds_map (Dict): Bounds for continuous features {name: (min, max)}.
+ continuous_bounds_map (Dict): Bounds for continuous features {name: (min, max)}. Or a path/str to a directory containing the "optimization_bounds.json" file.
  columns_to_round (List[str] | None): List of continuous column names that should be rounded to the nearest integer.
  population_size (int): Size of the genetic population.
  generations (int): Number of generations to run.
@@ -729,7 +729,13 @@ class DragonParetoConfig(_BaseModelParams):
  plot_font_size (int): Font size for plot text.
  discretize_start_at_zero (bool): Categorical encoding start index. True=0, False=1.
  """
- self.save_directory = save_directory
+ # Validate string or Path
+ valid_save_dir = make_fullpath(save_directory, make=True, enforce="directory")
+
+ if isinstance(continuous_bounds_map, (str, Path)):
+ continuous_bounds_map = make_fullpath(continuous_bounds_map, make=False, enforce="directory")
+
+ self.save_directory = valid_save_dir
  self.target_objectives = target_objectives
  self.continuous_bounds_map = continuous_bounds_map
  self.columns_to_round = columns_to_round
@@ -742,7 +748,6 @@ class DragonParetoConfig(_BaseModelParams):
  self.plot_font_size = plot_font_size
  self.discretize_start_at_zero = discretize_start_at_zero

-
  # ----------------------------
  # Metrics Configurations
  # ----------------------------
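
The `DragonParetoConfig` hunks above widen `continuous_bounds_map` so it also accepts a `str`/`Path` pointing at a directory that holds an `optimization_bounds.json` file, and they validate `save_directory` through `make_fullpath`. A minimal construction sketch follows, assuming the class is importable from the public `ml_tools` namespace; the import path, target names, and feature names are illustrative, not taken from the package docs.

```python
# Hypothetical sketch of the two accepted forms of `continuous_bounds_map`.
from pathlib import Path
from ml_tools.ML_configuration import DragonParetoConfig  # assumed import path

# Form 1: explicit {feature: (min, max)} bounds for continuous features.
config = DragonParetoConfig(
    save_directory="results/pareto",
    target_objectives={"strength": "max", "cost": "min"},   # illustrative targets
    continuous_bounds_map={"feature_A": (0.0, 100.0), "feature_B": (-10.0, 10.0)},
    columns_to_round=["feature_A"],
)

# Form 2 (new in 19.11.0): a directory expected to contain "optimization_bounds.json".
config_from_dir = DragonParetoConfig(
    save_directory="results/pareto",
    target_objectives={"strength": "max", "cost": "min"},
    continuous_bounds_map=Path("results/bounds_dir"),
)
```

The defaults changed in the same hunks (`population_size=500`, `solutions_filename="NonDominatedSolutions"`) apply whenever those arguments are omitted.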
@@ -44,12 +44,10 @@ class DragonOptimizer:
  SNES and CEM algorithms do not accept bounds, the given bounds will be used as an initial starting point.

  Example:
- >>> # 1. Get the final schema from data exploration
- >>> schema = data_exploration.finalize_feature_schema(...)
- >>> # 2. Define bounds for continuous features
+ >>> # 1. Define bounds for continuous features
  >>> cont_bounds = {'feature_A': (0, 100), 'feature_B': (-10, 10)}
  >>>
- >>> # 3. Initialize the optimizer
+ >>> # 2. Initialize the optimizer
  >>> optimizer = DragonOptimizer(
  ... inference_handler=my_handler,
  ... schema=schema,
@@ -58,7 +56,7 @@ class DragonOptimizer:
  ... task="max",
  ... algorithm="Genetic",
  ... )
- >>> # 4. Run the optimization
+ >>> # 3. Run the optimization
  >>> best_result = optimizer.run(
  ... num_generations=100,
  ... save_dir="/path/to/results",
@@ -7,7 +7,7 @@ import matplotlib.cm as cm
  from matplotlib.collections import LineCollection
  import seaborn as sns
  from pathlib import Path
- from typing import Literal, Union, Tuple, List, Optional, Dict
+ from typing import Literal, Union, List, Optional, Dict
  from tqdm import tqdm
  import plotly.express as px
  import plotly.graph_objects as go
@@ -21,7 +21,7 @@ from ._SQL import DragonSQL
  from ._ML_inference import DragonInferenceHandler
  from ._ML_chaining_inference import DragonChainInference
  from ._ML_configuration import DragonParetoConfig
- from ._optimization_tools import create_optimization_bounds, plot_optimal_feature_distributions_from_dataframe
+ from ._optimization_tools import create_optimization_bounds, plot_optimal_feature_distributions_from_dataframe, load_continuous_bounds_template
  from ._math_utilities import discretize_categorical_values
  from ._utilities import save_dataframe_filename
  from ._IO_tools import save_json
@@ -107,6 +107,10 @@ class DragonParetoOptimizer:
  _LOGGER.error(f"Target '{name}' not found in model targets: {available_targets}")
  raise ValueError()

+ if direction not in ["min" , "max"]:
+ _LOGGER.error(f"Invalid optimization direction '{direction}' for target '{name}'. Use 'min' or 'max'.")
+ raise ValueError()
+
  # For standard handlers, we need indices to slice the output tensor.
  # For chain handlers, we just rely on name matching, but we track index for consistency.
  idx = available_targets.index(name)
@@ -117,10 +121,20 @@ class DragonParetoOptimizer:
  _LOGGER.info(f"Pareto Optimization setup for: {self.ordered_target_names}")

  # --- 2. Bounds Setup ---
+ # check type
+ raw_bounds_map = config.continuous_bounds_map
+ if isinstance(raw_bounds_map, (str, Path)):
+ continuous_bounds = load_continuous_bounds_template(raw_bounds_map)
+ elif isinstance(raw_bounds_map, dict):
+ continuous_bounds = raw_bounds_map
+ else:
+ _LOGGER.error(f"Invalid type for 'continuous_bounds_map' in config. Expected dict or Path. Got {type(raw_bounds_map)}.")
+ raise ValueError()
+
  # Uses the external tool which reads the schema to set correct bounds for both continuous and categorical
  bounds = create_optimization_bounds(
  schema=schema,
- continuous_bounds_map=config.continuous_bounds_map,
+ continuous_bounds_map=continuous_bounds,
  start_at_zero=self.discretize_start_at_zero
  )
  self.lower_bounds = list(bounds[0])
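
When the config carries a path instead of a dict, the bounds-setup hunk above resolves it through the new `load_continuous_bounds_template` helper before calling `create_optimization_bounds`. A rough sketch of that path-based flow, assuming the helper is re-exported from `ml_tools.optimization_tools` (the re-export appears in the `__init__` diff at the end of this changeset); only the single positional argument visible in the diff is used, and any further keyword options are unknown here.

```python
# Hypothetical sketch: loading continuous bounds from a saved template directory.
from ml_tools.optimization_tools import load_continuous_bounds_template  # assumed import path

# Directory assumed to contain "optimization_bounds.json" (per the DragonParetoConfig docstring).
continuous_bounds = load_continuous_bounds_template("results/bounds_dir")
# Expected shape: {"feature_A": (0.0, 100.0), "feature_B": (-10.0, 10.0)}
```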
@@ -46,17 +46,24 @@ class ArtifactFinder:
  └── FeatureSchema.json (Required if `load_schema` is True)
  ```
  """
- def __init__(self, directory: Union[str, Path], load_scaler: bool, load_schema: bool) -> None:
+ def __init__(self,
+ directory: Union[str, Path],
+ load_scaler: bool,
+ load_schema: bool,
+ strict: bool=False,
+ verbose: bool=True) -> None:
  """
  Args:
  directory (str | Path): The path to the directory that contains training artifacts.
  load_scaler (bool): If True, requires and searches for a scaler file `scaler_*.pth`.
  load_schema (bool): If True, requires and searches for a FeatureSchema file `FeatureSchema.json`.
+ strict (bool): If True, raises an error if any artifact is missing. If False, returns None for missing artifacts silently.
+ verbose (bool): Displays the missing artifacts in the directory or a success message.
  """
  # validate directory
  dir_path = make_fullpath(directory, enforce="directory")

- parsing_dict = _find_model_artifacts(target_directory=dir_path, load_scaler=load_scaler, verbose=False)
+ parsing_dict = _find_model_artifacts(target_directory=dir_path, load_scaler=load_scaler, verbose=False, strict=strict)
  self._weights_path = parsing_dict[PytorchArtifactPathKeys.WEIGHTS_PATH]
  self._feature_names_path = parsing_dict[PytorchArtifactPathKeys.FEATURES_PATH]
  self._model_architecture_path = parsing_dict[PytorchArtifactPathKeys.ARCHITECTURE_PATH]
@@ -64,71 +71,121 @@ class ArtifactFinder:
  self._model_architecture_path = parsing_dict[PytorchArtifactPathKeys.ARCHITECTURE_PATH]
  self._scaler_path = None
  self._schema = None
+ self._strict = strict

  if load_scaler:
  self._scaler_path = parsing_dict[PytorchArtifactPathKeys.SCALER_PATH]

  if load_schema:
- self._schema = FeatureSchema.from_json(directory=dir_path)
+ try:
+ self._schema = FeatureSchema.from_json(directory=dir_path)
+ except Exception:
+ if strict:
+ # FeatureSchema logs its own error details
+ # _LOGGER.error(f"Failed to load FeatureSchema from '{dir_path.name}': {e}")
+ raise FileNotFoundError()
+ else:
+ # _LOGGER.warning(f"Could not load FeatureSchema from '{dir_path.name}': {e}")
+ self._schema = None
+
+ # Process feature names
+ if self._feature_names_path is not None:
+ self._feature_names = self._process_text(self._feature_names_path)
+ else:
+ self._feature_names = None
+ # Process target names
+ if self._target_names_path is not None:
+ self._target_names = self._process_text(self._target_names_path)
+ else:
+ self._target_names = None
+
+ if verbose:
+ # log missing artifacts
+ missing_artifacts = []
+ if self._feature_names is None:
+ missing_artifacts.append("Feature Names")
+ if self._target_names is None:
+ missing_artifacts.append("Target Names")
+ if self._weights_path is None:
+ missing_artifacts.append("Weights File")
+ if self._model_architecture_path is None:
+ missing_artifacts.append("Model Architecture File")
+ if load_scaler and self._scaler_path is None:
+ missing_artifacts.append("Scaler File")
+ if load_schema and self._schema is None:
+ missing_artifacts.append("FeatureSchema File")
+
+ if missing_artifacts:
+ _LOGGER.warning(f"Missing artifacts in '{dir_path.name}': {', '.join(missing_artifacts)}.")
+ else:
+ _LOGGER.info(f"All artifacts successfully loaded from '{dir_path.name}'.")

- # Process text files
- self._feature_names = self._process_text(self._feature_names_path)
- self._target_names = self._process_text(self._target_names_path)
-
  def _process_text(self, text_file_path: Path):
  list_strings = load_list_strings(text_file=text_file_path, verbose=False)
  return list_strings

  @property
- def feature_names(self) -> list[str]:
+ def feature_names(self) -> Union[list[str], None]:
  """Returns the feature names as a list of strings."""
+ if self._strict and not self._feature_names:
+ _LOGGER.error("No feature names loaded for Strict mode.")
+ raise ValueError()
  return self._feature_names

  @property
- def target_names(self) -> list[str]:
+ def target_names(self) -> Union[list[str], None]:
  """Returns the target names as a list of strings."""
+ if self._strict and not self._target_names:
+ _LOGGER.error("No target names loaded for Strict mode.")
+ raise ValueError()
  return self._target_names

  @property
- def weights_path(self) -> Path:
+ def weights_path(self) -> Union[Path, None]:
  """Returns the path to the state dictionary pth file."""
+ if self._strict and self._weights_path is None:
+ _LOGGER.error("No weights file loaded for Strict mode.")
+ raise ValueError()
  return self._weights_path

  @property
- def model_architecture_path(self) -> Path:
+ def model_architecture_path(self) -> Union[Path, None]:
  """Returns the path to the model architecture json file."""
+ if self._strict and self._model_architecture_path is None:
+ _LOGGER.error("No model architecture file loaded for Strict mode.")
+ raise ValueError()
  return self._model_architecture_path

  @property
- def scaler_path(self) -> Path:
+ def scaler_path(self) -> Union[Path, None]:
  """Returns the path to the scaler file."""
- if self._scaler_path is None:
- _LOGGER.error("No scaler file loaded. Set 'load_scaler=True'.")
+ if self._strict and self._scaler_path is None:
+ _LOGGER.error("No scaler file loaded for Strict mode.")
  raise ValueError()
  else:
  return self._scaler_path

  @property
- def feature_schema(self) -> FeatureSchema:
+ def feature_schema(self) -> Union[FeatureSchema, None]:
  """Returns the FeatureSchema object."""
- if self._schema is None:
- _LOGGER.error("No FeatureSchema loaded. Set 'load_schema=True'.")
+ if self._strict and self._schema is None:
+ _LOGGER.error("No FeatureSchema loaded for Strict mode.")
  raise ValueError()
  else:
  return self._schema

  def __repr__(self) -> str:
- dir_name = self._weights_path.parent.name
- n_features = len(self._feature_names)
- n_targets = len(self._target_names)
+ dir_name = self._weights_path.parent.name if self._weights_path else "Unknown"
+ n_features = len(self._feature_names) if self._feature_names else "None"
+ n_targets = len(self._target_names) if self._target_names else "None"
  scaler_status = self._scaler_path.name if self._scaler_path else "None"
  schema_status = "Loaded" if self._schema else "None"

  return (
  f"{self.__class__.__name__}\n"
  f" directory='{dir_name}'\n"
- f" weights='{self._weights_path.name}'\n"
- f" architecture='{self._model_architecture_path.name}'\n"
+ f" weights='{self._weights_path.name if self._weights_path else 'None'}'\n"
+ f" architecture='{self._model_architecture_path.name if self._model_architecture_path else 'None'}'\n"
  f" scaler='{scaler_status}'\n"
  f" schema='{schema_status}'\n"
  f" features={n_features}\n"
@@ -136,7 +193,7 @@ class ArtifactFinder:
  )


- def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, verbose: bool=False) -> dict[str, Path]:
+ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, verbose: bool=True, strict:bool=True) -> dict[str, Union[Path, None]]:
  """
  Scans a directory to find paths to model weights, target names, feature names, and model architecture. Optionally an scaler path if `load_scaler` is True.

@@ -155,41 +212,70 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
  target_directory (str | Path): The path to the directory that contains training artifacts.
  load_scaler (bool): If True, the function requires and searches for a scaler file `scaler_*.pth`.
  verbose (bool): If True, enables detailed logging during the search process.
+ strict (bool): If True, raises errors on missing files. If False, returns None for missing files.
  """
  # validate directory
  dir_path = make_fullpath(target_directory, enforce="directory")
  dir_name = dir_path.name

  # find files
- model_pth_dict = list_files_by_extension(directory=dir_path, extension="pth", verbose=verbose)
+ model_pth_dict = list_files_by_extension(directory=dir_path, extension="pth", verbose=False, raise_on_empty=False)

- # restriction
- if load_scaler:
- if len(model_pth_dict) != 2:
- _LOGGER.error(f"Directory '{dir_name}' should contain exactly 2 '.pth' files: scaler and weights.")
- raise IOError()
- else:
- if len(model_pth_dict) != 1:
- _LOGGER.error(f"Directory '{dir_name}' should contain exactly 1 '.pth' file for weights.")
+ if not model_pth_dict:
+ pth_msg=f"No '.pth' files found in directory: {dir_name}."
+ if strict:
+ _LOGGER.error(pth_msg)
  raise IOError()
+ else:
+ if verbose:
+ _LOGGER.warning(pth_msg)
+ model_pth_dict = None
+
+ # restriction
+ if model_pth_dict is not None:
+ valid_count = False
+ msg = ""
+
+ if load_scaler:
+ if len(model_pth_dict) == 2:
+ valid_count = True
+ else:
+ msg = f"Directory '{dir_name}' should contain exactly 2 '.pth' files: scaler and weights. Found {len(model_pth_dict)}."
+ else:
+ if len(model_pth_dict) == 1:
+ valid_count = True
+ else:
+ msg = f"Directory '{dir_name}' should contain exactly 1 '.pth' file for weights. Found {len(model_pth_dict)}."
+
+ # Respect strict mode for count mismatch
+ if not valid_count:
+ if strict:
+ _LOGGER.error(msg)
+ raise IOError()
+ else:
+ if verbose:
+ _LOGGER.warning(msg)
+ # Invalidate dictionary
+ model_pth_dict = None

  ##### Scaler and Weights #####
  scaler_path = None
  weights_path = None

  # load weights and scaler if present
- for pth_filename, pth_path in model_pth_dict.items():
- if load_scaler and pth_filename.lower().startswith(DatasetKeys.SCALER_PREFIX):
- scaler_path = pth_path
- else:
- weights_path = pth_path
+ if model_pth_dict is not None:
+ for pth_filename, pth_path in model_pth_dict.items():
+ if load_scaler and pth_filename.lower().startswith(DatasetKeys.SCALER_PREFIX):
+ scaler_path = pth_path
+ else:
+ weights_path = pth_path

  # validation
- if not weights_path:
+ if not weights_path and strict:
  _LOGGER.error(f"Error parsing the model weights path from '{dir_name}'")
  raise IOError()

- if load_scaler and not scaler_path:
+ if strict and load_scaler and not scaler_path:
  _LOGGER.error(f"Error parsing the scaler path from '{dir_name}'")
  raise IOError()

@@ -198,32 +284,44 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
  feature_names_path = None

  # load feature and target names
- model_txt_dict = list_files_by_extension(directory=dir_path, extension="txt", verbose=verbose)
+ model_txt_dict = list_files_by_extension(directory=dir_path, extension="txt", verbose=False, raise_on_empty=False)

+ # if the directory has no txt files, the loop is skipped
  for txt_filename, txt_path in model_txt_dict.items():
  if txt_filename == DatasetKeys.FEATURE_NAMES:
  feature_names_path = txt_path
  elif txt_filename == DatasetKeys.TARGET_NAMES:
  target_names_path = txt_path

- # validation
- if not target_names_path or not feature_names_path:
- _LOGGER.error(f"Error parsing features path or targets path from '{dir_name}'")
+ # validation per case
+ if strict and not target_names_path:
+ _LOGGER.error(f"Error parsing the target names path from '{dir_name}'")
  raise IOError()
+ elif verbose and not target_names_path:
+ _LOGGER.warning(f"Target names file not found in '{dir_name}'.")

+ if strict and not feature_names_path:
+ _LOGGER.error(f"Error parsing the feature names path from '{dir_name}'")
+ raise IOError()
+ elif verbose and not feature_names_path:
+ _LOGGER.warning(f"Feature names file not found in '{dir_name}'.")
+
  ##### load model architecture path #####
  architecture_path = None

- model_json_dict = list_files_by_extension(directory=dir_path, extension="json", verbose=verbose)
+ model_json_dict = list_files_by_extension(directory=dir_path, extension="json", verbose=False, raise_on_empty=False)

+ # if the directory has no json files, the loop is skipped
  for json_filename, json_path in model_json_dict.items():
  if json_filename == PytorchModelArchitectureKeys.SAVENAME:
  architecture_path = json_path

  # validation
- if not architecture_path:
+ if strict and not architecture_path:
  _LOGGER.error(f"Error parsing the model architecture path from '{dir_name}'")
  raise IOError()
+ elif verbose and not architecture_path:
+ _LOGGER.warning(f"Model architecture file not found in '{dir_name}'.")

  ##### Paths dictionary #####
  parsing_dict = {
@@ -233,7 +331,7 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
  PytorchArtifactPathKeys.TARGETS_PATH: target_names_path,
  }

- if scaler_path is not None:
+ if load_scaler:
  parsing_dict[PytorchArtifactPathKeys.SCALER_PATH] = scaler_path

  return parsing_dict
@@ -246,6 +344,9 @@ def find_model_artifacts_multi(target_directory: Union[str,Path], load_scaler: b
  This function operates on a specific directory structure. It expects the
  `target_directory` to contain one or more subdirectories, where each
  subdirectory represents a single trained model result.
+
+ This function works using a strict mode, meaning that it will raise errors if
+ any required artifacts are missing in a model's subdirectory.

  The expected directory structure for each model is as follows:
  ```
@@ -278,14 +379,16 @@ def find_model_artifacts_multi(target_directory: Union[str,Path], load_scaler: b
  all_artifacts: list[dict[str, Path]] = list()

  # find model directories
- result_dirs_dict = list_subdirectories(root_dir=root_path, verbose=verbose)
+ result_dirs_dict = list_subdirectories(root_dir=root_path, verbose=verbose, raise_on_empty=True)
  for _dir_name, dir_path in result_dirs_dict.items():

  parsing_dict = _find_model_artifacts(target_directory=dir_path,
  load_scaler=load_scaler,
- verbose=verbose)
+ verbose=verbose,
+ strict=True)

- all_artifacts.append(parsing_dict)
+ # parsing_dict is guaranteed to have all required paths due to strict=True
+ all_artifacts.append(parsing_dict) # type: ignore

  return all_artifacts
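
The `ArtifactFinder` and `_find_model_artifacts` hunks above introduce a non-strict mode: with `strict=False`, missing artifacts come back as `None` (and are only logged when `verbose=True`) instead of raising. A usage sketch, assuming the class is exposed through a public `ml_tools.ML_utilities` module; the import path and directory path are illustrative.

```python
# Hypothetical sketch of the new strict/verbose behaviour (assumed import path).
from ml_tools.ML_utilities import ArtifactFinder

finder = ArtifactFinder(
    directory="path/to/model_artifacts",
    load_scaler=True,
    load_schema=True,
    strict=False,   # missing artifacts become None instead of raising
    verbose=True,   # log which artifacts are missing, or a success message
)

weights = finder.weights_path      # Path, or None when the weights file was not found
schema = finder.feature_schema     # FeatureSchema, or None when the JSON could not be loaded
```

`find_model_artifacts_multi`, by contrast, keeps calling `_find_model_artifacts` with `strict=True`, so each model subdirectory must still be complete.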
@@ -721,7 +824,7 @@ def select_features_by_shap(
  root_path = make_fullpath(root_directory, enforce="directory")

  # --- Step 2: Directory and File Discovery ---
- subdirectories = list_subdirectories(root_dir=root_path, verbose=False)
+ subdirectories = list_subdirectories(root_dir=root_path, verbose=False, raise_on_empty=True)

  shap_filename = SHAPKeys.SAVENAME + ".csv"

@@ -169,7 +169,7 @@ def multiple_objective_functions_from_dir(directory: Union[str,Path], add_noise:
  """
  objective_functions = list()
  objective_function_names = list()
- for file_name, file_path in list_files_by_extension(directory=directory, extension='joblib').items():
+ for file_name, file_path in list_files_by_extension(directory=directory, extension='joblib', raise_on_empty=True).items():
  current_objective = ObjectiveFunction(trained_model_path=file_path,
  add_noise=add_noise,
  task=task,
@@ -42,7 +42,7 @@ class DragonEnsembleInferenceHandler:
  self.verbose = verbose
  self._feature_names: Optional[List[str]] = None

- model_files = list_files_by_extension(directory=models_dir, extension="joblib")
+ model_files = list_files_by_extension(directory=models_dir, extension="joblib", raise_on_empty=True)

  for fname, fpath in model_files.items():
  try:
@@ -269,7 +269,7 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path],
  output_path = make_fullpath(results_path / "DistributionPlots", make=True)

  # Check that the directory contains csv files
- list_csv_paths(results_path, verbose=False)
+ list_csv_paths(results_path, verbose=False, raise_on_empty=True)

  # --- Data Loading and Preparation ---
  _LOGGER.debug(f"📁 Starting analysis from results in: '{results_dir}'")
@@ -436,35 +436,28 @@ def sanitize_filename(filename: str) -> str:
  return sanitized


- def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+ def list_csv_paths(directory: Union[str, Path], verbose: bool = True, raise_on_empty: bool = True) -> dict[str, Path]:
  """
  Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.

  Parameters:
  directory (str | Path): Path to the directory containing `.csv` files.
+ verbose (bool): If True, prints found files.
+ raise_on_empty (bool): If True, raises IOError if no files are found.

  Returns:
  (dict[str, Path]): Dictionary mapping {filename: filepath}.
  """
- dir_path = make_fullpath(directory)
+ # wraps the more general function
+ return list_files_by_extension(directory=directory, extension="csv", verbose=verbose, raise_on_empty=raise_on_empty)

- csv_paths = list(dir_path.glob("*.csv"))
- if not csv_paths:
- _LOGGER.error(f"No CSV files found in directory: {dir_path.name}")
- raise IOError()
-
- # make a dictionary of paths and names
- name_path_dict = {p.stem: p for p in csv_paths}
-
- if verbose:
- _LOGGER.info("🗂️ CSV files found:")
- for name in name_path_dict.keys():
- print(f"\t{name}")

- return name_path_dict
-
-
- def list_files_by_extension(directory: Union[str,Path], extension: str, verbose: bool=True) -> dict[str, Path]:
+ def list_files_by_extension(
+ directory: Union[str, Path],
+ extension: str,
+ verbose: bool = True,
+ raise_on_empty: bool = True
+ ) -> dict[str, Path]:
  """
  Lists all files with the specified extension in the given directory and returns a mapping:
  filenames (without extensions) to their absolute paths.
@@ -472,20 +465,29 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
  Parameters:
  directory (str | Path): Path to the directory to search in.
  extension (str): File extension to search for (e.g., 'json', 'txt').
+ verbose (bool): If True, logs the files found.
+ raise_on_empty (bool): If True, raises IOError if no matching files are found.

  Returns:
- (dict[str, Path]): Dictionary mapping {filename: filepath}.
+ (dict[str, Path]): Dictionary mapping {filename: filepath}. Returns empty dict if none found and raise_on_empty is False.
  """
- dir_path = make_fullpath(directory)
+ dir_path = make_fullpath(directory, enforce="directory")

  # Normalize the extension (remove leading dot if present)
  normalized_ext = extension.lstrip(".").lower()
  pattern = f"*.{normalized_ext}"

  matched_paths = list(dir_path.glob(pattern))
+
  if not matched_paths:
- _LOGGER.error(f"No '.{normalized_ext}' files found in directory: {dir_path}.")
- raise IOError()
+ msg = f"No '.{normalized_ext}' files found in directory: {dir_path}."
+ if raise_on_empty:
+ _LOGGER.error(msg)
+ raise IOError()
+ else:
+ if verbose:
+ _LOGGER.warning(msg)
+ return {}

  name_path_dict = {p.stem: p for p in matched_paths}

@@ -497,13 +499,18 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
  return name_path_dict


- def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+ def list_subdirectories(
+ root_dir: Union[str, Path],
+ verbose: bool = True,
+ raise_on_empty: bool = True
+ ) -> dict[str, Path]:
  """
  Scans a directory and returns a dictionary of its immediate subdirectories.

  Args:
  root_dir (str | Path): The path to the directory to scan.
  verbose (bool): If True, prints the number of directories found.
+ raise_on_empty (bool): If True, raises IOError if no subdirectories are found.

  Returns:
  dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.
@@ -513,8 +520,14 @@ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[s
  directories = [p.resolve() for p in root_path.iterdir() if p.is_dir()]

  if len(directories) < 1:
- _LOGGER.error(f"No subdirectories found inside '{root_path}'")
- raise IOError()
+ msg = f"No subdirectories found inside '{root_path}'"
+ if raise_on_empty:
+ _LOGGER.error(msg)
+ raise IOError()
+ else:
+ if verbose:
+ _LOGGER.warning(msg)
+ return {}

  if verbose:
  count = len(directories)
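
The `list_csv_paths`, `list_files_by_extension`, and `list_subdirectories` hunks above add a `raise_on_empty` flag: when it is `False`, an empty match now returns `{}` (with an optional warning) instead of raising `IOError`. A short sketch of both behaviours, assuming these helpers are re-exported from `ml_tools.path_manager`; the import path and directory are illustrative.

```python
# Hypothetical sketch of the new raise_on_empty flag (assumed import path and directory).
from ml_tools.path_manager import list_files_by_extension

# Default behaviour: raise IOError if nothing matches.
checkpoints = list_files_by_extension("runs/exp1", extension="pth")

# New opt-out: log a warning (when verbose) and receive an empty dict instead.
maybe_checkpoints = list_files_by_extension(
    "runs/exp1",
    extension="pth",
    raise_on_empty=False,
)
if not maybe_checkpoints:
    print("No .pth files found; continuing without checkpoints.")
```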
@@ -166,8 +166,12 @@ def load_dataframe_greedy(directory: Union[str, Path],
  dir_path = make_fullpath(directory, enforce="directory")

  # list all csv files and grab one (should be the only one)
- csv_dict = list_csv_paths(directory=dir_path, verbose=False)
+ csv_dict = list_csv_paths(directory=dir_path, verbose=False, raise_on_empty=True)

+ # explicitly check that there is only one csv file
+ if len(csv_dict) > 1:
+ _LOGGER.warning(f"Multiple CSV files found in '{dir_path}'. Only one will be loaded.")
+
  for df_path in csv_dict.values():
  df , _df_name = load_dataframe(df_path=df_path,
  use_columns=use_columns,
@@ -260,7 +264,7 @@ def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True)
  - Output is streamed via a generator to support lazy loading of multiple datasets.
  """
  datasets_path = make_fullpath(datasets_dir)
- files_dict = list_csv_paths(datasets_path, verbose=verbose)
+ files_dict = list_csv_paths(datasets_path, verbose=verbose, raise_on_empty=True)
  for df_name, df_path in files_dict.items():
  df: pd.DataFrame
  df, _ = load_dataframe(df_path, kind="pandas", verbose=verbose) # type: ignore
@@ -1,4 +1,6 @@
  from ._core._optimization_tools import (
+ make_continuous_bounds_template,
+ load_continuous_bounds_template,
  create_optimization_bounds,
  parse_lower_upper_bounds,
  plot_optimal_feature_distributions,
@@ -7,6 +9,8 @@ from ._core._optimization_tools import (
  )

  __all__ = [
+ "make_continuous_bounds_template",
+ "load_continuous_bounds_template",
  "create_optimization_bounds",
  "parse_lower_upper_bounds",
  "plot_optimal_feature_distributions",