dragon-ml-toolbox 19.9.0__py3-none-any.whl → 19.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dragon-ml-toolbox
- Version: 19.9.0
+ Version: 19.10.0
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
  License-Expression: MIT
@@ -90,12 +90,6 @@ Provides-Extra: gui-torch
  Requires-Dist: numpy<2.0; extra == "gui-torch"
  Requires-Dist: torch; extra == "gui-torch"
  Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-torch"
- Provides-Extra: pyinstaller
- Requires-Dist: pyinstaller; extra == "pyinstaller"
- Provides-Extra: nuitka
- Requires-Dist: nuitka; extra == "nuitka"
- Requires-Dist: zstandard; extra == "nuitka"
- Requires-Dist: ordered-set; extra == "nuitka"
  Dynamic: license-file

  # dragon-ml-toolbox
@@ -137,7 +131,7 @@ conda install -c conda-forge dragon-ml-toolbox

  ## Modular Installation

- This toolbox is designed as a collection of mutually exclusive environments due to conflicting core dependencies, except APP bundlers (PyInstaller/Nuitka).
+ This toolbox is designed as a collection of mutually exclusive environments due to conflicting core dependencies.

  - Rule: Create a fresh virtual environment for each module to use.

@@ -330,18 +324,6 @@ schema

  ---

- ### ⚒️ APP bundlers
-
- Dependencies required to compile applications, inference scripts, or GUIs into standalone executables (`.exe` or binary) for distribution. Choose your preferred backend:
-
- ```Bash
- pip install "dragon-ml-toolbox[pyinstaller]"
- ```
-
- ```Bash
- pip install "dragon-ml-toolbox[nuitka]"
- ```
-
  ## Usage

  After installation, import modules like this:
@@ -1,5 +1,5 @@
- dragon_ml_toolbox-19.9.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
- dragon_ml_toolbox-19.9.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=XBLtvGjvBf-q93a5iylHj94Lm78UzInC-3Cii01jc6I,3127
+ dragon_ml_toolbox-19.10.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+ dragon_ml_toolbox-19.10.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
  ml_tools/ETL_cleaning.py,sha256=cKXyRFaaFs_beAGDnQM54xnML671kq-yJEGjHafW-20,351
  ml_tools/ETL_engineering.py,sha256=cwh1FhtNdUHllUDvho-x3SIVj4KwG_rFQR6VYzWUg0U,898
  ml_tools/GUI_tools.py,sha256=O89rG8WQv6GY1DiphQjIsPzXFCQID6te7q_Sgt1iTkQ,294
@@ -45,7 +45,7 @@ ml_tools/ensemble_learning.py,sha256=BLPnpfJWCly-D75mkRP1FE5TExoWAAlAHR89KAzW9iU
  ml_tools/excel_handler.py,sha256=h35HMNnO44btxsTSfZXj2HiJtpRS4fdrJLbzru4heMs,453
  ml_tools/keys.py,sha256=s9HEIAJCRw4DO7ll0yjc8u5rrSI9MOmfkR_1fKpkfy8,263
  ml_tools/math_utilities.py,sha256=53nOXlhb5taUHj4CDHsXliArEfPkOlJD7G_dJa3_iOU,321
- ml_tools/optimization_tools.py,sha256=YQZiXT86kP28NEcyLDbowGeNXHgJDm9hzl-ClNgGQXU,389
+ ml_tools/optimization_tools.py,sha256=rPG2VJ7hk9hv5wfKPq4zPJDXFWKioROOiJWmzXlXzVA,541
  ml_tools/path_manager.py,sha256=ion-x2W_rQjra3ChuOHwVtgXhv7LkpXP0lkBef730tk,350
  ml_tools/plot_fonts.py,sha256=6-WevfhDjbyWbSrFM6bqW-h5NC_mAO4XzdFR-oQ3DPE,110
  ml_tools/schema.py,sha256=AddXOa4P9HinlJ6SnICksHzBqRyi7MaichwVn-z_oVE,219
@@ -59,7 +59,7 @@ ml_tools/_core/_MICE_imputation.py,sha256=_juIymUnNDRWjSLepL8Ee_PncoShbxjR7YtqTt
  ml_tools/_core/_ML_callbacks.py,sha256=qtCrVFHTq-nk4NIsAdwIkfkKwFXX6I-6PoCgqZELp70,16734
  ml_tools/_core/_ML_chaining_inference.py,sha256=vXUPZzuQ2yKU71kkvUsE0xPo0hN-Yu6gfnL0JbXoRjI,7783
  ml_tools/_core/_ML_chaining_utilities.py,sha256=nsYowgRbkIYuzRiHlqsM3tnC3c-8O73CY8DHUF14XL0,19248
- ml_tools/_core/_ML_configuration.py,sha256=6lKod_NuXSj0ElYmkkwnRxZEiZctMlX1x4b0ByRKKhg,52281
+ ml_tools/_core/_ML_configuration.py,sha256=hwnDCo9URsFqRCgLuFJhGTtoOqbE1XJreNY8B_3spTg,52693
  ml_tools/_core/_ML_configuration_pytab.py,sha256=C3e4iScqdRePVDoqnic6xXMOW7DNYqpgTCeaFDyMdL4,3286
  ml_tools/_core/_ML_datasetmaster.py,sha256=yU1BMtzz6XumMWCetVACrRLk7WJQwmYhaQ-VAWu9Ots,32043
  ml_tools/_core/_ML_evaluation.py,sha256=bu8qlYzhWSC1B7wNfCC5TSF-oed-uP8EF7TV45VTiBM,37325
@@ -70,8 +70,8 @@ ml_tools/_core/_ML_inference.py,sha256=5swm2lnsrDLalBnCm7gZPlDucX4yNCq5vn7ck3SW_
  ml_tools/_core/_ML_models.py,sha256=8FUx4-TVghlBF9srh1_5UxovrWPU7YEZ6XXLqwJei88,27974
  ml_tools/_core/_ML_models_advanced.py,sha256=oU6M5FEBMQ9yPp32cziWh3bz8SXRho07vFMC8ZDVcuU,45002
  ml_tools/_core/_ML_models_pytab.py,sha256=EHHnDG02ghcJORy2gipm3NcrlzL0qygD44o7QGmT1Zs,26297
- ml_tools/_core/_ML_optimization.py,sha256=b1qfHiGyvVoj-ENqDbHTf1jNx55niUWE9KEZJv3vg80,28253
- ml_tools/_core/_ML_optimization_pareto.py,sha256=fad4UjW5TDbCgIsVFk1qmkq8DnU5sahFFuC2DgKAQ3I,36889
+ ml_tools/_core/_ML_optimization.py,sha256=mvG1joVS3U67lmSwzMgLgNGzh4H3Py3ttKeaTM3EUnU,28126
+ ml_tools/_core/_ML_optimization_pareto.py,sha256=1PA8o5qbI13x5QusWhRIJMiPz3cMA2dUT1ZwU9NIZQM,37609
  ml_tools/_core/_ML_scaler.py,sha256=Nhu6qli_QezHQi5NKhRb8Z51bBJgzk2nEp_yW4B9H4U,8134
  ml_tools/_core/_ML_sequence_datasetmaster.py,sha256=0YVOPf-y4ZNdgUxropXUWrmInNyGYaUYprYvXf31n9U,17811
  ml_tools/_core/_ML_sequence_evaluation.py,sha256=AiPHtZ9DRpE6zL9n3Tp5eGGD9vrYRkLbZ0Nc274mL7I,8069
@@ -105,7 +105,7 @@ ml_tools/_core/_schema.py,sha256=TM5WVVMoKOvr_Bc2z34sU_gzKlM465PRKTgdZaEOkGY,140
  ml_tools/_core/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
  ml_tools/_core/_serde.py,sha256=tsI4EO2Y7jrBMmbQ1pinDsPOrOg-SaPuB-Dt40q0taE,5609
  ml_tools/_core/_utilities.py,sha256=iA8fLWdhsIx4ut2Dp8M_OyU0Y3PPLgGdIklyl17x6xk,22560
- dragon_ml_toolbox-19.9.0.dist-info/METADATA,sha256=_EtgLq25qcjnIMmFvPVOfa-xWTp176hHC_VbxFLdWno,8774
- dragon_ml_toolbox-19.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dragon_ml_toolbox-19.9.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
- dragon_ml_toolbox-19.9.0.dist-info/RECORD,,
+ dragon_ml_toolbox-19.10.0.dist-info/METADATA,sha256=HNycos2k-C6KCjfj1g1pprR6APPYcst7HkoKylfgeR8,8193
+ dragon_ml_toolbox-19.10.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dragon_ml_toolbox-19.10.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+ dragon_ml_toolbox-19.10.0.dist-info/RECORD,,
@@ -31,13 +31,9 @@ This project depends on the following third-party packages. Each is governed by
  - [colorlog](https://github.com/borntyping/python-colorlog/blob/main/LICENSE)
  - [evotorch](https://github.com/nnaisense/evotorch/blob/master/LICENSE)
  - [FreeSimpleGUI](https://github.com/spyoungtech/FreeSimpleGUI/blob/main/license.txt)
- - [nuitka](https://github.com/Nuitka/Nuitka/blob/main/LICENSE.txt)
  - [omegaconf](https://github.com/omry/omegaconf/blob/master/LICENSE)
- - [ordered-set](https://github.com/rspeer/ordered-set/blob/master/MIT-LICENSE)
- - [pyinstaller](https://github.com/pyinstaller/pyinstaller/blob/develop/COPYING.txt)
  - [pytorch_tabular](https://github.com/manujosephv/pytorch_tabular/blob/main/LICENSE)
  - [torchmetrics](https://github.com/Lightning-AI/torchmetrics/blob/master/LICENSE)
- - [zstandard](https://github.com/indygreg/python-zstandard/blob/main/LICENSE)
  - [captum](https://github.com/meta-pytorch/captum/blob/master/LICENSE)
  - [node](https://github.com/Qwicen/node/blob/master/LICENSE.md)
  - [pytorch-widedeep](https://github.com/jrzaurin/pytorch-widedeep?tab=readme-ov-file#license)
@@ -6,7 +6,7 @@ import numpy as np
  from ._schema import FeatureSchema
  from ._script_info import _script_info
  from ._logger import get_logger
- from ._path_manager import sanitize_filename
+ from ._path_manager import sanitize_filename, make_fullpath
  from ._keys import MLTaskKeys


@@ -701,11 +701,11 @@ class DragonParetoConfig(_BaseModelParams):
      def __init__(self,
                   save_directory: Union[str, Path],
                   target_objectives: Dict[str, Literal["min", "max"]],
-                  continuous_bounds_map: Union[Dict[str, Tuple[float, float]], Dict[str, List[float]]],
+                  continuous_bounds_map: Union[Dict[str, Tuple[float, float]], Dict[str, List[float]], str, Path],
                   columns_to_round: Optional[List[str]] = None,
-                  population_size: int = 400,
+                  population_size: int = 500,
                   generations: int = 1000,
-                  solutions_filename: str = "ParetoSolutions",
+                  solutions_filename: str = "NonDominatedSolutions",
                   float_precision: int = 4,
                   log_interval: int = 10,
                   plot_size: Tuple[int, int] = (10, 7),
@@ -718,7 +718,7 @@ class DragonParetoConfig(_BaseModelParams):
             save_directory (str | Path): Directory to save artifacts.
             target_objectives (Dict[str, "min"|"max"]): Dictionary mapping target names to optimization direction.
                 Example: {"price": "max", "error": "min"}
-            continuous_bounds_map (Dict): Bounds for continuous features {name: (min, max)}.
+            continuous_bounds_map (Dict): Bounds for continuous features {name: (min, max)}. Or a path/str to a directory containing the "optimization_bounds.json" file.
             columns_to_round (List[str] | None): List of continuous column names that should be rounded to the nearest integer.
             population_size (int): Size of the genetic population.
             generations (int): Number of generations to run.
@@ -729,7 +729,13 @@ class DragonParetoConfig(_BaseModelParams):
             plot_font_size (int): Font size for plot text.
             discretize_start_at_zero (bool): Categorical encoding start index. True=0, False=1.
          """
-         self.save_directory = save_directory
+         # Validate string or Path
+         valid_save_dir = make_fullpath(save_directory, make=True, enforce="directory")
+
+         if isinstance(continuous_bounds_map, (str, Path)):
+             continuous_bounds_map = make_fullpath(continuous_bounds_map, make=False, enforce="directory")
+
+         self.save_directory = valid_save_dir
          self.target_objectives = target_objectives
          self.continuous_bounds_map = continuous_bounds_map
          self.columns_to_round = columns_to_round
@@ -742,7 +748,6 @@ class DragonParetoConfig(_BaseModelParams):
          self.plot_font_size = plot_font_size
          self.discretize_start_at_zero = discretize_start_at_zero

-
  # ----------------------------
  # Metrics Configurations
  # ----------------------------
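In practice, the `DragonParetoConfig` changes mean `continuous_bounds_map` now accepts either an explicit bounds dict or a directory holding an `optimization_bounds.json` template, while the defaults move to `population_size=500` and `solutions_filename="NonDominatedSolutions"`. A minimal sketch of both call styles; the public import path is an assumption, since this diff only shows the internal `_core._ML_configuration` module where the class is defined:

```python
from pathlib import Path

# Hypothetical import path, for illustration only; the class lives in
# ml_tools/_core/_ML_configuration.py and its public re-export is not shown in this diff.
from ml_tools.ML_configuration import DragonParetoConfig

# Style 1: pass the bounds dict directly (still supported).
config_from_dict = DragonParetoConfig(
    save_directory="pareto_runs/run_01",
    target_objectives={"price": "max", "error": "min"},
    continuous_bounds_map={"feature_A": (0.0, 100.0), "feature_B": (-10.0, 10.0)},
)

# Style 2 (new in 19.10.0): pass a directory that contains "optimization_bounds.json".
# The directory path is validated with make_fullpath here and resolved later by the
# Pareto optimizer via load_continuous_bounds_template.
config_from_template = DragonParetoConfig(
    save_directory="pareto_runs/run_02",
    target_objectives={"price": "max", "error": "min"},
    continuous_bounds_map=Path("bounds_templates"),
)
```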
@@ -44,12 +44,10 @@ class DragonOptimizer:
      SNES and CEM algorithms do not accept bounds, the given bounds will be used as an initial starting point.

      Example:
-         >>> # 1. Get the final schema from data exploration
-         >>> schema = data_exploration.finalize_feature_schema(...)
-         >>> # 2. Define bounds for continuous features
+         >>> # 1. Define bounds for continuous features
          >>> cont_bounds = {'feature_A': (0, 100), 'feature_B': (-10, 10)}
          >>>
-         >>> # 3. Initialize the optimizer
+         >>> # 2. Initialize the optimizer
          >>> optimizer = DragonOptimizer(
          ...     inference_handler=my_handler,
          ...     schema=schema,
@@ -58,7 +56,7 @@ class DragonOptimizer:
          ...     task="max",
          ...     algorithm="Genetic",
          ... )
-         >>> # 4. Run the optimization
+         >>> # 3. Run the optimization
          >>> best_result = optimizer.run(
          ...     num_generations=100,
          ...     save_dir="/path/to/results",
@@ -7,7 +7,7 @@ import matplotlib.cm as cm
  from matplotlib.collections import LineCollection
  import seaborn as sns
  from pathlib import Path
- from typing import Literal, Union, Tuple, List, Optional, Dict
+ from typing import Literal, Union, List, Optional, Dict
  from tqdm import tqdm
  import plotly.express as px
  import plotly.graph_objects as go
@@ -21,7 +21,7 @@ from ._SQL import DragonSQL
  from ._ML_inference import DragonInferenceHandler
  from ._ML_chaining_inference import DragonChainInference
  from ._ML_configuration import DragonParetoConfig
- from ._optimization_tools import create_optimization_bounds, plot_optimal_feature_distributions_from_dataframe
+ from ._optimization_tools import create_optimization_bounds, plot_optimal_feature_distributions_from_dataframe, load_continuous_bounds_template
  from ._math_utilities import discretize_categorical_values
  from ._utilities import save_dataframe_filename
  from ._IO_tools import save_json
@@ -107,6 +107,10 @@ class DragonParetoOptimizer:
                  _LOGGER.error(f"Target '{name}' not found in model targets: {available_targets}")
                  raise ValueError()

+             if direction not in ["min" , "max"]:
+                 _LOGGER.error(f"Invalid optimization direction '{direction}' for target '{name}'. Use 'min' or 'max'.")
+                 raise ValueError()
+
              # For standard handlers, we need indices to slice the output tensor.
              # For chain handlers, we just rely on name matching, but we track index for consistency.
              idx = available_targets.index(name)
@@ -117,10 +121,20 @@ class DragonParetoOptimizer:
          _LOGGER.info(f"Pareto Optimization setup for: {self.ordered_target_names}")

          # --- 2. Bounds Setup ---
+         # check type
+         raw_bounds_map = config.continuous_bounds_map
+         if isinstance(raw_bounds_map, (str, Path)):
+             continuous_bounds = load_continuous_bounds_template(raw_bounds_map)
+         elif isinstance(raw_bounds_map, dict):
+             continuous_bounds = raw_bounds_map
+         else:
+             _LOGGER.error(f"Invalid type for 'continuous_bounds_map' in config. Expected dict or Path. Got {type(raw_bounds_map)}.")
+             raise ValueError()
+
          # Uses the external tool which reads the schema to set correct bounds for both continuous and categorical
          bounds = create_optimization_bounds(
              schema=schema,
-             continuous_bounds_map=config.continuous_bounds_map,
+             continuous_bounds_map=continuous_bounds,
              start_at_zero=self.discretize_start_at_zero
          )
          self.lower_bounds = list(bounds[0])
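With this dispatch, a string or `Path` in the config is resolved through `load_continuous_bounds_template` before the bounds reach `create_optimization_bounds`. The template's on-disk layout is not part of this diff; the sketch below shows one plausible shape of `optimization_bounds.json` and a hand-rolled loader with the same contract (directory in, `{feature: (min, max)}` out); the real helper may differ:

```python
import json
from pathlib import Path
from typing import Dict, Tuple, Union

# Hypothetical contents of <directory>/optimization_bounds.json (layout assumed;
# only the filename is confirmed by the DragonParetoConfig docstring):
# {
#     "feature_A": [0.0, 100.0],
#     "feature_B": [-10.0, 10.0]
# }

def load_bounds_sketch(directory: Union[str, Path]) -> Dict[str, Tuple[float, float]]:
    """Stand-in for load_continuous_bounds_template: read the JSON template
    from a directory and return a {feature_name: (min, max)} mapping."""
    bounds_file = Path(directory) / "optimization_bounds.json"
    with bounds_file.open("r", encoding="utf-8") as f:
        raw = json.load(f)
    return {name: (float(low), float(high)) for name, (low, high) in raw.items()}
```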
@@ -1,4 +1,6 @@
  from ._core._optimization_tools import (
+     make_continuous_bounds_template,
+     load_continuous_bounds_template,
      create_optimization_bounds,
      parse_lower_upper_bounds,
      plot_optimal_feature_distributions,
@@ -7,6 +9,8 @@ from ._core._optimization_tools import (
  )

  __all__ = [
+     "make_continuous_bounds_template",
+     "load_continuous_bounds_template",
      "create_optimization_bounds",
      "parse_lower_upper_bounds",
      "plot_optimal_feature_distributions",