dragon-ml-toolbox 19.14.0__py3-none-any.whl → 20.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
  2. dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
  3. ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
  4. ml_tools/ETL_cleaning/_basic_clean.py +351 -0
  5. ml_tools/ETL_cleaning/_clean_tools.py +128 -0
  6. ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
  7. ml_tools/ETL_cleaning/_imprimir.py +13 -0
  8. ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
  9. ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
  10. ml_tools/ETL_engineering/_imprimir.py +24 -0
  11. ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
  12. ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
  13. ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
  14. ml_tools/GUI_tools/_imprimir.py +12 -0
  15. ml_tools/IO_tools/_IO_loggers.py +235 -0
  16. ml_tools/IO_tools/_IO_save_load.py +151 -0
  17. ml_tools/IO_tools/_IO_utils.py +140 -0
  18. ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
  19. ml_tools/IO_tools/_imprimir.py +14 -0
  20. ml_tools/MICE/_MICE_imputation.py +132 -0
  21. ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
  22. ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
  23. ml_tools/MICE/_imprimir.py +11 -0
  24. ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
  25. ml_tools/ML_callbacks/_base.py +101 -0
  26. ml_tools/ML_callbacks/_checkpoint.py +232 -0
  27. ml_tools/ML_callbacks/_early_stop.py +208 -0
  28. ml_tools/ML_callbacks/_imprimir.py +12 -0
  29. ml_tools/ML_callbacks/_scheduler.py +197 -0
  30. ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
  31. ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
  32. ml_tools/ML_chain/_dragon_chain.py +140 -0
  33. ml_tools/ML_chain/_imprimir.py +11 -0
  34. ml_tools/ML_configuration/__init__.py +90 -0
  35. ml_tools/ML_configuration/_base_model_config.py +69 -0
  36. ml_tools/ML_configuration/_finalize.py +366 -0
  37. ml_tools/ML_configuration/_imprimir.py +47 -0
  38. ml_tools/ML_configuration/_metrics.py +593 -0
  39. ml_tools/ML_configuration/_models.py +206 -0
  40. ml_tools/ML_configuration/_training.py +124 -0
  41. ml_tools/ML_datasetmaster/__init__.py +28 -0
  42. ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
  43. ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
  44. ml_tools/ML_datasetmaster/_imprimir.py +15 -0
  45. ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
  46. ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
  47. ml_tools/ML_evaluation/__init__.py +53 -0
  48. ml_tools/ML_evaluation/_classification.py +629 -0
  49. ml_tools/ML_evaluation/_feature_importance.py +409 -0
  50. ml_tools/ML_evaluation/_imprimir.py +25 -0
  51. ml_tools/ML_evaluation/_loss.py +92 -0
  52. ml_tools/ML_evaluation/_regression.py +273 -0
  53. ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
  54. ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
  55. ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
  56. ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
  57. ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
  58. ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
  59. ml_tools/ML_finalize_handler/__init__.py +10 -0
  60. ml_tools/ML_finalize_handler/_imprimir.py +8 -0
  61. ml_tools/ML_inference/__init__.py +22 -0
  62. ml_tools/ML_inference/_base_inference.py +166 -0
  63. ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
  64. ml_tools/ML_inference/_dragon_inference.py +332 -0
  65. ml_tools/ML_inference/_imprimir.py +11 -0
  66. ml_tools/ML_inference/_multi_inference.py +180 -0
  67. ml_tools/ML_inference_sequence/__init__.py +10 -0
  68. ml_tools/ML_inference_sequence/_imprimir.py +8 -0
  69. ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
  70. ml_tools/ML_inference_vision/__init__.py +10 -0
  71. ml_tools/ML_inference_vision/_imprimir.py +8 -0
  72. ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
  73. ml_tools/ML_models/__init__.py +32 -0
  74. ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
  75. ml_tools/ML_models/_base_mlp_attention.py +198 -0
  76. ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
  77. ml_tools/ML_models/_dragon_tabular.py +248 -0
  78. ml_tools/ML_models/_imprimir.py +18 -0
  79. ml_tools/ML_models/_mlp_attention.py +134 -0
  80. ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
  81. ml_tools/ML_models_sequence/__init__.py +10 -0
  82. ml_tools/ML_models_sequence/_imprimir.py +8 -0
  83. ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
  84. ml_tools/ML_models_vision/__init__.py +29 -0
  85. ml_tools/ML_models_vision/_base_wrapper.py +254 -0
  86. ml_tools/ML_models_vision/_image_classification.py +182 -0
  87. ml_tools/ML_models_vision/_image_segmentation.py +108 -0
  88. ml_tools/ML_models_vision/_imprimir.py +16 -0
  89. ml_tools/ML_models_vision/_object_detection.py +135 -0
  90. ml_tools/ML_optimization/__init__.py +21 -0
  91. ml_tools/ML_optimization/_imprimir.py +13 -0
  92. ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
  93. ml_tools/ML_optimization/_single_dragon.py +203 -0
  94. ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
  95. ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
  96. ml_tools/ML_scaler/__init__.py +10 -0
  97. ml_tools/ML_scaler/_imprimir.py +8 -0
  98. ml_tools/ML_trainer/__init__.py +20 -0
  99. ml_tools/ML_trainer/_base_trainer.py +297 -0
  100. ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
  101. ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
  102. ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
  103. ml_tools/ML_trainer/_imprimir.py +10 -0
  104. ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
  105. ml_tools/ML_utilities/_artifact_finder.py +382 -0
  106. ml_tools/ML_utilities/_imprimir.py +16 -0
  107. ml_tools/ML_utilities/_inspection.py +325 -0
  108. ml_tools/ML_utilities/_train_tools.py +205 -0
  109. ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
  110. ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
  111. ml_tools/ML_vision_transformers/_imprimir.py +14 -0
  112. ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
  113. ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
  114. ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
  115. ml_tools/PSO_optimization/_imprimir.py +10 -0
  116. ml_tools/SQL/__init__.py +7 -0
  117. ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
  118. ml_tools/SQL/_imprimir.py +8 -0
  119. ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
  120. ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
  121. ml_tools/VIF/_imprimir.py +10 -0
  122. ml_tools/_core/__init__.py +7 -1
  123. ml_tools/_core/_logger.py +8 -18
  124. ml_tools/_core/_schema_load_ops.py +43 -0
  125. ml_tools/_core/_script_info.py +2 -2
  126. ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
  127. ml_tools/data_exploration/_analysis.py +214 -0
  128. ml_tools/data_exploration/_cleaning.py +566 -0
  129. ml_tools/data_exploration/_features.py +583 -0
  130. ml_tools/data_exploration/_imprimir.py +32 -0
  131. ml_tools/data_exploration/_plotting.py +487 -0
  132. ml_tools/data_exploration/_schema_ops.py +176 -0
  133. ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
  134. ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
  135. ml_tools/ensemble_evaluation/_imprimir.py +14 -0
  136. ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
  137. ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
  138. ml_tools/ensemble_inference/_imprimir.py +9 -0
  139. ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
  140. ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
  141. ml_tools/ensemble_learning/_imprimir.py +10 -0
  142. ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
  143. ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
  144. ml_tools/excel_handler/_imprimir.py +13 -0
  145. ml_tools/{keys.py → keys/__init__.py} +4 -1
  146. ml_tools/keys/_imprimir.py +11 -0
  147. ml_tools/{_core → keys}/_keys.py +2 -0
  148. ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
  149. ml_tools/math_utilities/_imprimir.py +11 -0
  150. ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
  151. ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
  152. ml_tools/optimization_tools/_imprimir.py +13 -0
  153. ml_tools/optimization_tools/_optimization_bounds.py +236 -0
  154. ml_tools/optimization_tools/_optimization_plots.py +218 -0
  155. ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
  156. ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
  157. ml_tools/path_manager/_imprimir.py +15 -0
  158. ml_tools/path_manager/_path_tools.py +346 -0
  159. ml_tools/plot_fonts/__init__.py +8 -0
  160. ml_tools/plot_fonts/_imprimir.py +8 -0
  161. ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
  162. ml_tools/schema/__init__.py +15 -0
  163. ml_tools/schema/_feature_schema.py +223 -0
  164. ml_tools/schema/_gui_schema.py +191 -0
  165. ml_tools/schema/_imprimir.py +10 -0
  166. ml_tools/{serde.py → serde/__init__.py} +4 -2
  167. ml_tools/serde/_imprimir.py +10 -0
  168. ml_tools/{_core → serde}/_serde.py +3 -8
  169. ml_tools/{utilities.py → utilities/__init__.py} +11 -6
  170. ml_tools/utilities/_imprimir.py +18 -0
  171. ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
  172. ml_tools/utilities/_utility_tools.py +192 -0
  173. dragon_ml_toolbox-19.14.0.dist-info/RECORD +0 -111
  174. ml_tools/ML_chaining_inference.py +0 -8
  175. ml_tools/ML_configuration.py +0 -86
  176. ml_tools/ML_configuration_pytab.py +0 -14
  177. ml_tools/ML_datasetmaster.py +0 -10
  178. ml_tools/ML_evaluation.py +0 -16
  179. ml_tools/ML_evaluation_multi.py +0 -12
  180. ml_tools/ML_finalize_handler.py +0 -8
  181. ml_tools/ML_inference.py +0 -12
  182. ml_tools/ML_models.py +0 -14
  183. ml_tools/ML_models_advanced.py +0 -14
  184. ml_tools/ML_models_pytab.py +0 -14
  185. ml_tools/ML_optimization.py +0 -14
  186. ml_tools/ML_optimization_pareto.py +0 -8
  187. ml_tools/ML_scaler.py +0 -8
  188. ml_tools/ML_sequence_datasetmaster.py +0 -8
  189. ml_tools/ML_sequence_evaluation.py +0 -10
  190. ml_tools/ML_sequence_inference.py +0 -8
  191. ml_tools/ML_sequence_models.py +0 -8
  192. ml_tools/ML_trainer.py +0 -12
  193. ml_tools/ML_vision_datasetmaster.py +0 -12
  194. ml_tools/ML_vision_evaluation.py +0 -10
  195. ml_tools/ML_vision_inference.py +0 -8
  196. ml_tools/ML_vision_models.py +0 -18
  197. ml_tools/SQL.py +0 -8
  198. ml_tools/_core/_ETL_cleaning.py +0 -694
  199. ml_tools/_core/_IO_tools.py +0 -498
  200. ml_tools/_core/_ML_callbacks.py +0 -702
  201. ml_tools/_core/_ML_configuration.py +0 -1332
  202. ml_tools/_core/_ML_configuration_pytab.py +0 -102
  203. ml_tools/_core/_ML_evaluation.py +0 -867
  204. ml_tools/_core/_ML_evaluation_multi.py +0 -544
  205. ml_tools/_core/_ML_inference.py +0 -646
  206. ml_tools/_core/_ML_models.py +0 -668
  207. ml_tools/_core/_ML_models_pytab.py +0 -693
  208. ml_tools/_core/_ML_trainer.py +0 -2323
  209. ml_tools/_core/_ML_utilities.py +0 -886
  210. ml_tools/_core/_ML_vision_models.py +0 -644
  211. ml_tools/_core/_data_exploration.py +0 -1909
  212. ml_tools/_core/_optimization_tools.py +0 -493
  213. ml_tools/_core/_schema.py +0 -359
  214. ml_tools/plot_fonts.py +0 -8
  215. ml_tools/schema.py +0 -12
  216. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
  217. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
  218. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  219. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
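
The dominant pattern in 20.0.0 is visible throughout the list above: each flat `ml_tools/<name>.py` module becomes a `ml_tools/<name>/` package (a few are renamed along the way, e.g. `MICE_imputation` → `MICE`, `VIF_factor` → `VIF`, `ML_chaining_utilities` → `ML_chain`), with the implementation split into private `_*.py` modules that the package `__init__.py` re-exports. For packages that keep their name, the hunks below show the public import surface staying put; a minimal before/after sketch, using only names confirmed in the ensemble_inference hunks below:

# These imports worked the same in 19.14.0, when ensemble_inference was a flat module:
from ml_tools.ensemble_inference import DragonEnsembleInferenceHandler, model_report

# New in 20.0.0: each package also exposes info(), which prints its public names
from ml_tools.ensemble_inference import info
info()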
ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py
@@ -22,10 +22,9 @@ from sklearn.metrics import (accuracy_score,
 from pathlib import Path
 from typing import Union, Optional, Literal

-from ._path_manager import sanitize_filename, make_fullpath
-from ._script_info import _script_info
-from ._logger import get_logger
-from ._keys import SHAPKeys
+from ..path_manager import sanitize_filename, make_fullpath
+from .._core import get_logger
+from ..keys._keys import SHAPKeys


 _LOGGER = get_logger("Ensemble Evaluation")
@@ -731,6 +730,3 @@ def plot_learning_curves(
     fig.savefig(full_save_path, bbox_inches="tight", format="svg")
     plt.close(fig)

-
-def info():
-    _script_info(__all__)
ml_tools/ensemble_evaluation/_imprimir.py (new file)
@@ -0,0 +1,14 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "evaluate_model_classification",
+    "plot_roc_curve",
+    "plot_precision_recall_curve",
+    "plot_calibration_curve",
+    "evaluate_model_regression",
+    "get_shap_values",
+    "plot_learning_curves"
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
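
Each refactored subpackage ships a small `_imprimir.py` like the one above: a `_GRUPOS` list of its public names and an `info()` that forwards the list to `_imprimir_disponibles` in `_core`, replacing the old `_script_info(__all__)` helper. The body of `_imprimir_disponibles` is not part of this diff; a hypothetical sketch of what the call sites imply it does:

# Hypothetical sketch only: the real ml_tools._core._imprimir_disponibles is not
# shown in this diff. Its signature is inferred from the call sites above.
def _imprimir_disponibles(grupos: list[str]) -> None:
    """Print the public names available in the calling subpackage."""
    print("Available:")
    for nombre in grupos:
        print(f"  - {nombre}")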
ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py}
@@ -1,9 +1,11 @@
-from ._core._ensemble_inference import (
+from ._ensemble_inference import (
     DragonEnsembleInferenceHandler,
-    model_report,
-    info
+    model_report
 )

+from ._imprimir import info
+
+
 __all__ = [
     "DragonEnsembleInferenceHandler",
     "model_report"
ml_tools/{_core → ensemble_inference}/_ensemble_inference.py
@@ -1,4 +1,4 @@
-from typing import Union, Literal, Dict, Any, Optional, List
+from typing import Union, Literal, Any, Optional
 from pathlib import Path
 import json
 import numpy as np
@@ -6,11 +6,11 @@ import numpy as np
 import xgboost
 import lightgbm

-from ._script_info import _script_info
-from ._logger import get_logger
-from ._path_manager import make_fullpath, list_files_by_extension
-from ._keys import EnsembleKeys
-from ._serde import deserialize_object
+from ..serde import deserialize_object
+
+from .._core import get_logger
+from ..path_manager import make_fullpath, list_files_by_extension
+from ..keys._keys import EnsembleKeys


 _LOGGER = get_logger("Ensemble Inference")
@@ -37,10 +37,10 @@ class DragonEnsembleInferenceHandler:
             models_dir (Path): The directory containing the saved .joblib model files.
             task ("regression" | "classification"): The type of task the models perform.
         """
-        self.models: Dict[str, Any] = dict()
+        self.models: dict[str, Any] = dict()
         self.task: str = task
         self.verbose = verbose
-        self._feature_names: Optional[List[str]] = None
+        self._feature_names: Optional[list[str]] = None

         model_files = list_files_by_extension(directory=models_dir, extension="joblib", raise_on_empty=True)

@@ -53,7 +53,7 @@ class DragonEnsembleInferenceHandler:

             model: Any = full_object[EnsembleKeys.MODEL]
             target_name: str = full_object[EnsembleKeys.TARGET]
-            feature_names_list: List[str] = full_object[EnsembleKeys.FEATURES]
+            feature_names_list: list[str] = full_object[EnsembleKeys.FEATURES]

             # Check that feature names match
             if self._feature_names is None:
@@ -71,14 +71,14 @@ class DragonEnsembleInferenceHandler:
                 _LOGGER.error(f"Failed to load or parse {fname}.")

     @property
-    def feature_names(self) -> List[str]:
+    def feature_names(self) -> list[str]:
         """
         Getter for the list of feature names the models expect.
         Returns an empty list if no models were loaded.
         """
         return self._feature_names if self._feature_names is not None else []

-    def predict(self, features: np.ndarray) -> Dict[str, Any]:
+    def predict(self, features: np.ndarray) -> dict[str, Any]:
         """
         Predicts on a single feature vector.

@@ -97,7 +97,7 @@ class DragonEnsembleInferenceHandler:
             _LOGGER.error("The 'predict()' method is for a single sample. Use 'predict_batch()' for multiple samples.")
             raise ValueError()

-        results: Dict[str, Any] = dict()
+        results: dict[str, Any] = dict()
         for target_name, model in self.models.items():
             if self.task == "regression":
                 prediction = model.predict(features)
@@ -112,7 +112,7 @@
         _LOGGER.info("Inference process complete.")
         return results

-    def predict_batch(self, features: np.ndarray) -> Dict[str, Any]:
+    def predict_batch(self, features: np.ndarray) -> dict[str, Any]:
         """
         Predicts on a batch of feature vectors.

@@ -128,7 +128,7 @@
             _LOGGER.error("Input for batch prediction must be a 2D array.")
             raise ValueError()

-        results: Dict[str, Any] = dict()
+        results: dict[str, Any] = dict()
         for target_name, model in self.models.items():
             if self.task == "regression":
                 results[target_name] = model.predict(features)
@@ -147,7 +147,7 @@ def model_report(
     model_path: Union[str,Path],
     output_dir: Optional[Union[str,Path]] = None,
     verbose: bool = True
-) -> Dict[str, Any]:
+) -> dict[str, Any]:
     """
     Deserializes a model and generates a summary report.

@@ -220,6 +220,3 @@
     # --- 5. Return the extracted data ---
     return report_data

-
-def info():
-    _script_info(__all__)
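
The shape checks above draw a hard line: `predict` takes a single 1-D feature vector, `predict_batch` a 2-D array, and each raises `ValueError` otherwise. A minimal usage sketch, assuming the constructor arguments implied by the docstring (`models_dir`, `task`) and an illustrative models directory:

import numpy as np
from ml_tools.ensemble_inference import DragonEnsembleInferenceHandler

# "models/" is an illustrative path containing the saved .joblib files
handler = DragonEnsembleInferenceHandler(models_dir="models/", task="regression")

n_features = len(handler.feature_names)
single = handler.predict(np.zeros(n_features))             # 1-D input: one result per target
batch = handler.predict_batch(np.zeros((8, n_features)))   # 2-D input: one array per target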
ml_tools/ensemble_inference/_imprimir.py (new file)
@@ -0,0 +1,9 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "DragonEnsembleInferenceHandler",
+    "model_report"
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py}
@@ -1,16 +1,14 @@
-from ._core._ensemble_learning import (
+from ._ensemble_learning import (
     RegressionTreeModels,
     ClassificationTreeModels,
-    dataset_pipeline,
-    train_test_pipeline,
     run_ensemble_pipeline,
-    info
 )

+from ._imprimir import info
+
+
 __all__ = [
     "RegressionTreeModels",
     "ClassificationTreeModels",
-    "dataset_pipeline",
-    "train_test_pipeline",
     "run_ensemble_pipeline",
 ]
ml_tools/{_core → ensemble_learning}/_ensemble_learning.py
@@ -13,13 +13,9 @@ import lightgbm as lgb
 from sklearn.model_selection import train_test_split
 from sklearn.base import clone

-from ._utilities import yield_dataframes_from_dir, train_dataset_yielder
-from ._serde import serialize_object_filename
-from ._path_manager import sanitize_filename, make_fullpath
-from ._script_info import _script_info
-from ._keys import EnsembleKeys
-from ._logger import get_logger
-from ._ensemble_evaluation import (evaluate_model_classification,
+from ..utilities import yield_dataframes_from_dir, train_dataset_yielder
+from ..serde import serialize_object_filename
+from ..ensemble_evaluation import (evaluate_model_classification,
                                    plot_roc_curve,
                                    plot_precision_recall_curve,
                                    plot_calibration_curve,
@@ -27,6 +23,10 @@ from ._ensemble_evaluation import (evaluate_model_classification,
                                    get_shap_values,
                                    plot_learning_curves)

+from ..path_manager import sanitize_filename, make_fullpath
+from ..keys._keys import EnsembleKeys
+from .._core import get_logger
+
 import warnings # Ignore warnings
 warnings.filterwarnings('ignore', category=DeprecationWarning)
 warnings.filterwarnings('ignore', category=FutureWarning)
@@ -528,6 +528,3 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat

     _LOGGER.info("Training and evaluation complete.")

-
-def info():
-    _script_info(__all__)
ml_tools/ensemble_learning/_imprimir.py (new file)
@@ -0,0 +1,10 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "RegressionTreeModels",
+    "ClassificationTreeModels",
+    "run_ensemble_pipeline",
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
ml_tools/{excel_handler.py → excel_handler/__init__.py}
@@ -1,13 +1,15 @@
-from ._core._excel_handler import (
+from ._excel_handler import (
     find_excel_files,
     unmerge_and_split_excel,
     unmerge_and_split_from_directory,
     validate_excel_schema,
     vertical_merge_transform_excel,
-    horizontal_merge_transform_excel,
-    info
+    horizontal_merge_transform_excel
 )

+from ._imprimir import info
+
+
 __all__ = [
     "find_excel_files",
     "unmerge_and_split_excel",
ml_tools/{_core → excel_handler}/_excel_handler.py
@@ -1,11 +1,10 @@
 from pathlib import Path
 from openpyxl import load_workbook, Workbook
 import pandas as pd
-from typing import List, Optional, Union
+from typing import Optional, Union

-from ._path_manager import sanitize_filename, make_fullpath
-from ._script_info import _script_info
-from ._logger import get_logger
+from ..path_manager import sanitize_filename, make_fullpath
+from .._core import get_logger


 _LOGGER = get_logger("Excel Handler")
@@ -167,7 +166,7 @@ def unmerge_and_split_from_directory(input_dir: Union[str,Path], output_dir: Uni

 def validate_excel_schema(
     target_dir: Union[str,Path],
-    expected_columns: List[str],
+    expected_columns: list[str],
     strict: bool = False
 ) -> None:
     """
@@ -236,8 +235,8 @@ def vertical_merge_transform_excel(
     target_dir: Union[str,Path],
     csv_filename: str,
     output_dir: Union[str,Path],
-    target_columns: Optional[List[str]] = None,
-    rename_columns: Optional[List[str]] = None
+    target_columns: Optional[list[str]] = None,
+    rename_columns: Optional[list[str]] = None
 ) -> None:
     """
     Merges multiple Excel files in a directory vertically and saves as a single CSV file.
@@ -371,6 +370,3 @@ def horizontal_merge_transform_excel(

     _LOGGER.info(f"Merged {len(excel_files)} Excel files into '{csv_filename}'.")

-
-def info():
-    _script_info(__all__)
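
The full keyword signature of `vertical_merge_transform_excel` is visible in the hunk above; an illustrative call follows (paths and column names are invented, and the exact semantics of `target_columns`/`rename_columns` are assumptions from the parameter names and docstring):

from ml_tools.excel_handler import vertical_merge_transform_excel

vertical_merge_transform_excel(
    target_dir="raw_excel/",            # directory of input .xlsx files
    csv_filename="merged.csv",          # single CSV produced by the vertical merge
    output_dir="clean_csv/",
    target_columns=["ID", "Value"],     # assumed: subset of columns to keep
    rename_columns=["sample_id", "y"],  # assumed: new names for the kept columns
)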
ml_tools/excel_handler/_imprimir.py (new file)
@@ -0,0 +1,13 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "find_excel_files",
+    "unmerge_and_split_excel",
+    "unmerge_and_split_from_directory",
+    "validate_excel_schema",
+    "vertical_merge_transform_excel",
+    "horizontal_merge_transform_excel"
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
ml_tools/{keys.py → keys/__init__.py}
@@ -1,10 +1,13 @@
-from ._core._keys import (
+from ._keys import (
     PyTorchInferenceKeys as InferenceKeys,
     _CheckpointCallbackKeys as CheckpointCallbackKeys,
     _FinalizedFileKeys as FinalizedFileKeys,
     _PublicTaskKeys as TaskKeys,
 )

+from ._imprimir import info
+
+
 __all__ = [
     "InferenceKeys",
     "CheckpointCallbackKeys",
ml_tools/keys/_imprimir.py (new file)
@@ -0,0 +1,11 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "InferenceKeys",
+    "CheckpointCallbackKeys",
+    "FinalizedFileKeys",
+    "TaskKeys",
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
ml_tools/{_core → keys}/_keys.py
@@ -262,6 +262,8 @@ class SchemaKeys:
     """Used by the schema module."""
     SCHEMA_FILENAME = "FeatureSchema.json"
     GUI_SCHEMA_FILENAME = "GUISchema.json"
+    # Model architecture API
+    SCHEMA_DICT = "schema_dict"
     # GUI Schema
     TARGETS = "targets"
     CONTINUOUS = "continuous"
ml_tools/{math_utilities.py → math_utilities/__init__.py}
@@ -1,11 +1,14 @@
-from ._core._math_utilities import (
+from ._math_utilities import (
     normalize_mixed_list,
     threshold_binary_values,
     threshold_binary_values_batch,
     discretize_categorical_values,
-    info
 )

+
+from ._imprimir import info
+
+
 __all__ = [
     "normalize_mixed_list",
     "threshold_binary_values",
ml_tools/math_utilities/_imprimir.py (new file)
@@ -0,0 +1,11 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "normalize_mixed_list",
+    "threshold_binary_values",
+    "threshold_binary_values_batch",
+    "discretize_categorical_values",
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
ml_tools/{_core → math_utilities}/_math_utilities.py
@@ -3,8 +3,7 @@ import numpy as np
 import math
 from typing import Union, Sequence, Optional

-from ._script_info import _script_info
-from ._logger import get_logger
+from .._core import get_logger


 _LOGGER = get_logger("Math Utilities")
@@ -260,6 +259,3 @@ def discretize_categorical_values(
     else:
         return final_output

-
-def info():
-    _script_info(__all__)
ml_tools/{optimization_tools.py → optimization_tools/__init__.py}
@@ -1,13 +1,18 @@
-from ._core._optimization_tools import (
+from ._optimization_plots import (
+    plot_optimal_feature_distributions,
+    plot_optimal_feature_distributions_from_dataframe,
+)
+
+from ._optimization_bounds import (
     make_continuous_bounds_template,
     load_continuous_bounds_template,
     create_optimization_bounds,
     parse_lower_upper_bounds,
-    plot_optimal_feature_distributions,
-    plot_optimal_feature_distributions_from_dataframe,
-    info
 )

+from ._imprimir import info
+
+
 __all__ = [
     "make_continuous_bounds_template",
     "load_continuous_bounds_template",
ml_tools/optimization_tools/_imprimir.py (new file)
@@ -0,0 +1,13 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "make_continuous_bounds_template",
+    "load_continuous_bounds_template",
+    "create_optimization_bounds",
+    "parse_lower_upper_bounds",
+    "plot_optimal_feature_distributions",
+    "plot_optimal_feature_distributions_from_dataframe",
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
ml_tools/optimization_tools/_optimization_bounds.py (new file)
@@ -0,0 +1,236 @@
+from typing import Union, Any, Optional
+from pathlib import Path
+
+from ..schema import FeatureSchema
+from ..IO_tools import save_json, load_json
+
+from ..keys._keys import OptimizationToolsKeys
+from ..path_manager import make_fullpath
+from .._core import get_logger
+
+
+_LOGGER = get_logger("Optimization Bounds")
+
+
+__all__ = [
+    "make_continuous_bounds_template",
+    "load_continuous_bounds_template",
+    "create_optimization_bounds",
+    "parse_lower_upper_bounds",
+]
+
+
+def make_continuous_bounds_template(
+    directory: Union[str, Path],
+    feature_schema: FeatureSchema,
+    default_bounds: tuple[float, float] = (0, 1)
+) -> None:
+    """
+    Creates a JSON template for manual entry of continuous feature optimization bounds.
+
+    The resulting file maps each continuous feature name to a [min, max] list
+    populated with `default_bounds`. Edit the values in this file before using.
+
+    Args:
+        directory (str | Path): The directory where the template will be saved.
+        feature_schema (FeatureSchema): The loaded schema containing feature definitions.
+        default_bounds (Tuple[float, float]): Default (min, max) values to populate the template.
+    """
+    # validate directory path
+    dir_path = make_fullpath(directory, make=True, enforce="directory")
+
+    # 1. Check if continuous features exist
+    if not feature_schema.continuous_feature_names:
+        _LOGGER.warning("No continuous features found in FeatureSchema. Skipping bounds template generation.")
+        return
+
+    # 2. Construct the dictionary: {feature_name: [min, max]}
+    bounds_map = {
+        name: list(default_bounds)
+        for name in feature_schema.continuous_feature_names
+    }
+
+    # use a fixed key for the filename
+    filename = OptimizationToolsKeys.OPTIMIZATION_BOUNDS_FILENAME + ".json"
+
+    # 3. Save to JSON using the IO tool
+    save_json(
+        data=bounds_map,
+        directory=dir_path,
+        filename=filename,
+        verbose=False
+    )
+
+    _LOGGER.info(f"💾 Continuous bounds template saved to: '{dir_path.name}/{filename}'")
+
+
+def load_continuous_bounds_template(directory: Union[str, Path]) -> dict[str, list[float]]:
+    """
+    Loads the continuous feature bounds template from JSON. Expected filename: `optimization_bounds.json`.
+
+    Args:
+        directory (str | Path): The directory where the template is located.
+
+    Returns:
+        Dictionary (Dict[str, List[float]]): A dictionary mapping feature names to [min, max] bounds.
+    """
+    dir_path = make_fullpath(directory, enforce="directory")
+    full_path = dir_path / (OptimizationToolsKeys.OPTIMIZATION_BOUNDS_FILENAME + ".json")
+
+    bounds_map = load_json(
+        file_path=full_path,
+        expected_type='dict',
+        verbose=False
+    )
+
+    # validate loaded data
+    if not all(
+        isinstance(v, list) and                        # Check type
+        len(v) == 2 and                                # Check length
+        all(isinstance(i, (int, float)) for i in v)    # Check contents are numbers
+        for v in bounds_map.values()
+    ):
+        _LOGGER.error(f"Invalid format in bounds template at '{full_path}'. Each value must be a list of [min, max].")
+        raise ValueError()
+
+    _LOGGER.info(f"Continuous bounds template loaded from: '{dir_path.name}'")
+
+    return bounds_map
+
+
+def create_optimization_bounds(
+    schema: FeatureSchema,
+    continuous_bounds_map: Union[dict[str, tuple[float, float]], dict[str, list[float]]],
+    start_at_zero: bool = True
+) -> tuple[list[float], list[float]]:
+    """
+    Generates the lower and upper bounds lists for the optimizer from a FeatureSchema.
+
+    This helper function automates the creation of unbiased bounds for
+    categorical features and combines them with user-defined bounds for
+    continuous features, using the schema as the single source of truth
+    for feature order and type.
+
+    Args:
+        schema (FeatureSchema):
+            The definitive schema object created by
+            `data_exploration.finalize_feature_schema()`.
+        continuous_bounds_map (Dict[str, Tuple[float, float]], Dict[str, List[float]]):
+            A dictionary mapping the *name* of each **continuous** feature
+            to its (min_bound, max_bound).
+        start_at_zero (bool):
+            - If True, assumes categorical encoding is [0, 1, ..., k-1].
+              Bounds will be set as [-0.5, k - 0.5].
+            - If False, assumes encoding is [1, 2, ..., k].
+              Bounds will be set as [0.5, k + 0.5].
+
+    Returns:
+        Tuple[List[float], List[float]]:
+            A tuple containing two lists: (lower_bounds, upper_bounds).
+
+    Raises:
+        ValueError: If a feature is missing from `continuous_bounds_map`
+            or if a feature name in the map is not a
+            continuous feature according to the schema.
+    """
+    # validate length in the continuous_bounds_map values
+    for name, bounds in continuous_bounds_map.items():
+        if not (isinstance(bounds, (list, tuple)) and len(bounds) == 2):
+            _LOGGER.error(f"Bounds for feature '{name}' must be a list or tuple of length 2 (min, max). Found: {bounds}")
+            raise ValueError()
+
+    # 1. Get feature names and map from schema
+    feature_names = schema.feature_names
+    categorical_index_map = schema.categorical_index_map
+    total_features = len(feature_names)
+
+    if total_features <= 0:
+        _LOGGER.error("Schema contains no features.")
+        raise ValueError()
+
+    _LOGGER.info(f"Generating bounds for {total_features} total features...")
+
+    # 2. Initialize bound lists
+    lower_bounds: list[Optional[float]] = [None] * total_features
+    upper_bounds: list[Optional[float]] = [None] * total_features
+
+    # 3. Populate categorical bounds (Index-based)
+    if categorical_index_map:
+        for index, cardinality in categorical_index_map.items():
+            if not (0 <= index < total_features):
+                _LOGGER.error(f"Categorical index {index} is out of range for the {total_features} features.")
+                raise ValueError()
+
+            if start_at_zero:
+                # Rule for [0, k-1]: bounds are [-0.5, k - 0.5]
+                low = -0.5
+                high = float(cardinality) - 0.5
+            else:
+                # Rule for [1, k]: bounds are [0.5, k + 0.5]
+                low = 0.5
+                high = float(cardinality) + 0.5
+
+            lower_bounds[index] = low
+            upper_bounds[index] = high
+
+        _LOGGER.info(f"Automatically set bounds for {len(categorical_index_map)} categorical features.")
+    else:
+        _LOGGER.info("No categorical features found in schema.")
+
+    # 4. Populate continuous bounds (Name-based)
+    # Use schema.continuous_feature_names for robust checking
+    continuous_names_set = set(schema.continuous_feature_names)
+
+    if continuous_names_set != set(continuous_bounds_map.keys()):
+        missing_in_map = continuous_names_set - set(continuous_bounds_map.keys())
+        if missing_in_map:
+            _LOGGER.error(f"The following continuous features are missing from 'continuous_bounds_map': {list(missing_in_map)}")
+
+        extra_in_map = set(continuous_bounds_map.keys()) - continuous_names_set
+        if extra_in_map:
+            _LOGGER.error(f"The following features in 'continuous_bounds_map' are not defined as continuous in the schema: {list(extra_in_map)}")
+
+        raise ValueError("Mismatch between 'continuous_bounds_map' and schema's continuous features.")
+
+    count_continuous = 0
+    for name, (low, high) in continuous_bounds_map.items():
+        # Map name to its index in the *feature-only* list
+        # This is guaranteed to be correct by the schema
+        index = feature_names.index(name)
+
+        if lower_bounds[index] is not None:
+            # This should be impossible if schema is correct, but good to check
+            _LOGGER.error(f"Schema conflict: Feature '{name}' (at index {index}) is defined as both continuous and categorical.")
+            raise ValueError()
+
+        lower_bounds[index] = float(low)
+        upper_bounds[index] = float(high)
+        count_continuous += 1
+
+    _LOGGER.info(f"Manually set bounds for {count_continuous} continuous features.")
+
+    # 5. Final Validation (all Nones should be filled)
+    if None in lower_bounds:
+        missing_indices = [i for i, b in enumerate(lower_bounds) if b is None]
+        missing_names = [feature_names[i] for i in missing_indices]
+        _LOGGER.error(f"Failed to create all bounds. This indicates an internal logic error. Missing: {missing_names}")
+        raise RuntimeError("Internal error: Not all bounds were populated.")
+
+    # Cast to float lists, as 'None' sentinels are gone
+    return (
+        [float(b) for b in lower_bounds],   # type: ignore
+        [float(b) for b in upper_bounds]    # type: ignore
+    )
+
+
+def parse_lower_upper_bounds(source: dict[str,tuple[Any,Any]]):
+    """
+    Parse lower and upper boundaries, returning 2 lists:
+
+    `lower_bounds`, `upper_bounds`
+    """
+    lower = [low[0] for low in source.values()]
+    upper = [up[1] for up in source.values()]
+
+    return lower, upper
+
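
Taken together, `_optimization_bounds.py` defines a three-step workflow: write a `{feature_name: [min, max]}` JSON template for the continuous features, edit it by hand (e.g. `{"temperature": [20, 95], "pH": [2.5, 9.0]}`, names and values illustrative; `load_continuous_bounds_template` rejects anything that is not a two-element numeric list), then let `create_optimization_bounds` merge it with automatically generated categorical bounds. The categorical rule rewards a worked check; this standalone snippet restates the arithmetic from the docstring above:

# Standalone check of the categorical-bounds rule in create_optimization_bounds.
def categorical_bounds(cardinality: int, start_at_zero: bool = True) -> tuple[float, float]:
    if start_at_zero:                      # encoding [0, 1, ..., k-1]
        return (-0.5, cardinality - 0.5)
    return (0.5, cardinality + 0.5)        # encoding [1, 2, ..., k]

# k = 3 categories encoded as 0/1/2 get bounds [-0.5, 2.5]: rounding any point in
# that range to the nearest integer gives each category an interval of width 1,
# which is what makes the bounds unbiased.
assert categorical_bounds(3) == (-0.5, 2.5)
assert categorical_bounds(3, start_at_zero=False) == (0.5, 3.5)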