dragon-ml-toolbox 19.13.0__py3-none-any.whl → 20.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
- dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
- ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
- ml_tools/ETL_cleaning/_basic_clean.py +351 -0
- ml_tools/ETL_cleaning/_clean_tools.py +128 -0
- ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
- ml_tools/ETL_cleaning/_imprimir.py +13 -0
- ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
- ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
- ml_tools/ETL_engineering/_imprimir.py +24 -0
- ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
- ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
- ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
- ml_tools/GUI_tools/_imprimir.py +12 -0
- ml_tools/IO_tools/_IO_loggers.py +235 -0
- ml_tools/IO_tools/_IO_save_load.py +151 -0
- ml_tools/IO_tools/_IO_utils.py +140 -0
- ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
- ml_tools/IO_tools/_imprimir.py +14 -0
- ml_tools/MICE/_MICE_imputation.py +132 -0
- ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
- ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
- ml_tools/MICE/_imprimir.py +11 -0
- ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
- ml_tools/ML_callbacks/_base.py +101 -0
- ml_tools/ML_callbacks/_checkpoint.py +232 -0
- ml_tools/ML_callbacks/_early_stop.py +208 -0
- ml_tools/ML_callbacks/_imprimir.py +12 -0
- ml_tools/ML_callbacks/_scheduler.py +197 -0
- ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
- ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
- ml_tools/ML_chain/_dragon_chain.py +140 -0
- ml_tools/ML_chain/_imprimir.py +11 -0
- ml_tools/ML_configuration/__init__.py +90 -0
- ml_tools/ML_configuration/_base_model_config.py +69 -0
- ml_tools/ML_configuration/_finalize.py +366 -0
- ml_tools/ML_configuration/_imprimir.py +47 -0
- ml_tools/ML_configuration/_metrics.py +593 -0
- ml_tools/ML_configuration/_models.py +206 -0
- ml_tools/ML_configuration/_training.py +124 -0
- ml_tools/ML_datasetmaster/__init__.py +28 -0
- ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
- ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
- ml_tools/ML_datasetmaster/_imprimir.py +15 -0
- ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
- ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
- ml_tools/ML_evaluation/__init__.py +53 -0
- ml_tools/ML_evaluation/_classification.py +629 -0
- ml_tools/ML_evaluation/_feature_importance.py +409 -0
- ml_tools/ML_evaluation/_imprimir.py +25 -0
- ml_tools/ML_evaluation/_loss.py +92 -0
- ml_tools/ML_evaluation/_regression.py +273 -0
- ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
- ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
- ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
- ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
- ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
- ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
- ml_tools/ML_finalize_handler/__init__.py +10 -0
- ml_tools/ML_finalize_handler/_imprimir.py +8 -0
- ml_tools/ML_inference/__init__.py +22 -0
- ml_tools/ML_inference/_base_inference.py +166 -0
- ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
- ml_tools/ML_inference/_dragon_inference.py +332 -0
- ml_tools/ML_inference/_imprimir.py +11 -0
- ml_tools/ML_inference/_multi_inference.py +180 -0
- ml_tools/ML_inference_sequence/__init__.py +10 -0
- ml_tools/ML_inference_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
- ml_tools/ML_inference_vision/__init__.py +10 -0
- ml_tools/ML_inference_vision/_imprimir.py +8 -0
- ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
- ml_tools/ML_models/__init__.py +32 -0
- ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
- ml_tools/ML_models/_base_mlp_attention.py +198 -0
- ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
- ml_tools/ML_models/_dragon_tabular.py +248 -0
- ml_tools/ML_models/_imprimir.py +18 -0
- ml_tools/ML_models/_mlp_attention.py +134 -0
- ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
- ml_tools/ML_models_sequence/__init__.py +10 -0
- ml_tools/ML_models_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
- ml_tools/ML_models_vision/__init__.py +29 -0
- ml_tools/ML_models_vision/_base_wrapper.py +254 -0
- ml_tools/ML_models_vision/_image_classification.py +182 -0
- ml_tools/ML_models_vision/_image_segmentation.py +108 -0
- ml_tools/ML_models_vision/_imprimir.py +16 -0
- ml_tools/ML_models_vision/_object_detection.py +135 -0
- ml_tools/ML_optimization/__init__.py +21 -0
- ml_tools/ML_optimization/_imprimir.py +13 -0
- ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
- ml_tools/ML_optimization/_single_dragon.py +203 -0
- ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
- ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
- ml_tools/ML_scaler/__init__.py +10 -0
- ml_tools/ML_scaler/_imprimir.py +8 -0
- ml_tools/ML_trainer/__init__.py +20 -0
- ml_tools/ML_trainer/_base_trainer.py +297 -0
- ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
- ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
- ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
- ml_tools/ML_trainer/_imprimir.py +10 -0
- ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
- ml_tools/ML_utilities/_artifact_finder.py +382 -0
- ml_tools/ML_utilities/_imprimir.py +16 -0
- ml_tools/ML_utilities/_inspection.py +325 -0
- ml_tools/ML_utilities/_train_tools.py +205 -0
- ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
- ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
- ml_tools/ML_vision_transformers/_imprimir.py +14 -0
- ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
- ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
- ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
- ml_tools/PSO_optimization/_imprimir.py +10 -0
- ml_tools/SQL/__init__.py +7 -0
- ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
- ml_tools/SQL/_imprimir.py +8 -0
- ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
- ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
- ml_tools/VIF/_imprimir.py +10 -0
- ml_tools/_core/__init__.py +7 -1
- ml_tools/_core/_logger.py +8 -18
- ml_tools/_core/_schema_load_ops.py +43 -0
- ml_tools/_core/_script_info.py +2 -2
- ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
- ml_tools/data_exploration/_analysis.py +214 -0
- ml_tools/data_exploration/_cleaning.py +566 -0
- ml_tools/data_exploration/_features.py +583 -0
- ml_tools/data_exploration/_imprimir.py +32 -0
- ml_tools/data_exploration/_plotting.py +487 -0
- ml_tools/data_exploration/_schema_ops.py +176 -0
- ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
- ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
- ml_tools/ensemble_evaluation/_imprimir.py +14 -0
- ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
- ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
- ml_tools/ensemble_inference/_imprimir.py +9 -0
- ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
- ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
- ml_tools/ensemble_learning/_imprimir.py +10 -0
- ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
- ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
- ml_tools/excel_handler/_imprimir.py +13 -0
- ml_tools/{keys.py → keys/__init__.py} +4 -1
- ml_tools/keys/_imprimir.py +11 -0
- ml_tools/{_core → keys}/_keys.py +2 -0
- ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
- ml_tools/math_utilities/_imprimir.py +11 -0
- ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
- ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
- ml_tools/optimization_tools/_imprimir.py +13 -0
- ml_tools/optimization_tools/_optimization_bounds.py +236 -0
- ml_tools/optimization_tools/_optimization_plots.py +218 -0
- ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
- ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
- ml_tools/path_manager/_imprimir.py +15 -0
- ml_tools/path_manager/_path_tools.py +346 -0
- ml_tools/plot_fonts/__init__.py +8 -0
- ml_tools/plot_fonts/_imprimir.py +8 -0
- ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
- ml_tools/schema/__init__.py +15 -0
- ml_tools/schema/_feature_schema.py +223 -0
- ml_tools/schema/_gui_schema.py +191 -0
- ml_tools/schema/_imprimir.py +10 -0
- ml_tools/{serde.py → serde/__init__.py} +4 -2
- ml_tools/serde/_imprimir.py +10 -0
- ml_tools/{_core → serde}/_serde.py +3 -8
- ml_tools/{utilities.py → utilities/__init__.py} +11 -6
- ml_tools/utilities/_imprimir.py +18 -0
- ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
- ml_tools/utilities/_utility_tools.py +192 -0
- dragon_ml_toolbox-19.13.0.dist-info/RECORD +0 -111
- ml_tools/ML_chaining_inference.py +0 -8
- ml_tools/ML_configuration.py +0 -86
- ml_tools/ML_configuration_pytab.py +0 -14
- ml_tools/ML_datasetmaster.py +0 -10
- ml_tools/ML_evaluation.py +0 -16
- ml_tools/ML_evaluation_multi.py +0 -12
- ml_tools/ML_finalize_handler.py +0 -8
- ml_tools/ML_inference.py +0 -12
- ml_tools/ML_models.py +0 -14
- ml_tools/ML_models_advanced.py +0 -14
- ml_tools/ML_models_pytab.py +0 -14
- ml_tools/ML_optimization.py +0 -14
- ml_tools/ML_optimization_pareto.py +0 -8
- ml_tools/ML_scaler.py +0 -8
- ml_tools/ML_sequence_datasetmaster.py +0 -8
- ml_tools/ML_sequence_evaluation.py +0 -10
- ml_tools/ML_sequence_inference.py +0 -8
- ml_tools/ML_sequence_models.py +0 -8
- ml_tools/ML_trainer.py +0 -12
- ml_tools/ML_vision_datasetmaster.py +0 -12
- ml_tools/ML_vision_evaluation.py +0 -10
- ml_tools/ML_vision_inference.py +0 -8
- ml_tools/ML_vision_models.py +0 -18
- ml_tools/SQL.py +0 -8
- ml_tools/_core/_ETL_cleaning.py +0 -694
- ml_tools/_core/_IO_tools.py +0 -498
- ml_tools/_core/_ML_callbacks.py +0 -702
- ml_tools/_core/_ML_configuration.py +0 -1332
- ml_tools/_core/_ML_configuration_pytab.py +0 -102
- ml_tools/_core/_ML_evaluation.py +0 -867
- ml_tools/_core/_ML_evaluation_multi.py +0 -544
- ml_tools/_core/_ML_inference.py +0 -646
- ml_tools/_core/_ML_models.py +0 -668
- ml_tools/_core/_ML_models_pytab.py +0 -693
- ml_tools/_core/_ML_trainer.py +0 -2323
- ml_tools/_core/_ML_utilities.py +0 -886
- ml_tools/_core/_ML_vision_models.py +0 -644
- ml_tools/_core/_data_exploration.py +0 -1901
- ml_tools/_core/_optimization_tools.py +0 -493
- ml_tools/_core/_schema.py +0 -359
- ml_tools/plot_fonts.py +0 -8
- ml_tools/schema.py +0 -12
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py
RENAMED

@@ -22,10 +22,9 @@ from sklearn.metrics import (accuracy_score,
 from pathlib import Path
 from typing import Union, Optional, Literal
 
-from
-from
-from .
-from ._keys import SHAPKeys
+from ..path_manager import sanitize_filename, make_fullpath
+from .._core import get_logger
+from ..keys._keys import SHAPKeys
 
 
 _LOGGER = get_logger("Ensemble Evaluation")
@@ -731,6 +730,3 @@ def plot_learning_curves(
     fig.savefig(full_save_path, bbox_inches="tight", format="svg")
     plt.close(fig)
 
-
-def info():
-    _script_info(__all__)
ml_tools/ensemble_evaluation/_imprimir.py

@@ -0,0 +1,14 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "evaluate_model_classification",
+    "plot_roc_curve",
+    "plot_precision_recall_curve",
+    "plot_calibration_curve",
+    "evaluate_model_regression",
+    "get_shap_values",
+    "plot_learning_curves"
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
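These `_imprimir` modules replace the old per-module `info()` helpers that called `_script_info(__all__)`: each subpackage now keeps its public names in a `_GRUPOS` list and re-exports `info` from its `__init__.py` (the re-export is visible below for ensemble_inference and the other subpackages). A minimal usage sketch, assuming dragon-ml-toolbox 20.0.0 is installed:

```python
# Sketch: list a subpackage's public API with the new info() helper.
# It prints the names held in that subpackage's _GRUPOS list.
from ml_tools.ensemble_evaluation import info

info()  # e.g. evaluate_model_classification, plot_roc_curve, ...
```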
ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py}
RENAMED

@@ -1,9 +1,11 @@
-from .
+from ._ensemble_inference import (
     DragonEnsembleInferenceHandler,
-    model_report,
-    info
+    model_report
 )
 
+from ._imprimir import info
+
+
 __all__ = [
     "DragonEnsembleInferenceHandler",
     "model_report"
ml_tools/{_core → ensemble_inference}/_ensemble_inference.py
RENAMED

@@ -1,4 +1,4 @@
-from typing import Union, Literal,
+from typing import Union, Literal, Any, Optional
 from pathlib import Path
 import json
 import numpy as np
@@ -6,11 +6,11 @@ import numpy as np
 import xgboost
 import lightgbm
 
-from
-
-from
-from
-from .
+from ..serde import deserialize_object
+
+from .._core import get_logger
+from ..path_manager import make_fullpath, list_files_by_extension
+from ..keys._keys import EnsembleKeys
 
 
 _LOGGER = get_logger("Ensemble Inference")
@@ -37,10 +37,10 @@ class DragonEnsembleInferenceHandler:
             models_dir (Path): The directory containing the saved .joblib model files.
             task ("regression" | "classification"): The type of task the models perform.
         """
-        self.models:
+        self.models: dict[str, Any] = dict()
         self.task: str = task
         self.verbose = verbose
-        self._feature_names: Optional[
+        self._feature_names: Optional[list[str]] = None
 
         model_files = list_files_by_extension(directory=models_dir, extension="joblib", raise_on_empty=True)
 
@@ -53,7 +53,7 @@ class DragonEnsembleInferenceHandler:
 
             model: Any = full_object[EnsembleKeys.MODEL]
             target_name: str = full_object[EnsembleKeys.TARGET]
-            feature_names_list:
+            feature_names_list: list[str] = full_object[EnsembleKeys.FEATURES]
 
             # Check that feature names match
             if self._feature_names is None:
@@ -71,14 +71,14 @@ class DragonEnsembleInferenceHandler:
                 _LOGGER.error(f"Failed to load or parse {fname}.")
 
     @property
-    def feature_names(self) ->
+    def feature_names(self) -> list[str]:
         """
         Getter for the list of feature names the models expect.
         Returns an empty list if no models were loaded.
         """
         return self._feature_names if self._feature_names is not None else []
 
-    def predict(self, features: np.ndarray) ->
+    def predict(self, features: np.ndarray) -> dict[str, Any]:
         """
         Predicts on a single feature vector.
 
@@ -97,7 +97,7 @@ class DragonEnsembleInferenceHandler:
             _LOGGER.error("The 'predict()' method is for a single sample. Use 'predict_batch()' for multiple samples.")
             raise ValueError()
 
-        results:
+        results: dict[str, Any] = dict()
         for target_name, model in self.models.items():
             if self.task == "regression":
                 prediction = model.predict(features)
@@ -112,7 +112,7 @@ class DragonEnsembleInferenceHandler:
         _LOGGER.info("Inference process complete.")
         return results
 
-    def predict_batch(self, features: np.ndarray) ->
+    def predict_batch(self, features: np.ndarray) -> dict[str, Any]:
         """
         Predicts on a batch of feature vectors.
 
@@ -128,7 +128,7 @@ class DragonEnsembleInferenceHandler:
             _LOGGER.error("Input for batch prediction must be a 2D array.")
             raise ValueError()
 
-        results:
+        results: dict[str, Any] = dict()
         for target_name, model in self.models.items():
             if self.task == "regression":
                 results[target_name] = model.predict(features)
@@ -147,7 +147,7 @@ def model_report(
     model_path: Union[str,Path],
     output_dir: Optional[Union[str,Path]] = None,
     verbose: bool = True
-) ->
+) -> dict[str, Any]:
     """
     Deserializes a model and generates a summary report.
 
@@ -220,6 +220,3 @@ def model_report(
     # --- 5. Return the extracted data ---
     return report_data
 
-
-def info():
-    _script_info(__all__)
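The handler's public surface is unchanged; the diff mostly completes the type annotations (`dict[str, Any]`, `list[str]`) and repoints imports at the new subpackages. A minimal usage sketch based on the signatures above; the model directory and sample values are hypothetical:

```python
from pathlib import Path

import numpy as np

from ml_tools.ensemble_inference import DragonEnsembleInferenceHandler

# Hypothetical directory of .joblib files saved by ensemble_learning.
handler = DragonEnsembleInferenceHandler(models_dir=Path("outputs/models"), task="regression")

n_features = len(handler.feature_names)
single = handler.predict(np.zeros(n_features))            # 1D sample -> {target_name: prediction}
batch = handler.predict_batch(np.zeros((8, n_features)))  # 2D batch  -> {target_name: predictions}
```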
ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py}
RENAMED

@@ -1,16 +1,14 @@
-from .
+from ._ensemble_learning import (
     RegressionTreeModels,
     ClassificationTreeModels,
-    dataset_pipeline,
-    train_test_pipeline,
     run_ensemble_pipeline,
-    info
 )
 
+from ._imprimir import info
+
+
 __all__ = [
     "RegressionTreeModels",
     "ClassificationTreeModels",
-    "dataset_pipeline",
-    "train_test_pipeline",
     "run_ensemble_pipeline",
 ]
ml_tools/{_core → ensemble_learning}/_ensemble_learning.py
RENAMED

@@ -13,13 +13,9 @@ import lightgbm as lgb
 from sklearn.model_selection import train_test_split
 from sklearn.base import clone
 
-from
-from
-from
-from ._script_info import _script_info
-from ._keys import EnsembleKeys
-from ._logger import get_logger
-from ._ensemble_evaluation import (evaluate_model_classification,
+from ..utilities import yield_dataframes_from_dir, train_dataset_yielder
+from ..serde import serialize_object_filename
+from ..ensemble_evaluation import (evaluate_model_classification,
                                    plot_roc_curve,
                                    plot_precision_recall_curve,
                                    plot_calibration_curve,
@@ -27,6 +23,10 @@ from ._ensemble_evaluation import (evaluate_model_classification,
                                    get_shap_values,
                                    plot_learning_curves)
 
+from ..path_manager import sanitize_filename, make_fullpath
+from ..keys._keys import EnsembleKeys
+from .._core import get_logger
+
 import warnings # Ignore warnings
 warnings.filterwarnings('ignore', category=DeprecationWarning)
 warnings.filterwarnings('ignore', category=FutureWarning)
@@ -528,6 +528,3 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat
 
     _LOGGER.info("Training and evaluation complete.")
 
-
-def info():
-    _script_info(__all__)
ml_tools/{excel_handler.py → excel_handler/__init__.py}
RENAMED

@@ -1,13 +1,15 @@
-from .
+from ._excel_handler import (
     find_excel_files,
     unmerge_and_split_excel,
     unmerge_and_split_from_directory,
     validate_excel_schema,
     vertical_merge_transform_excel,
-    horizontal_merge_transform_excel,
-    info
+    horizontal_merge_transform_excel
 )
 
+from ._imprimir import info
+
+
 __all__ = [
     "find_excel_files",
     "unmerge_and_split_excel",
ml_tools/{_core → excel_handler}/_excel_handler.py
RENAMED

@@ -1,11 +1,10 @@
 from pathlib import Path
 from openpyxl import load_workbook, Workbook
 import pandas as pd
-from typing import
+from typing import Optional, Union
 
-from
-from
-from ._logger import get_logger
+from ..path_manager import sanitize_filename, make_fullpath
+from .._core import get_logger
 
 
 _LOGGER = get_logger("Excel Handler")
@@ -167,7 +166,7 @@ def unmerge_and_split_from_directory(input_dir: Union[str,Path], output_dir: Uni
 
 def validate_excel_schema(
     target_dir: Union[str,Path],
-    expected_columns:
+    expected_columns: list[str],
     strict: bool = False
 ) -> None:
     """
@@ -236,8 +235,8 @@ def vertical_merge_transform_excel(
     target_dir: Union[str,Path],
     csv_filename: str,
     output_dir: Union[str,Path],
-    target_columns: Optional[
-    rename_columns: Optional[
+    target_columns: Optional[list[str]] = None,
+    rename_columns: Optional[list[str]] = None
 ) -> None:
     """
     Merges multiple Excel files in a directory vertically and saves as a single CSV file.
@@ -371,6 +370,3 @@ def horizontal_merge_transform_excel(
 
     _LOGGER.info(f"Merged {len(excel_files)} Excel files into '{csv_filename}'.")
 
-
-def info():
-    _script_info(__all__)
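The excel_handler functions keep their behavior; only the type hints were completed and the internal imports repointed. A sketch of the vertical merge based on the signature and docstring above; all paths and column names are hypothetical:

```python
from ml_tools.excel_handler import vertical_merge_transform_excel

# Hypothetical example: stack every Excel file in raw_excel/ into one CSV,
# keeping two columns and renaming them on the way out.
vertical_merge_transform_excel(
    target_dir="raw_excel",
    csv_filename="merged.csv",
    output_dir="clean_csv",
    target_columns=["Sample ID", "Yield"],
    rename_columns=["sample_id", "yield"],
)
```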
ml_tools/excel_handler/_imprimir.py

@@ -0,0 +1,13 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "find_excel_files",
+    "unmerge_and_split_excel",
+    "unmerge_and_split_from_directory",
+    "validate_excel_schema",
+    "vertical_merge_transform_excel",
+    "horizontal_merge_transform_excel"
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
ml_tools/{keys.py → keys/__init__.py}
RENAMED

@@ -1,10 +1,13 @@
-from .
+from ._keys import (
     PyTorchInferenceKeys as InferenceKeys,
     _CheckpointCallbackKeys as CheckpointCallbackKeys,
     _FinalizedFileKeys as FinalizedFileKeys,
     _PublicTaskKeys as TaskKeys,
 )
 
+from ._imprimir import info
+
+
 __all__ = [
     "InferenceKeys",
     "CheckpointCallbackKeys",
ml_tools/{_core → keys}/_keys.py
RENAMED

@@ -262,6 +262,8 @@ class SchemaKeys:
     """Used by the schema module."""
     SCHEMA_FILENAME = "FeatureSchema.json"
     GUI_SCHEMA_FILENAME = "GUISchema.json"
+    # Model architecture API
+    SCHEMA_DICT = "schema_dict"
     # GUI Schema
     TARGETS = "targets"
     CONTINUOUS = "continuous"
ml_tools/{math_utilities.py → math_utilities/__init__.py}
RENAMED

@@ -1,11 +1,14 @@
-from .
+from ._math_utilities import (
     normalize_mixed_list,
     threshold_binary_values,
     threshold_binary_values_batch,
     discretize_categorical_values,
-    info
 )
 
+
+from ._imprimir import info
+
+
 __all__ = [
     "normalize_mixed_list",
     "threshold_binary_values",
ml_tools/{_core → math_utilities}/_math_utilities.py
RENAMED

@@ -3,8 +3,7 @@ import numpy as np
 import math
 from typing import Union, Sequence, Optional
 
-from
-from ._logger import get_logger
+from .._core import get_logger
 
 
 _LOGGER = get_logger("Math Utilities")
@@ -260,6 +259,3 @@ def discretize_categorical_values(
     else:
         return final_output
 
-
-def info():
-    _script_info(__all__)
ml_tools/{optimization_tools.py → optimization_tools/__init__.py}
RENAMED

@@ -1,13 +1,18 @@
-from .
+from ._optimization_plots import (
+    plot_optimal_feature_distributions,
+    plot_optimal_feature_distributions_from_dataframe,
+)
+
+from ._optimization_bounds import (
     make_continuous_bounds_template,
     load_continuous_bounds_template,
     create_optimization_bounds,
     parse_lower_upper_bounds,
-    plot_optimal_feature_distributions,
-    plot_optimal_feature_distributions_from_dataframe,
-    info
 )
 
+from ._imprimir import info
+
+
 __all__ = [
     "make_continuous_bounds_template",
     "load_continuous_bounds_template",
ml_tools/optimization_tools/_imprimir.py

@@ -0,0 +1,13 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "make_continuous_bounds_template",
+    "load_continuous_bounds_template",
+    "create_optimization_bounds",
+    "parse_lower_upper_bounds",
+    "plot_optimal_feature_distributions",
+    "plot_optimal_feature_distributions_from_dataframe",
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
ml_tools/optimization_tools/_optimization_bounds.py

@@ -0,0 +1,236 @@
+from typing import Union, Any, Optional
+from pathlib import Path
+
+from ..schema import FeatureSchema
+from ..IO_tools import save_json, load_json
+
+from ..keys._keys import OptimizationToolsKeys
+from ..path_manager import make_fullpath
+from .._core import get_logger
+
+
+_LOGGER = get_logger("Optimization Bounds")
+
+
+__all__ = [
+    "make_continuous_bounds_template",
+    "load_continuous_bounds_template",
+    "create_optimization_bounds",
+    "parse_lower_upper_bounds",
+]
+
+
+def make_continuous_bounds_template(
+    directory: Union[str, Path],
+    feature_schema: FeatureSchema,
+    default_bounds: tuple[float, float] = (0, 1)
+) -> None:
+    """
+    Creates a JSON template for manual entry of continuous feature optimization bounds.
+
+    The resulting file maps each continuous feature name to a [min, max] list
+    populated with `default_bounds`. Edit the values in this file before using.
+
+    Args:
+        directory (str | Path): The directory where the template will be saved.
+        feature_schema (FeatureSchema): The loaded schema containing feature definitions.
+        default_bounds (Tuple[float, float]): Default (min, max) values to populate the template.
+    """
+    # validate directory path
+    dir_path = make_fullpath(directory, make=True, enforce="directory")
+
+    # 1. Check if continuous features exist
+    if not feature_schema.continuous_feature_names:
+        _LOGGER.warning("No continuous features found in FeatureSchema. Skipping bounds template generation.")
+        return
+
+    # 2. Construct the dictionary: {feature_name: [min, max]}
+    bounds_map = {
+        name: list(default_bounds)
+        for name in feature_schema.continuous_feature_names
+    }
+
+    # use a fixed key for the filename
+    filename = OptimizationToolsKeys.OPTIMIZATION_BOUNDS_FILENAME + ".json"
+
+    # 3. Save to JSON using the IO tool
+    save_json(
+        data=bounds_map,
+        directory=dir_path,
+        filename=filename,
+        verbose=False
+    )
+
+    _LOGGER.info(f"💾 Continuous bounds template saved to: '{dir_path.name}/{filename}'")
+
+
+def load_continuous_bounds_template(directory: Union[str, Path]) -> dict[str, list[float]]:
+    """
+    Loads the continuous feature bounds template from JSON. Expected filename: `optimization_bounds.json`.
+
+    Args:
+        directory (str | Path): The directory where the template is located.
+
+    Returns:
+        Dictionary (Dict[str, List[float]]): A dictionary mapping feature names to [min, max] bounds.
+    """
+    dir_path = make_fullpath(directory, enforce="directory")
+    full_path = dir_path / (OptimizationToolsKeys.OPTIMIZATION_BOUNDS_FILENAME + ".json")
+
+    bounds_map = load_json(
+        file_path=full_path,
+        expected_type='dict',
+        verbose=False
+    )
+
+    # validate loaded data
+    if not all(
+        isinstance(v, list) and  # Check type
+        len(v) == 2 and  # Check length
+        all(isinstance(i, (int, float)) for i in v)  # Check contents are numbers
+        for v in bounds_map.values()
+    ):
+        _LOGGER.error(f"Invalid format in bounds template at '{full_path}'. Each value must be a list of [min, max].")
+        raise ValueError()
+
+    _LOGGER.info(f"Continuous bounds template loaded from: '{dir_path.name}'")
+
+    return bounds_map
+
+
+def create_optimization_bounds(
+    schema: FeatureSchema,
+    continuous_bounds_map: Union[dict[str, tuple[float, float]], dict[str, list[float]]],
+    start_at_zero: bool = True
+) -> tuple[list[float], list[float]]:
+    """
+    Generates the lower and upper bounds lists for the optimizer from a FeatureSchema.
+
+    This helper function automates the creation of unbiased bounds for
+    categorical features and combines them with user-defined bounds for
+    continuous features, using the schema as the single source of truth
+    for feature order and type.
+
+    Args:
+        schema (FeatureSchema):
+            The definitive schema object created by
+            `data_exploration.finalize_feature_schema()`.
+        continuous_bounds_map (Dict[str, Tuple[float, float]], Dict[str, List[float]]):
+            A dictionary mapping the *name* of each **continuous** feature
+            to its (min_bound, max_bound).
+        start_at_zero (bool):
+            - If True, assumes categorical encoding is [0, 1, ..., k-1].
+              Bounds will be set as [-0.5, k - 0.5].
+            - If False, assumes encoding is [1, 2, ..., k].
+              Bounds will be set as [0.5, k + 0.5].
+
+    Returns:
+        Tuple[List[float], List[float]]:
+            A tuple containing two lists: (lower_bounds, upper_bounds).
+
+    Raises:
+        ValueError: If a feature is missing from `continuous_bounds_map`
+            or if a feature name in the map is not a
+            continuous feature according to the schema.
+    """
+    # validate length in the continuous_bounds_map values
+    for name, bounds in continuous_bounds_map.items():
+        if not (isinstance(bounds, (list, tuple)) and len(bounds) == 2):
+            _LOGGER.error(f"Bounds for feature '{name}' must be a list or tuple of length 2 (min, max). Found: {bounds}")
+            raise ValueError()
+
+    # 1. Get feature names and map from schema
+    feature_names = schema.feature_names
+    categorical_index_map = schema.categorical_index_map
+    total_features = len(feature_names)
+
+    if total_features <= 0:
+        _LOGGER.error("Schema contains no features.")
+        raise ValueError()
+
+    _LOGGER.info(f"Generating bounds for {total_features} total features...")
+
+    # 2. Initialize bound lists
+    lower_bounds: list[Optional[float]] = [None] * total_features
+    upper_bounds: list[Optional[float]] = [None] * total_features
+
+    # 3. Populate categorical bounds (Index-based)
+    if categorical_index_map:
+        for index, cardinality in categorical_index_map.items():
+            if not (0 <= index < total_features):
+                _LOGGER.error(f"Categorical index {index} is out of range for the {total_features} features.")
+                raise ValueError()
+
+            if start_at_zero:
+                # Rule for [0, k-1]: bounds are [-0.5, k - 0.5]
+                low = -0.5
+                high = float(cardinality) - 0.5
+            else:
+                # Rule for [1, k]: bounds are [0.5, k + 0.5]
+                low = 0.5
+                high = float(cardinality) + 0.5
+
+            lower_bounds[index] = low
+            upper_bounds[index] = high
+
+        _LOGGER.info(f"Automatically set bounds for {len(categorical_index_map)} categorical features.")
+    else:
+        _LOGGER.info("No categorical features found in schema.")
+
+    # 4. Populate continuous bounds (Name-based)
+    # Use schema.continuous_feature_names for robust checking
+    continuous_names_set = set(schema.continuous_feature_names)
+
+    if continuous_names_set != set(continuous_bounds_map.keys()):
+        missing_in_map = continuous_names_set - set(continuous_bounds_map.keys())
+        if missing_in_map:
+            _LOGGER.error(f"The following continuous features are missing from 'continuous_bounds_map': {list(missing_in_map)}")
+
+        extra_in_map = set(continuous_bounds_map.keys()) - continuous_names_set
+        if extra_in_map:
+            _LOGGER.error(f"The following features in 'continuous_bounds_map' are not defined as continuous in the schema: {list(extra_in_map)}")
+
+        raise ValueError("Mismatch between 'continuous_bounds_map' and schema's continuous features.")
+
+    count_continuous = 0
+    for name, (low, high) in continuous_bounds_map.items():
+        # Map name to its index in the *feature-only* list
+        # This is guaranteed to be correct by the schema
+        index = feature_names.index(name)
+
+        if lower_bounds[index] is not None:
+            # This should be impossible if schema is correct, but good to check
+            _LOGGER.error(f"Schema conflict: Feature '{name}' (at index {index}) is defined as both continuous and categorical.")
+            raise ValueError()
+
+        lower_bounds[index] = float(low)
+        upper_bounds[index] = float(high)
+        count_continuous += 1
+
+    _LOGGER.info(f"Manually set bounds for {count_continuous} continuous features.")
+
+    # 5. Final Validation (all Nones should be filled)
+    if None in lower_bounds:
+        missing_indices = [i for i, b in enumerate(lower_bounds) if b is None]
+        missing_names = [feature_names[i] for i in missing_indices]
+        _LOGGER.error(f"Failed to create all bounds. This indicates an internal logic error. Missing: {missing_names}")
+        raise RuntimeError("Internal error: Not all bounds were populated.")
+
+    # Cast to float lists, as 'None' sentinels are gone
+    return (
+        [float(b) for b in lower_bounds],  # type: ignore
+        [float(b) for b in upper_bounds]  # type: ignore
+    )
+
+
+def parse_lower_upper_bounds(source: dict[str,tuple[Any,Any]]):
+    """
+    Parse lower and upper boundaries, returning 2 lists:
+
+    `lower_bounds`, `upper_bounds`
+    """
+    lower = [low[0] for low in source.values()]
+    upper = [up[1] for up in source.values()]
+
+    return lower, upper
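Taken together, the new module defines a template, edit, load, merge workflow for optimizer bounds. A sketch under the assumption that a `FeatureSchema` is already available (per the docstring, from `data_exploration.finalize_feature_schema()`); the directory name is hypothetical:

```python
from ml_tools.optimization_tools import (
    make_continuous_bounds_template,
    load_continuous_bounds_template,
    create_optimization_bounds,
)

schema = ...  # a FeatureSchema, e.g. from data_exploration.finalize_feature_schema()

# 1. Write optimization_bounds.json mapping each continuous feature to the default [0, 1].
make_continuous_bounds_template("opt_config", feature_schema=schema, default_bounds=(0, 1))

# 2. Hand-edit the JSON, then load it back as {feature_name: [min, max]}.
bounds_map = load_continuous_bounds_template("opt_config")

# 3. Merge with automatic categorical bounds: [-0.5, k - 0.5] for a k-category feature
#    when encoding starts at 0 (start_at_zero=True), else [0.5, k + 0.5].
lower_bounds, upper_bounds = create_optimization_bounds(
    schema=schema,
    continuous_bounds_map=bounds_map,
    start_at_zero=True,
)
```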