dragon-ml-toolbox 19.14.0__py3-none-any.whl → 20.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
- dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
- ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
- ml_tools/ETL_cleaning/_basic_clean.py +351 -0
- ml_tools/ETL_cleaning/_clean_tools.py +128 -0
- ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
- ml_tools/ETL_cleaning/_imprimir.py +13 -0
- ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
- ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
- ml_tools/ETL_engineering/_imprimir.py +24 -0
- ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
- ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
- ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
- ml_tools/GUI_tools/_imprimir.py +12 -0
- ml_tools/IO_tools/_IO_loggers.py +235 -0
- ml_tools/IO_tools/_IO_save_load.py +151 -0
- ml_tools/IO_tools/_IO_utils.py +140 -0
- ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
- ml_tools/IO_tools/_imprimir.py +14 -0
- ml_tools/MICE/_MICE_imputation.py +132 -0
- ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
- ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
- ml_tools/MICE/_imprimir.py +11 -0
- ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
- ml_tools/ML_callbacks/_base.py +101 -0
- ml_tools/ML_callbacks/_checkpoint.py +232 -0
- ml_tools/ML_callbacks/_early_stop.py +208 -0
- ml_tools/ML_callbacks/_imprimir.py +12 -0
- ml_tools/ML_callbacks/_scheduler.py +197 -0
- ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
- ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
- ml_tools/ML_chain/_dragon_chain.py +140 -0
- ml_tools/ML_chain/_imprimir.py +11 -0
- ml_tools/ML_configuration/__init__.py +90 -0
- ml_tools/ML_configuration/_base_model_config.py +69 -0
- ml_tools/ML_configuration/_finalize.py +366 -0
- ml_tools/ML_configuration/_imprimir.py +47 -0
- ml_tools/ML_configuration/_metrics.py +593 -0
- ml_tools/ML_configuration/_models.py +206 -0
- ml_tools/ML_configuration/_training.py +124 -0
- ml_tools/ML_datasetmaster/__init__.py +28 -0
- ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
- ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
- ml_tools/ML_datasetmaster/_imprimir.py +15 -0
- ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
- ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
- ml_tools/ML_evaluation/__init__.py +53 -0
- ml_tools/ML_evaluation/_classification.py +629 -0
- ml_tools/ML_evaluation/_feature_importance.py +409 -0
- ml_tools/ML_evaluation/_imprimir.py +25 -0
- ml_tools/ML_evaluation/_loss.py +92 -0
- ml_tools/ML_evaluation/_regression.py +273 -0
- ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
- ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
- ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
- ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
- ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
- ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
- ml_tools/ML_finalize_handler/__init__.py +10 -0
- ml_tools/ML_finalize_handler/_imprimir.py +8 -0
- ml_tools/ML_inference/__init__.py +22 -0
- ml_tools/ML_inference/_base_inference.py +166 -0
- ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
- ml_tools/ML_inference/_dragon_inference.py +332 -0
- ml_tools/ML_inference/_imprimir.py +11 -0
- ml_tools/ML_inference/_multi_inference.py +180 -0
- ml_tools/ML_inference_sequence/__init__.py +10 -0
- ml_tools/ML_inference_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
- ml_tools/ML_inference_vision/__init__.py +10 -0
- ml_tools/ML_inference_vision/_imprimir.py +8 -0
- ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
- ml_tools/ML_models/__init__.py +32 -0
- ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
- ml_tools/ML_models/_base_mlp_attention.py +198 -0
- ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
- ml_tools/ML_models/_dragon_tabular.py +248 -0
- ml_tools/ML_models/_imprimir.py +18 -0
- ml_tools/ML_models/_mlp_attention.py +134 -0
- ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
- ml_tools/ML_models_sequence/__init__.py +10 -0
- ml_tools/ML_models_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
- ml_tools/ML_models_vision/__init__.py +29 -0
- ml_tools/ML_models_vision/_base_wrapper.py +254 -0
- ml_tools/ML_models_vision/_image_classification.py +182 -0
- ml_tools/ML_models_vision/_image_segmentation.py +108 -0
- ml_tools/ML_models_vision/_imprimir.py +16 -0
- ml_tools/ML_models_vision/_object_detection.py +135 -0
- ml_tools/ML_optimization/__init__.py +21 -0
- ml_tools/ML_optimization/_imprimir.py +13 -0
- ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
- ml_tools/ML_optimization/_single_dragon.py +203 -0
- ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
- ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
- ml_tools/ML_scaler/__init__.py +10 -0
- ml_tools/ML_scaler/_imprimir.py +8 -0
- ml_tools/ML_trainer/__init__.py +20 -0
- ml_tools/ML_trainer/_base_trainer.py +297 -0
- ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
- ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
- ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
- ml_tools/ML_trainer/_imprimir.py +10 -0
- ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
- ml_tools/ML_utilities/_artifact_finder.py +382 -0
- ml_tools/ML_utilities/_imprimir.py +16 -0
- ml_tools/ML_utilities/_inspection.py +325 -0
- ml_tools/ML_utilities/_train_tools.py +205 -0
- ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
- ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
- ml_tools/ML_vision_transformers/_imprimir.py +14 -0
- ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
- ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
- ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
- ml_tools/PSO_optimization/_imprimir.py +10 -0
- ml_tools/SQL/__init__.py +7 -0
- ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
- ml_tools/SQL/_imprimir.py +8 -0
- ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
- ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
- ml_tools/VIF/_imprimir.py +10 -0
- ml_tools/_core/__init__.py +7 -1
- ml_tools/_core/_logger.py +8 -18
- ml_tools/_core/_schema_load_ops.py +43 -0
- ml_tools/_core/_script_info.py +2 -2
- ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
- ml_tools/data_exploration/_analysis.py +214 -0
- ml_tools/data_exploration/_cleaning.py +566 -0
- ml_tools/data_exploration/_features.py +583 -0
- ml_tools/data_exploration/_imprimir.py +32 -0
- ml_tools/data_exploration/_plotting.py +487 -0
- ml_tools/data_exploration/_schema_ops.py +176 -0
- ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
- ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
- ml_tools/ensemble_evaluation/_imprimir.py +14 -0
- ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
- ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
- ml_tools/ensemble_inference/_imprimir.py +9 -0
- ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
- ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
- ml_tools/ensemble_learning/_imprimir.py +10 -0
- ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
- ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
- ml_tools/excel_handler/_imprimir.py +13 -0
- ml_tools/{keys.py → keys/__init__.py} +4 -1
- ml_tools/keys/_imprimir.py +11 -0
- ml_tools/{_core → keys}/_keys.py +2 -0
- ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
- ml_tools/math_utilities/_imprimir.py +11 -0
- ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
- ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
- ml_tools/optimization_tools/_imprimir.py +13 -0
- ml_tools/optimization_tools/_optimization_bounds.py +236 -0
- ml_tools/optimization_tools/_optimization_plots.py +218 -0
- ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
- ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
- ml_tools/path_manager/_imprimir.py +15 -0
- ml_tools/path_manager/_path_tools.py +346 -0
- ml_tools/plot_fonts/__init__.py +8 -0
- ml_tools/plot_fonts/_imprimir.py +8 -0
- ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
- ml_tools/schema/__init__.py +15 -0
- ml_tools/schema/_feature_schema.py +223 -0
- ml_tools/schema/_gui_schema.py +191 -0
- ml_tools/schema/_imprimir.py +10 -0
- ml_tools/{serde.py → serde/__init__.py} +4 -2
- ml_tools/serde/_imprimir.py +10 -0
- ml_tools/{_core → serde}/_serde.py +3 -8
- ml_tools/{utilities.py → utilities/__init__.py} +11 -6
- ml_tools/utilities/_imprimir.py +18 -0
- ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
- ml_tools/utilities/_utility_tools.py +192 -0
- dragon_ml_toolbox-19.14.0.dist-info/RECORD +0 -111
- ml_tools/ML_chaining_inference.py +0 -8
- ml_tools/ML_configuration.py +0 -86
- ml_tools/ML_configuration_pytab.py +0 -14
- ml_tools/ML_datasetmaster.py +0 -10
- ml_tools/ML_evaluation.py +0 -16
- ml_tools/ML_evaluation_multi.py +0 -12
- ml_tools/ML_finalize_handler.py +0 -8
- ml_tools/ML_inference.py +0 -12
- ml_tools/ML_models.py +0 -14
- ml_tools/ML_models_advanced.py +0 -14
- ml_tools/ML_models_pytab.py +0 -14
- ml_tools/ML_optimization.py +0 -14
- ml_tools/ML_optimization_pareto.py +0 -8
- ml_tools/ML_scaler.py +0 -8
- ml_tools/ML_sequence_datasetmaster.py +0 -8
- ml_tools/ML_sequence_evaluation.py +0 -10
- ml_tools/ML_sequence_inference.py +0 -8
- ml_tools/ML_sequence_models.py +0 -8
- ml_tools/ML_trainer.py +0 -12
- ml_tools/ML_vision_datasetmaster.py +0 -12
- ml_tools/ML_vision_evaluation.py +0 -10
- ml_tools/ML_vision_inference.py +0 -8
- ml_tools/ML_vision_models.py +0 -18
- ml_tools/SQL.py +0 -8
- ml_tools/_core/_ETL_cleaning.py +0 -694
- ml_tools/_core/_IO_tools.py +0 -498
- ml_tools/_core/_ML_callbacks.py +0 -702
- ml_tools/_core/_ML_configuration.py +0 -1332
- ml_tools/_core/_ML_configuration_pytab.py +0 -102
- ml_tools/_core/_ML_evaluation.py +0 -867
- ml_tools/_core/_ML_evaluation_multi.py +0 -544
- ml_tools/_core/_ML_inference.py +0 -646
- ml_tools/_core/_ML_models.py +0 -668
- ml_tools/_core/_ML_models_pytab.py +0 -693
- ml_tools/_core/_ML_trainer.py +0 -2323
- ml_tools/_core/_ML_utilities.py +0 -886
- ml_tools/_core/_ML_vision_models.py +0 -644
- ml_tools/_core/_data_exploration.py +0 -1909
- ml_tools/_core/_optimization_tools.py +0 -493
- ml_tools/_core/_schema.py +0 -359
- ml_tools/plot_fonts.py +0 -8
- ml_tools/schema.py +0 -12
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,20 +1,15 @@
|
|
|
1
1
|
import torch
|
|
2
|
-
from torch.utils.data import Dataset
|
|
3
2
|
import pandas
|
|
4
|
-
import numpy
|
|
5
3
|
from sklearn.model_selection import train_test_split
|
|
6
|
-
from typing import Literal, Union,
|
|
7
|
-
from abc import ABC
|
|
8
|
-
from pathlib import Path
|
|
4
|
+
from typing import Literal, Union, Optional
|
|
9
5
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
|
|
13
|
-
from
|
|
14
|
-
from .
|
|
15
|
-
|
|
16
|
-
from .
|
|
17
|
-
from ._IO_tools import custom_logger
|
|
6
|
+
from ..ML_scaler import DragonScaler
|
|
7
|
+
from ..schema import FeatureSchema
|
|
8
|
+
|
|
9
|
+
from .._core import get_logger
|
|
10
|
+
from ..keys._keys import MLTaskKeys
|
|
11
|
+
|
|
12
|
+
from ._base_datasetmaster import _BaseDatasetMaker, _PytorchDataset
|
|
18
13
|
|
|
19
14
|
|
|
20
15
|
_LOGGER = get_logger("DragonDataset")
|
|
@@ -26,318 +21,6 @@ __all__ = [
|
|
|
26
21
|
]
|
|
27
22
|
|
|
28
23
|
|
|
29
|
-
# --- Internal Helper Class ---
|
|
30
|
-
class _PytorchDataset(Dataset):
|
|
31
|
-
"""
|
|
32
|
-
Internal helper class to create a PyTorch Dataset.
|
|
33
|
-
Converts numpy/pandas data into tensors for model consumption.
|
|
34
|
-
"""
|
|
35
|
-
def __init__(self, features: Union[numpy.ndarray, pandas.DataFrame],
|
|
36
|
-
labels: Union[numpy.ndarray, pandas.Series, pandas.DataFrame],
|
|
37
|
-
labels_dtype: torch.dtype,
|
|
38
|
-
features_dtype: torch.dtype = torch.float32,
|
|
39
|
-
feature_names: Optional[List[str]] = None,
|
|
40
|
-
target_names: Optional[List[str]] = None):
|
|
41
|
-
|
|
42
|
-
if isinstance(features, numpy.ndarray):
|
|
43
|
-
self.features = torch.tensor(features, dtype=features_dtype)
|
|
44
|
-
else: # It's a pandas.DataFrame
|
|
45
|
-
self.features = torch.tensor(features.to_numpy(), dtype=features_dtype)
|
|
46
|
-
|
|
47
|
-
if isinstance(labels, numpy.ndarray):
|
|
48
|
-
self.labels = torch.tensor(labels, dtype=labels_dtype)
|
|
49
|
-
elif isinstance(labels, (pandas.Series, pandas.DataFrame)):
|
|
50
|
-
self.labels = torch.tensor(labels.to_numpy(), dtype=labels_dtype)
|
|
51
|
-
else:
|
|
52
|
-
self.labels = torch.tensor(labels, dtype=labels_dtype)
|
|
53
|
-
|
|
54
|
-
self._feature_names = feature_names
|
|
55
|
-
self._target_names = target_names
|
|
56
|
-
self._classes: List[str] = []
|
|
57
|
-
self._class_map: dict[str,int] = dict()
|
|
58
|
-
self._feature_scaler: Optional[DragonScaler] = None
|
|
59
|
-
self._target_scaler: Optional[DragonScaler] = None
|
|
60
|
-
|
|
61
|
-
def __len__(self):
|
|
62
|
-
return len(self.features)
|
|
63
|
-
|
|
64
|
-
def __getitem__(self, index):
|
|
65
|
-
return self.features[index], self.labels[index]
|
|
66
|
-
|
|
67
|
-
@property
|
|
68
|
-
def feature_names(self):
|
|
69
|
-
if self._feature_names is not None:
|
|
70
|
-
return self._feature_names
|
|
71
|
-
else:
|
|
72
|
-
_LOGGER.error(f"Dataset {self.__class__} has not been initialized with any feature names.")
|
|
73
|
-
raise ValueError()
|
|
74
|
-
|
|
75
|
-
@property
|
|
76
|
-
def target_names(self):
|
|
77
|
-
if self._target_names is not None:
|
|
78
|
-
return self._target_names
|
|
79
|
-
else:
|
|
80
|
-
_LOGGER.error(f"Dataset {self.__class__} has not been initialized with any target names.")
|
|
81
|
-
raise ValueError()
|
|
82
|
-
|
|
83
|
-
@property
|
|
84
|
-
def classes(self):
|
|
85
|
-
return self._classes
|
|
86
|
-
|
|
87
|
-
@property
|
|
88
|
-
def class_map(self):
|
|
89
|
-
return self._class_map
|
|
90
|
-
|
|
91
|
-
@property
|
|
92
|
-
def feature_scaler(self):
|
|
93
|
-
return self._feature_scaler
|
|
94
|
-
|
|
95
|
-
@property
|
|
96
|
-
def target_scaler(self):
|
|
97
|
-
return self._target_scaler
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
# --- Abstract Base Class ---
|
|
101
|
-
class _BaseDatasetMaker(ABC):
|
|
102
|
-
"""
|
|
103
|
-
Abstract base class for dataset makers. Contains shared logic.
|
|
104
|
-
"""
|
|
105
|
-
def __init__(self):
|
|
106
|
-
self._train_ds: Optional[Dataset] = None
|
|
107
|
-
self._val_ds: Optional[Dataset] = None
|
|
108
|
-
self._test_ds: Optional[Dataset] = None
|
|
109
|
-
|
|
110
|
-
self.feature_scaler: Optional[DragonScaler] = None
|
|
111
|
-
self.target_scaler: Optional[DragonScaler] = None
|
|
112
|
-
|
|
113
|
-
self._id: Optional[str] = None
|
|
114
|
-
self._feature_names: List[str] = []
|
|
115
|
-
self._target_names: List[str] = []
|
|
116
|
-
self._X_train_shape = (0,0)
|
|
117
|
-
self._X_val_shape = (0,0)
|
|
118
|
-
self._X_test_shape = (0,0)
|
|
119
|
-
self._y_train_shape = (0,)
|
|
120
|
-
self._y_val_shape = (0,)
|
|
121
|
-
self._y_test_shape = (0,)
|
|
122
|
-
self.class_map: dict[str, int] = dict()
|
|
123
|
-
self.classes: list[str] = list()
|
|
124
|
-
|
|
125
|
-
def _prepare_feature_scaler(self,
|
|
126
|
-
X_train: pandas.DataFrame,
|
|
127
|
-
y_train: Union[pandas.Series, pandas.DataFrame],
|
|
128
|
-
X_val: pandas.DataFrame,
|
|
129
|
-
X_test: pandas.DataFrame,
|
|
130
|
-
label_dtype: torch.dtype,
|
|
131
|
-
schema: FeatureSchema) -> Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
|
|
132
|
-
"""Internal helper to fit and apply a DragonScaler for FEATURES using a FeatureSchema."""
|
|
133
|
-
continuous_feature_indices: Optional[List[int]] = None
|
|
134
|
-
|
|
135
|
-
# Get continuous feature indices *from the schema*
|
|
136
|
-
if schema.continuous_feature_names:
|
|
137
|
-
_LOGGER.info("Getting continuous feature indices from schema.")
|
|
138
|
-
try:
|
|
139
|
-
# Convert columns to a standard list for .index()
|
|
140
|
-
train_cols_list = X_train.columns.to_list()
|
|
141
|
-
# Map names from schema to column indices in the training DataFrame
|
|
142
|
-
continuous_feature_indices = [train_cols_list.index(name) for name in schema.continuous_feature_names]
|
|
143
|
-
except ValueError as e:
|
|
144
|
-
_LOGGER.error(f"Feature name from schema not found in training data columns:\n{e}")
|
|
145
|
-
raise ValueError()
|
|
146
|
-
else:
|
|
147
|
-
_LOGGER.info("No continuous features listed in schema. Feature scaler will not be fitted.")
|
|
148
|
-
|
|
149
|
-
X_train_values = X_train.to_numpy()
|
|
150
|
-
X_val_values = X_val.to_numpy()
|
|
151
|
-
X_test_values = X_test.to_numpy()
|
|
152
|
-
|
|
153
|
-
# continuous_feature_indices is derived
|
|
154
|
-
if self.feature_scaler is None and continuous_feature_indices:
|
|
155
|
-
_LOGGER.info("Fitting a new DragonScaler on training features.")
|
|
156
|
-
temp_train_ds = _PytorchDataset(X_train_values, y_train, label_dtype)
|
|
157
|
-
self.feature_scaler = DragonScaler.fit(temp_train_ds, continuous_feature_indices)
|
|
158
|
-
|
|
159
|
-
if self.feature_scaler and self.feature_scaler.mean_ is not None:
|
|
160
|
-
_LOGGER.info("Applying scaler transformation to train, validation, and test feature sets.")
|
|
161
|
-
X_train_tensor = self.feature_scaler.transform(torch.tensor(X_train_values, dtype=torch.float32))
|
|
162
|
-
X_val_tensor = self.feature_scaler.transform(torch.tensor(X_val_values, dtype=torch.float32))
|
|
163
|
-
X_test_tensor = self.feature_scaler.transform(torch.tensor(X_test_values, dtype=torch.float32))
|
|
164
|
-
return X_train_tensor.numpy(), X_val_tensor.numpy(), X_test_tensor.numpy()
|
|
165
|
-
|
|
166
|
-
return X_train_values, X_val_values, X_test_values
|
|
167
|
-
|
|
168
|
-
def _prepare_target_scaler(self,
|
|
169
|
-
y_train: Union[pandas.Series, pandas.DataFrame],
|
|
170
|
-
y_val: Union[pandas.Series, pandas.DataFrame],
|
|
171
|
-
y_test: Union[pandas.Series, pandas.DataFrame]) -> Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
|
|
172
|
-
"""Internal helper to fit and apply a DragonScaler for TARGETS."""
|
|
173
|
-
|
|
174
|
-
y_train_arr = y_train.to_numpy() if isinstance(y_train, (pandas.Series, pandas.DataFrame)) else y_train
|
|
175
|
-
y_val_arr = y_val.to_numpy() if isinstance(y_val, (pandas.Series, pandas.DataFrame)) else y_val
|
|
176
|
-
y_test_arr = y_test.to_numpy() if isinstance(y_test, (pandas.Series, pandas.DataFrame)) else y_test
|
|
177
|
-
|
|
178
|
-
if self.target_scaler is None:
|
|
179
|
-
_LOGGER.info("Fitting a new DragonScaler on training targets.")
|
|
180
|
-
# Convert to float tensor for calculation
|
|
181
|
-
y_train_tensor = torch.tensor(y_train_arr, dtype=torch.float32)
|
|
182
|
-
self.target_scaler = DragonScaler.fit_tensor(y_train_tensor)
|
|
183
|
-
|
|
184
|
-
if self.target_scaler and self.target_scaler.mean_ is not None:
|
|
185
|
-
_LOGGER.info("Applying scaler transformation to train, validation, and test targets.")
|
|
186
|
-
y_train_tensor = self.target_scaler.transform(torch.tensor(y_train_arr, dtype=torch.float32))
|
|
187
|
-
y_val_tensor = self.target_scaler.transform(torch.tensor(y_val_arr, dtype=torch.float32))
|
|
188
|
-
y_test_tensor = self.target_scaler.transform(torch.tensor(y_test_arr, dtype=torch.float32))
|
|
189
|
-
return y_train_tensor.numpy(), y_val_tensor.numpy(), y_test_tensor.numpy()
|
|
190
|
-
|
|
191
|
-
return y_train_arr, y_val_arr, y_test_arr
|
|
192
|
-
|
|
193
|
-
def _attach_scalers_to_datasets(self):
|
|
194
|
-
"""Helper to attach the master scalers to the child datasets."""
|
|
195
|
-
for ds in [self._train_ds, self._val_ds, self._test_ds]:
|
|
196
|
-
if ds is not None:
|
|
197
|
-
ds._feature_scaler = self.feature_scaler
|
|
198
|
-
ds._target_scaler = self.target_scaler
|
|
199
|
-
|
|
200
|
-
@property
|
|
201
|
-
def train_dataset(self) -> Dataset:
|
|
202
|
-
if self._train_ds is None:
|
|
203
|
-
_LOGGER.error("Train Dataset not yet created.")
|
|
204
|
-
raise RuntimeError()
|
|
205
|
-
return self._train_ds
|
|
206
|
-
|
|
207
|
-
@property
|
|
208
|
-
def validation_dataset(self) -> Dataset:
|
|
209
|
-
if self._val_ds is None:
|
|
210
|
-
_LOGGER.error("Validation Dataset not yet created.")
|
|
211
|
-
raise RuntimeError()
|
|
212
|
-
return self._val_ds
|
|
213
|
-
|
|
214
|
-
@property
|
|
215
|
-
def test_dataset(self) -> Dataset:
|
|
216
|
-
if self._test_ds is None:
|
|
217
|
-
_LOGGER.error("Test Dataset not yet created.")
|
|
218
|
-
raise RuntimeError()
|
|
219
|
-
return self._test_ds
|
|
220
|
-
|
|
221
|
-
@property
|
|
222
|
-
def feature_names(self) -> list[str]:
|
|
223
|
-
return self._feature_names
|
|
224
|
-
|
|
225
|
-
@property
|
|
226
|
-
def target_names(self) -> list[str]:
|
|
227
|
-
return self._target_names
|
|
228
|
-
|
|
229
|
-
@property
|
|
230
|
-
def number_of_features(self) -> int:
|
|
231
|
-
return len(self._feature_names)
|
|
232
|
-
|
|
233
|
-
@property
|
|
234
|
-
def number_of_targets(self) -> int:
|
|
235
|
-
return len(self._target_names)
|
|
236
|
-
|
|
237
|
-
@property
|
|
238
|
-
def id(self) -> Optional[str]:
|
|
239
|
-
return self._id
|
|
240
|
-
|
|
241
|
-
@id.setter
|
|
242
|
-
def id(self, dataset_id: str):
|
|
243
|
-
if not isinstance(dataset_id, str): raise ValueError("ID must be a string.")
|
|
244
|
-
self._id = dataset_id
|
|
245
|
-
|
|
246
|
-
def dataframes_info(self) -> None:
|
|
247
|
-
print("--- DataFrame Shapes After Split ---")
|
|
248
|
-
print(f" X_train shape: {self._X_train_shape}, y_train shape: {self._y_train_shape}")
|
|
249
|
-
print(f" X_val shape: {self._X_val_shape}, y_val shape: {self._y_val_shape}")
|
|
250
|
-
print(f" X_test shape: {self._X_test_shape}, y_test shape: {self._y_test_shape}")
|
|
251
|
-
print("------------------------------------")
|
|
252
|
-
|
|
253
|
-
def save_feature_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
|
|
254
|
-
save_list_strings(list_strings=self._feature_names,
|
|
255
|
-
directory=directory,
|
|
256
|
-
filename=DatasetKeys.FEATURE_NAMES,
|
|
257
|
-
verbose=verbose)
|
|
258
|
-
|
|
259
|
-
def save_target_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
|
|
260
|
-
save_list_strings(list_strings=self._target_names,
|
|
261
|
-
directory=directory,
|
|
262
|
-
filename=DatasetKeys.TARGET_NAMES,
|
|
263
|
-
verbose=verbose)
|
|
264
|
-
|
|
265
|
-
def save_scaler(self, directory: Union[str, Path], verbose: bool=True) -> None:
|
|
266
|
-
"""
|
|
267
|
-
Saves both feature and target scalers (if they exist) to a single .pth file
|
|
268
|
-
using a dictionary structure.
|
|
269
|
-
"""
|
|
270
|
-
if self.feature_scaler is None and self.target_scaler is None:
|
|
271
|
-
_LOGGER.warning("No scalers (feature or target) were fitted. Nothing to save.")
|
|
272
|
-
return
|
|
273
|
-
|
|
274
|
-
if not self.id:
|
|
275
|
-
_LOGGER.error("Must set the dataset `id` before saving scaler.")
|
|
276
|
-
raise ValueError()
|
|
277
|
-
|
|
278
|
-
save_path = make_fullpath(directory, make=True, enforce="directory")
|
|
279
|
-
sanitized_id = sanitize_filename(self.id)
|
|
280
|
-
filename = f"{DatasetKeys.SCALER_PREFIX}{sanitized_id}.pth"
|
|
281
|
-
filepath = save_path / filename
|
|
282
|
-
|
|
283
|
-
# Construct the consolidated dictionary
|
|
284
|
-
combined_state = {}
|
|
285
|
-
|
|
286
|
-
print_message = "Saved "
|
|
287
|
-
|
|
288
|
-
if self.feature_scaler:
|
|
289
|
-
combined_state[ScalerKeys.FEATURE_SCALER] = self.feature_scaler._get_state()
|
|
290
|
-
print_message += "feature scaler "
|
|
291
|
-
|
|
292
|
-
if self.target_scaler:
|
|
293
|
-
if self.feature_scaler:
|
|
294
|
-
print_message += "and "
|
|
295
|
-
combined_state[ScalerKeys.TARGET_SCALER] = self.target_scaler._get_state()
|
|
296
|
-
print_message += "target scaler "
|
|
297
|
-
|
|
298
|
-
torch.save(combined_state, filepath)
|
|
299
|
-
|
|
300
|
-
if verbose:
|
|
301
|
-
_LOGGER.info(f"{print_message}to '{filepath.name}'.")
|
|
302
|
-
|
|
303
|
-
def save_class_map(self, directory: Union[str,Path], verbose: bool=True) -> None:
|
|
304
|
-
"""
|
|
305
|
-
Saves the class map dictionary to a JSON file.
|
|
306
|
-
|
|
307
|
-
Args:
|
|
308
|
-
directory (str | Path): Directory to save the class map.
|
|
309
|
-
verbose (bool): Whether to print log messages.
|
|
310
|
-
"""
|
|
311
|
-
if not self.class_map:
|
|
312
|
-
_LOGGER.warning(f"No class_map defined. Skipping.")
|
|
313
|
-
return
|
|
314
|
-
|
|
315
|
-
log_name = f"Class_to_Index_{self.id}" if self.id else "Class_to_Index"
|
|
316
|
-
|
|
317
|
-
custom_logger(data=self.class_map,
|
|
318
|
-
save_directory=directory,
|
|
319
|
-
log_name=log_name,
|
|
320
|
-
add_timestamp=False,
|
|
321
|
-
dict_as="json")
|
|
322
|
-
if verbose:
|
|
323
|
-
_LOGGER.info(f"Class map for '{self.id}' saved as '{log_name}.json'.")
|
|
324
|
-
|
|
325
|
-
def save_artifacts(self, directory: Union[str, Path], verbose: bool=True) -> None:
|
|
326
|
-
"""
|
|
327
|
-
Saves all dataset artifacts: feature names, target names, scalers, and class map (if applicable).
|
|
328
|
-
|
|
329
|
-
Args:
|
|
330
|
-
directory (str | Path): Directory to save artifacts.
|
|
331
|
-
verbose (bool): Whether to print log messages.
|
|
332
|
-
"""
|
|
333
|
-
self.save_feature_names(directory=directory, verbose=verbose)
|
|
334
|
-
self.save_target_names(directory=directory, verbose=verbose)
|
|
335
|
-
if self.feature_scaler is not None or self.target_scaler is not None:
|
|
336
|
-
self.save_scaler(directory=directory, verbose=verbose)
|
|
337
|
-
if self.class_map:
|
|
338
|
-
self.save_class_map(directory=directory, verbose=verbose)
|
|
339
|
-
|
|
340
|
-
|
|
341
24
|
# Single target dataset
|
|
342
25
|
class DragonDataset(_BaseDatasetMaker):
|
|
343
26
|
"""
|
|
@@ -549,7 +232,7 @@ class DragonDatasetMulti(_BaseDatasetMaker):
|
|
|
549
232
|
"""
|
|
550
233
|
def __init__(self,
|
|
551
234
|
pandas_df: pandas.DataFrame,
|
|
552
|
-
target_columns:
|
|
235
|
+
target_columns: list[str],
|
|
553
236
|
schema: FeatureSchema,
|
|
554
237
|
kind: Literal["multitarget regression", "multilabel binary classification"],
|
|
555
238
|
feature_scaler: Union[Literal["fit"], Literal["none"], DragonScaler] = "fit",
|
|
@@ -700,6 +383,3 @@ class DragonDatasetMulti(_BaseDatasetMaker):
|
|
|
700
383
|
if self._test_ds: s += f" Test Samples: {len(self._test_ds)}\n" # type: ignore
|
|
701
384
|
return s
|
|
702
385
|
|
|
703
|
-
|
|
704
|
-
def info():
|
|
705
|
-
_script_info(__all__)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .._core import _imprimir_disponibles
|
|
2
|
+
|
|
3
|
+
_GRUPOS = [
|
|
4
|
+
"DragonDataset",
|
|
5
|
+
"DragonDatasetMulti",
|
|
6
|
+
# sequence
|
|
7
|
+
"DragonDatasetSequence",
|
|
8
|
+
# vision
|
|
9
|
+
"DragonDatasetVision",
|
|
10
|
+
"DragonDatasetSegmentation",
|
|
11
|
+
"DragonDatasetObjectDetection",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
def info():
|
|
15
|
+
_imprimir_disponibles(_GRUPOS)
|
ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py}
RENAMED
|
@@ -2,19 +2,20 @@ import torch
|
|
|
2
2
|
from torch.utils.data import Dataset
|
|
3
3
|
import pandas
|
|
4
4
|
import numpy
|
|
5
|
-
from typing import Literal, Union
|
|
5
|
+
from typing import Literal, Union
|
|
6
6
|
import matplotlib.pyplot as plt
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
|
|
9
|
-
from
|
|
10
|
-
from ._logger import get_logger
|
|
11
|
-
from ._script_info import _script_info
|
|
12
|
-
from ._ML_scaler import DragonScaler
|
|
13
|
-
from ._ML_datasetmaster import _PytorchDataset
|
|
14
|
-
from ._keys import DatasetKeys, MLTaskKeys, SequenceDatasetKeys, ScalerKeys
|
|
9
|
+
from ..ML_scaler import DragonScaler
|
|
15
10
|
|
|
11
|
+
from ..path_manager import make_fullpath
|
|
12
|
+
from .._core import get_logger
|
|
13
|
+
from ..keys._keys import DatasetKeys, MLTaskKeys, SequenceDatasetKeys, ScalerKeys
|
|
16
14
|
|
|
17
|
-
|
|
15
|
+
from ._base_datasetmaster import _PytorchDataset
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
_LOGGER = get_logger("DragonSequenceDataset")
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
__all__ = [
|
|
@@ -202,8 +203,8 @@ class DragonDatasetSequence:
|
|
|
202
203
|
if self.scaler is not None:
|
|
203
204
|
for ds in [self._train_dataset, self._val_dataset, self._test_dataset]:
|
|
204
205
|
if ds is not None:
|
|
205
|
-
ds._feature_scaler = self.scaler
|
|
206
|
-
ds._target_scaler = self.scaler
|
|
206
|
+
ds._feature_scaler = self.scaler # type: ignore
|
|
207
|
+
ds._target_scaler = self.scaler # type: ignore
|
|
207
208
|
|
|
208
209
|
self._are_windows_generated = True
|
|
209
210
|
_LOGGER.info("Feature and label windows generated for train, validation, and test sets.")
|
|
@@ -291,7 +292,7 @@ class DragonDatasetSequence:
|
|
|
291
292
|
_LOGGER.info(f"📈 Sequence data splits saved as '{full_path.name}'.")
|
|
292
293
|
plt.close()
|
|
293
294
|
|
|
294
|
-
def get_datasets(self) ->
|
|
295
|
+
def get_datasets(self) -> tuple[Dataset, Dataset, Dataset]:
|
|
295
296
|
"""Returns the final train, validation, and test datasets."""
|
|
296
297
|
if not self._are_windows_generated:
|
|
297
298
|
_LOGGER.error("Windows have not been generated. Call .generate_windows() first.")
|
|
@@ -349,7 +350,7 @@ class DragonDatasetSequence:
|
|
|
349
350
|
start_idx = val_split_idx - self.sequence_length
|
|
350
351
|
end_idx = val_split_idx
|
|
351
352
|
|
|
352
|
-
return self.sequence[start_idx:end_idx]
|
|
353
|
+
return self.sequence[start_idx:end_idx] # type: ignore
|
|
353
354
|
|
|
354
355
|
@property
|
|
355
356
|
def feature_names(self):
|
|
@@ -398,6 +399,3 @@ class DragonDatasetSequence:
|
|
|
398
399
|
|
|
399
400
|
return s
|
|
400
401
|
|
|
401
|
-
|
|
402
|
-
def info():
|
|
403
|
-
_script_info(__all__)
|