dragon_ml_toolbox-19.14.0-py3-none-any.whl → dragon_ml_toolbox-20.0.0-py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
Files changed (219)
  1. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
  2. dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
  3. ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
  4. ml_tools/ETL_cleaning/_basic_clean.py +351 -0
  5. ml_tools/ETL_cleaning/_clean_tools.py +128 -0
  6. ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
  7. ml_tools/ETL_cleaning/_imprimir.py +13 -0
  8. ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
  9. ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
  10. ml_tools/ETL_engineering/_imprimir.py +24 -0
  11. ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
  12. ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
  13. ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
  14. ml_tools/GUI_tools/_imprimir.py +12 -0
  15. ml_tools/IO_tools/_IO_loggers.py +235 -0
  16. ml_tools/IO_tools/_IO_save_load.py +151 -0
  17. ml_tools/IO_tools/_IO_utils.py +140 -0
  18. ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
  19. ml_tools/IO_tools/_imprimir.py +14 -0
  20. ml_tools/MICE/_MICE_imputation.py +132 -0
  21. ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
  22. ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
  23. ml_tools/MICE/_imprimir.py +11 -0
  24. ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
  25. ml_tools/ML_callbacks/_base.py +101 -0
  26. ml_tools/ML_callbacks/_checkpoint.py +232 -0
  27. ml_tools/ML_callbacks/_early_stop.py +208 -0
  28. ml_tools/ML_callbacks/_imprimir.py +12 -0
  29. ml_tools/ML_callbacks/_scheduler.py +197 -0
  30. ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
  31. ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
  32. ml_tools/ML_chain/_dragon_chain.py +140 -0
  33. ml_tools/ML_chain/_imprimir.py +11 -0
  34. ml_tools/ML_configuration/__init__.py +90 -0
  35. ml_tools/ML_configuration/_base_model_config.py +69 -0
  36. ml_tools/ML_configuration/_finalize.py +366 -0
  37. ml_tools/ML_configuration/_imprimir.py +47 -0
  38. ml_tools/ML_configuration/_metrics.py +593 -0
  39. ml_tools/ML_configuration/_models.py +206 -0
  40. ml_tools/ML_configuration/_training.py +124 -0
  41. ml_tools/ML_datasetmaster/__init__.py +28 -0
  42. ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
  43. ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
  44. ml_tools/ML_datasetmaster/_imprimir.py +15 -0
  45. ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
  46. ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
  47. ml_tools/ML_evaluation/__init__.py +53 -0
  48. ml_tools/ML_evaluation/_classification.py +629 -0
  49. ml_tools/ML_evaluation/_feature_importance.py +409 -0
  50. ml_tools/ML_evaluation/_imprimir.py +25 -0
  51. ml_tools/ML_evaluation/_loss.py +92 -0
  52. ml_tools/ML_evaluation/_regression.py +273 -0
  53. ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
  54. ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
  55. ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
  56. ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
  57. ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
  58. ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
  59. ml_tools/ML_finalize_handler/__init__.py +10 -0
  60. ml_tools/ML_finalize_handler/_imprimir.py +8 -0
  61. ml_tools/ML_inference/__init__.py +22 -0
  62. ml_tools/ML_inference/_base_inference.py +166 -0
  63. ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
  64. ml_tools/ML_inference/_dragon_inference.py +332 -0
  65. ml_tools/ML_inference/_imprimir.py +11 -0
  66. ml_tools/ML_inference/_multi_inference.py +180 -0
  67. ml_tools/ML_inference_sequence/__init__.py +10 -0
  68. ml_tools/ML_inference_sequence/_imprimir.py +8 -0
  69. ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
  70. ml_tools/ML_inference_vision/__init__.py +10 -0
  71. ml_tools/ML_inference_vision/_imprimir.py +8 -0
  72. ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
  73. ml_tools/ML_models/__init__.py +32 -0
  74. ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
  75. ml_tools/ML_models/_base_mlp_attention.py +198 -0
  76. ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
  77. ml_tools/ML_models/_dragon_tabular.py +248 -0
  78. ml_tools/ML_models/_imprimir.py +18 -0
  79. ml_tools/ML_models/_mlp_attention.py +134 -0
  80. ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
  81. ml_tools/ML_models_sequence/__init__.py +10 -0
  82. ml_tools/ML_models_sequence/_imprimir.py +8 -0
  83. ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
  84. ml_tools/ML_models_vision/__init__.py +29 -0
  85. ml_tools/ML_models_vision/_base_wrapper.py +254 -0
  86. ml_tools/ML_models_vision/_image_classification.py +182 -0
  87. ml_tools/ML_models_vision/_image_segmentation.py +108 -0
  88. ml_tools/ML_models_vision/_imprimir.py +16 -0
  89. ml_tools/ML_models_vision/_object_detection.py +135 -0
  90. ml_tools/ML_optimization/__init__.py +21 -0
  91. ml_tools/ML_optimization/_imprimir.py +13 -0
  92. ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
  93. ml_tools/ML_optimization/_single_dragon.py +203 -0
  94. ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
  95. ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
  96. ml_tools/ML_scaler/__init__.py +10 -0
  97. ml_tools/ML_scaler/_imprimir.py +8 -0
  98. ml_tools/ML_trainer/__init__.py +20 -0
  99. ml_tools/ML_trainer/_base_trainer.py +297 -0
  100. ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
  101. ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
  102. ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
  103. ml_tools/ML_trainer/_imprimir.py +10 -0
  104. ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
  105. ml_tools/ML_utilities/_artifact_finder.py +382 -0
  106. ml_tools/ML_utilities/_imprimir.py +16 -0
  107. ml_tools/ML_utilities/_inspection.py +325 -0
  108. ml_tools/ML_utilities/_train_tools.py +205 -0
  109. ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
  110. ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
  111. ml_tools/ML_vision_transformers/_imprimir.py +14 -0
  112. ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
  113. ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
  114. ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
  115. ml_tools/PSO_optimization/_imprimir.py +10 -0
  116. ml_tools/SQL/__init__.py +7 -0
  117. ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
  118. ml_tools/SQL/_imprimir.py +8 -0
  119. ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
  120. ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
  121. ml_tools/VIF/_imprimir.py +10 -0
  122. ml_tools/_core/__init__.py +7 -1
  123. ml_tools/_core/_logger.py +8 -18
  124. ml_tools/_core/_schema_load_ops.py +43 -0
  125. ml_tools/_core/_script_info.py +2 -2
  126. ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
  127. ml_tools/data_exploration/_analysis.py +214 -0
  128. ml_tools/data_exploration/_cleaning.py +566 -0
  129. ml_tools/data_exploration/_features.py +583 -0
  130. ml_tools/data_exploration/_imprimir.py +32 -0
  131. ml_tools/data_exploration/_plotting.py +487 -0
  132. ml_tools/data_exploration/_schema_ops.py +176 -0
  133. ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
  134. ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
  135. ml_tools/ensemble_evaluation/_imprimir.py +14 -0
  136. ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
  137. ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
  138. ml_tools/ensemble_inference/_imprimir.py +9 -0
  139. ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
  140. ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
  141. ml_tools/ensemble_learning/_imprimir.py +10 -0
  142. ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
  143. ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
  144. ml_tools/excel_handler/_imprimir.py +13 -0
  145. ml_tools/{keys.py → keys/__init__.py} +4 -1
  146. ml_tools/keys/_imprimir.py +11 -0
  147. ml_tools/{_core → keys}/_keys.py +2 -0
  148. ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
  149. ml_tools/math_utilities/_imprimir.py +11 -0
  150. ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
  151. ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
  152. ml_tools/optimization_tools/_imprimir.py +13 -0
  153. ml_tools/optimization_tools/_optimization_bounds.py +236 -0
  154. ml_tools/optimization_tools/_optimization_plots.py +218 -0
  155. ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
  156. ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
  157. ml_tools/path_manager/_imprimir.py +15 -0
  158. ml_tools/path_manager/_path_tools.py +346 -0
  159. ml_tools/plot_fonts/__init__.py +8 -0
  160. ml_tools/plot_fonts/_imprimir.py +8 -0
  161. ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
  162. ml_tools/schema/__init__.py +15 -0
  163. ml_tools/schema/_feature_schema.py +223 -0
  164. ml_tools/schema/_gui_schema.py +191 -0
  165. ml_tools/schema/_imprimir.py +10 -0
  166. ml_tools/{serde.py → serde/__init__.py} +4 -2
  167. ml_tools/serde/_imprimir.py +10 -0
  168. ml_tools/{_core → serde}/_serde.py +3 -8
  169. ml_tools/{utilities.py → utilities/__init__.py} +11 -6
  170. ml_tools/utilities/_imprimir.py +18 -0
  171. ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
  172. ml_tools/utilities/_utility_tools.py +192 -0
  173. dragon_ml_toolbox-19.14.0.dist-info/RECORD +0 -111
  174. ml_tools/ML_chaining_inference.py +0 -8
  175. ml_tools/ML_configuration.py +0 -86
  176. ml_tools/ML_configuration_pytab.py +0 -14
  177. ml_tools/ML_datasetmaster.py +0 -10
  178. ml_tools/ML_evaluation.py +0 -16
  179. ml_tools/ML_evaluation_multi.py +0 -12
  180. ml_tools/ML_finalize_handler.py +0 -8
  181. ml_tools/ML_inference.py +0 -12
  182. ml_tools/ML_models.py +0 -14
  183. ml_tools/ML_models_advanced.py +0 -14
  184. ml_tools/ML_models_pytab.py +0 -14
  185. ml_tools/ML_optimization.py +0 -14
  186. ml_tools/ML_optimization_pareto.py +0 -8
  187. ml_tools/ML_scaler.py +0 -8
  188. ml_tools/ML_sequence_datasetmaster.py +0 -8
  189. ml_tools/ML_sequence_evaluation.py +0 -10
  190. ml_tools/ML_sequence_inference.py +0 -8
  191. ml_tools/ML_sequence_models.py +0 -8
  192. ml_tools/ML_trainer.py +0 -12
  193. ml_tools/ML_vision_datasetmaster.py +0 -12
  194. ml_tools/ML_vision_evaluation.py +0 -10
  195. ml_tools/ML_vision_inference.py +0 -8
  196. ml_tools/ML_vision_models.py +0 -18
  197. ml_tools/SQL.py +0 -8
  198. ml_tools/_core/_ETL_cleaning.py +0 -694
  199. ml_tools/_core/_IO_tools.py +0 -498
  200. ml_tools/_core/_ML_callbacks.py +0 -702
  201. ml_tools/_core/_ML_configuration.py +0 -1332
  202. ml_tools/_core/_ML_configuration_pytab.py +0 -102
  203. ml_tools/_core/_ML_evaluation.py +0 -867
  204. ml_tools/_core/_ML_evaluation_multi.py +0 -544
  205. ml_tools/_core/_ML_inference.py +0 -646
  206. ml_tools/_core/_ML_models.py +0 -668
  207. ml_tools/_core/_ML_models_pytab.py +0 -693
  208. ml_tools/_core/_ML_trainer.py +0 -2323
  209. ml_tools/_core/_ML_utilities.py +0 -886
  210. ml_tools/_core/_ML_vision_models.py +0 -644
  211. ml_tools/_core/_data_exploration.py +0 -1909
  212. ml_tools/_core/_optimization_tools.py +0 -493
  213. ml_tools/_core/_schema.py +0 -359
  214. ml_tools/plot_fonts.py +0 -8
  215. ml_tools/schema.py +0 -12
  216. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
  217. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
  218. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  219. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
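The dominant change in 20.0.0 is structural: each flat ml_tools/<name>.py module becomes a package (ml_tools/<name>/__init__.py plus private _*.py submodules and an _imprimir.py helper), and the shared _core/_*.py implementation modules are dissolved into those packages. A few top-level modules are also renamed outright: MICE_imputation becomes MICE, VIF_factor becomes VIF, and ML_chaining_utilities becomes ML_chain. A minimal migration sketch, assuming the new packages re-export their public names from __init__.py (the ".py → /__init__.py" renames above suggest this, but the __init__.py bodies are not shown here):

# 19.x: flat top-level modules
from ml_tools import MICE_imputation, VIF_factor, ML_chaining_utilities

# 20.x: the same functionality lives in renamed packages
from ml_tools import MICE, VIF, ML_chain

# Modules converted in place keep their import path, for example:
from ml_tools.ML_datasetmaster import DragonDataset  # assumes DragonDataset is re-exported

The representative hunks below come from the ML_datasetmaster package conversion.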
ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py}
@@ -1,20 +1,15 @@
 import torch
-from torch.utils.data import Dataset
 import pandas
-import numpy
 from sklearn.model_selection import train_test_split
-from typing import Literal, Union, List, Optional, Tuple
-from abc import ABC
-from pathlib import Path
+from typing import Literal, Union, Optional
 
-from ._path_manager import make_fullpath, sanitize_filename
-from ._logger import get_logger
-from ._script_info import _script_info
-from ._IO_tools import save_list_strings
-from ._ML_scaler import DragonScaler
-from ._keys import DatasetKeys, MLTaskKeys, ScalerKeys
-from ._schema import FeatureSchema
-from ._IO_tools import custom_logger
+from ..ML_scaler import DragonScaler
+from ..schema import FeatureSchema
+
+from .._core import get_logger
+from ..keys._keys import MLTaskKeys
+
+from ._base_datasetmaster import _BaseDatasetMaker, _PytorchDataset
 
 
 _LOGGER = get_logger("DragonDataset")
@@ -26,318 +21,6 @@ __all__ = [
 ]
 
 
-# --- Internal Helper Class ---
-class _PytorchDataset(Dataset):
-    """
-    Internal helper class to create a PyTorch Dataset.
-    Converts numpy/pandas data into tensors for model consumption.
-    """
-    def __init__(self, features: Union[numpy.ndarray, pandas.DataFrame],
-                 labels: Union[numpy.ndarray, pandas.Series, pandas.DataFrame],
-                 labels_dtype: torch.dtype,
-                 features_dtype: torch.dtype = torch.float32,
-                 feature_names: Optional[List[str]] = None,
-                 target_names: Optional[List[str]] = None):
-
-        if isinstance(features, numpy.ndarray):
-            self.features = torch.tensor(features, dtype=features_dtype)
-        else: # It's a pandas.DataFrame
-            self.features = torch.tensor(features.to_numpy(), dtype=features_dtype)
-
-        if isinstance(labels, numpy.ndarray):
-            self.labels = torch.tensor(labels, dtype=labels_dtype)
-        elif isinstance(labels, (pandas.Series, pandas.DataFrame)):
-            self.labels = torch.tensor(labels.to_numpy(), dtype=labels_dtype)
-        else:
-            self.labels = torch.tensor(labels, dtype=labels_dtype)
-
-        self._feature_names = feature_names
-        self._target_names = target_names
-        self._classes: List[str] = []
-        self._class_map: dict[str,int] = dict()
-        self._feature_scaler: Optional[DragonScaler] = None
-        self._target_scaler: Optional[DragonScaler] = None
-
-    def __len__(self):
-        return len(self.features)
-
-    def __getitem__(self, index):
-        return self.features[index], self.labels[index]
-
-    @property
-    def feature_names(self):
-        if self._feature_names is not None:
-            return self._feature_names
-        else:
-            _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any feature names.")
-            raise ValueError()
-
-    @property
-    def target_names(self):
-        if self._target_names is not None:
-            return self._target_names
-        else:
-            _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any target names.")
-            raise ValueError()
-
-    @property
-    def classes(self):
-        return self._classes
-
-    @property
-    def class_map(self):
-        return self._class_map
-
-    @property
-    def feature_scaler(self):
-        return self._feature_scaler
-
-    @property
-    def target_scaler(self):
-        return self._target_scaler
-
-
-# --- Abstract Base Class ---
-class _BaseDatasetMaker(ABC):
-    """
-    Abstract base class for dataset makers. Contains shared logic.
-    """
-    def __init__(self):
-        self._train_ds: Optional[Dataset] = None
-        self._val_ds: Optional[Dataset] = None
-        self._test_ds: Optional[Dataset] = None
-
-        self.feature_scaler: Optional[DragonScaler] = None
-        self.target_scaler: Optional[DragonScaler] = None
-
-        self._id: Optional[str] = None
-        self._feature_names: List[str] = []
-        self._target_names: List[str] = []
-        self._X_train_shape = (0,0)
-        self._X_val_shape = (0,0)
-        self._X_test_shape = (0,0)
-        self._y_train_shape = (0,)
-        self._y_val_shape = (0,)
-        self._y_test_shape = (0,)
-        self.class_map: dict[str, int] = dict()
-        self.classes: list[str] = list()
-
-    def _prepare_feature_scaler(self,
-                                X_train: pandas.DataFrame,
-                                y_train: Union[pandas.Series, pandas.DataFrame],
-                                X_val: pandas.DataFrame,
-                                X_test: pandas.DataFrame,
-                                label_dtype: torch.dtype,
-                                schema: FeatureSchema) -> Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
-        """Internal helper to fit and apply a DragonScaler for FEATURES using a FeatureSchema."""
-        continuous_feature_indices: Optional[List[int]] = None
-
-        # Get continuous feature indices *from the schema*
-        if schema.continuous_feature_names:
-            _LOGGER.info("Getting continuous feature indices from schema.")
-            try:
-                # Convert columns to a standard list for .index()
-                train_cols_list = X_train.columns.to_list()
-                # Map names from schema to column indices in the training DataFrame
-                continuous_feature_indices = [train_cols_list.index(name) for name in schema.continuous_feature_names]
-            except ValueError as e:
-                _LOGGER.error(f"Feature name from schema not found in training data columns:\n{e}")
-                raise ValueError()
-        else:
-            _LOGGER.info("No continuous features listed in schema. Feature scaler will not be fitted.")
-
-        X_train_values = X_train.to_numpy()
-        X_val_values = X_val.to_numpy()
-        X_test_values = X_test.to_numpy()
-
-        # continuous_feature_indices is derived
-        if self.feature_scaler is None and continuous_feature_indices:
-            _LOGGER.info("Fitting a new DragonScaler on training features.")
-            temp_train_ds = _PytorchDataset(X_train_values, y_train, label_dtype)
-            self.feature_scaler = DragonScaler.fit(temp_train_ds, continuous_feature_indices)
-
-        if self.feature_scaler and self.feature_scaler.mean_ is not None:
-            _LOGGER.info("Applying scaler transformation to train, validation, and test feature sets.")
-            X_train_tensor = self.feature_scaler.transform(torch.tensor(X_train_values, dtype=torch.float32))
-            X_val_tensor = self.feature_scaler.transform(torch.tensor(X_val_values, dtype=torch.float32))
-            X_test_tensor = self.feature_scaler.transform(torch.tensor(X_test_values, dtype=torch.float32))
-            return X_train_tensor.numpy(), X_val_tensor.numpy(), X_test_tensor.numpy()
-
-        return X_train_values, X_val_values, X_test_values
-
-    def _prepare_target_scaler(self,
-                               y_train: Union[pandas.Series, pandas.DataFrame],
-                               y_val: Union[pandas.Series, pandas.DataFrame],
-                               y_test: Union[pandas.Series, pandas.DataFrame]) -> Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
-        """Internal helper to fit and apply a DragonScaler for TARGETS."""
-
-        y_train_arr = y_train.to_numpy() if isinstance(y_train, (pandas.Series, pandas.DataFrame)) else y_train
-        y_val_arr = y_val.to_numpy() if isinstance(y_val, (pandas.Series, pandas.DataFrame)) else y_val
-        y_test_arr = y_test.to_numpy() if isinstance(y_test, (pandas.Series, pandas.DataFrame)) else y_test
-
-        if self.target_scaler is None:
-            _LOGGER.info("Fitting a new DragonScaler on training targets.")
-            # Convert to float tensor for calculation
-            y_train_tensor = torch.tensor(y_train_arr, dtype=torch.float32)
-            self.target_scaler = DragonScaler.fit_tensor(y_train_tensor)
-
-        if self.target_scaler and self.target_scaler.mean_ is not None:
-            _LOGGER.info("Applying scaler transformation to train, validation, and test targets.")
-            y_train_tensor = self.target_scaler.transform(torch.tensor(y_train_arr, dtype=torch.float32))
-            y_val_tensor = self.target_scaler.transform(torch.tensor(y_val_arr, dtype=torch.float32))
-            y_test_tensor = self.target_scaler.transform(torch.tensor(y_test_arr, dtype=torch.float32))
-            return y_train_tensor.numpy(), y_val_tensor.numpy(), y_test_tensor.numpy()
-
-        return y_train_arr, y_val_arr, y_test_arr
-
-    def _attach_scalers_to_datasets(self):
-        """Helper to attach the master scalers to the child datasets."""
-        for ds in [self._train_ds, self._val_ds, self._test_ds]:
-            if ds is not None:
-                ds._feature_scaler = self.feature_scaler
-                ds._target_scaler = self.target_scaler
-
-    @property
-    def train_dataset(self) -> Dataset:
-        if self._train_ds is None:
-            _LOGGER.error("Train Dataset not yet created.")
-            raise RuntimeError()
-        return self._train_ds
-
-    @property
-    def validation_dataset(self) -> Dataset:
-        if self._val_ds is None:
-            _LOGGER.error("Validation Dataset not yet created.")
-            raise RuntimeError()
-        return self._val_ds
-
-    @property
-    def test_dataset(self) -> Dataset:
-        if self._test_ds is None:
-            _LOGGER.error("Test Dataset not yet created.")
-            raise RuntimeError()
-        return self._test_ds
-
-    @property
-    def feature_names(self) -> list[str]:
-        return self._feature_names
-
-    @property
-    def target_names(self) -> list[str]:
-        return self._target_names
-
-    @property
-    def number_of_features(self) -> int:
-        return len(self._feature_names)
-
-    @property
-    def number_of_targets(self) -> int:
-        return len(self._target_names)
-
-    @property
-    def id(self) -> Optional[str]:
-        return self._id
-
-    @id.setter
-    def id(self, dataset_id: str):
-        if not isinstance(dataset_id, str): raise ValueError("ID must be a string.")
-        self._id = dataset_id
-
-    def dataframes_info(self) -> None:
-        print("--- DataFrame Shapes After Split ---")
-        print(f" X_train shape: {self._X_train_shape}, y_train shape: {self._y_train_shape}")
-        print(f" X_val shape: {self._X_val_shape}, y_val shape: {self._y_val_shape}")
-        print(f" X_test shape: {self._X_test_shape}, y_test shape: {self._y_test_shape}")
-        print("------------------------------------")
-
-    def save_feature_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
-        save_list_strings(list_strings=self._feature_names,
-                          directory=directory,
-                          filename=DatasetKeys.FEATURE_NAMES,
-                          verbose=verbose)
-
-    def save_target_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
-        save_list_strings(list_strings=self._target_names,
-                          directory=directory,
-                          filename=DatasetKeys.TARGET_NAMES,
-                          verbose=verbose)
-
-    def save_scaler(self, directory: Union[str, Path], verbose: bool=True) -> None:
-        """
-        Saves both feature and target scalers (if they exist) to a single .pth file
-        using a dictionary structure.
-        """
-        if self.feature_scaler is None and self.target_scaler is None:
-            _LOGGER.warning("No scalers (feature or target) were fitted. Nothing to save.")
-            return
-
-        if not self.id:
-            _LOGGER.error("Must set the dataset `id` before saving scaler.")
-            raise ValueError()
-
-        save_path = make_fullpath(directory, make=True, enforce="directory")
-        sanitized_id = sanitize_filename(self.id)
-        filename = f"{DatasetKeys.SCALER_PREFIX}{sanitized_id}.pth"
-        filepath = save_path / filename
-
-        # Construct the consolidated dictionary
-        combined_state = {}
-
-        print_message = "Saved "
-
-        if self.feature_scaler:
-            combined_state[ScalerKeys.FEATURE_SCALER] = self.feature_scaler._get_state()
-            print_message += "feature scaler "
-
-        if self.target_scaler:
-            if self.feature_scaler:
-                print_message += "and "
-            combined_state[ScalerKeys.TARGET_SCALER] = self.target_scaler._get_state()
-            print_message += "target scaler "
-
-        torch.save(combined_state, filepath)
-
-        if verbose:
-            _LOGGER.info(f"{print_message}to '{filepath.name}'.")
-
-    def save_class_map(self, directory: Union[str,Path], verbose: bool=True) -> None:
-        """
-        Saves the class map dictionary to a JSON file.
-
-        Args:
-            directory (str | Path): Directory to save the class map.
-            verbose (bool): Whether to print log messages.
-        """
-        if not self.class_map:
-            _LOGGER.warning(f"No class_map defined. Skipping.")
-            return
-
-        log_name = f"Class_to_Index_{self.id}" if self.id else "Class_to_Index"
-
-        custom_logger(data=self.class_map,
-                      save_directory=directory,
-                      log_name=log_name,
-                      add_timestamp=False,
-                      dict_as="json")
-        if verbose:
-            _LOGGER.info(f"Class map for '{self.id}' saved as '{log_name}.json'.")
-
-    def save_artifacts(self, directory: Union[str, Path], verbose: bool=True) -> None:
-        """
-        Saves all dataset artifacts: feature names, target names, scalers, and class map (if applicable).
-
-        Args:
-            directory (str | Path): Directory to save artifacts.
-            verbose (bool): Whether to print log messages.
-        """
-        self.save_feature_names(directory=directory, verbose=verbose)
-        self.save_target_names(directory=directory, verbose=verbose)
-        if self.feature_scaler is not None or self.target_scaler is not None:
-            self.save_scaler(directory=directory, verbose=verbose)
-        if self.class_map:
-            self.save_class_map(directory=directory, verbose=verbose)
-
-
 # Single target dataset
 class DragonDataset(_BaseDatasetMaker):
     """
@@ -549,7 +232,7 @@ class DragonDatasetMulti(_BaseDatasetMaker):
     """
     def __init__(self,
                  pandas_df: pandas.DataFrame,
-                 target_columns: List[str],
+                 target_columns: list[str],
                  schema: FeatureSchema,
                  kind: Literal["multitarget regression", "multilabel binary classification"],
                  feature_scaler: Union[Literal["fit"], Literal["none"], DragonScaler] = "fit",
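The visible part of the DragonDatasetMulti signature supports a usage sketch. Everything past the parameters shown above, including how a FeatureSchema is constructed, is outside this diff and left as a labeled gap:

import pandas
from ml_tools.ML_datasetmaster import DragonDatasetMulti  # assumes a re-export in __init__.py

df = pandas.DataFrame({
    "x1": [0.1, 0.5, 0.9, 0.3],
    "x2": [1.0, 2.0, 3.0, 4.0],
    "t1": [0, 1, 0, 1],
    "t2": [1, 1, 0, 0],
})
schema = ...  # build a FeatureSchema for x1/x2; its constructor is not shown in this diff

datasets = DragonDatasetMulti(
    pandas_df=df,
    target_columns=["t1", "t2"],  # plain list[str] as of 20.0.0
    schema=schema,
    kind="multilabel binary classification",
    feature_scaler="fit",  # or "none", or a pre-fitted DragonScaler
)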
@@ -700,6 +383,3 @@ class DragonDatasetMulti(_BaseDatasetMaker):
         if self._test_ds: s += f"  Test Samples: {len(self._test_ds)}\n" # type: ignore
         return s
 
-
-def info():
-    _script_info(__all__)
ml_tools/ML_datasetmaster/_imprimir.py (new file)
@@ -0,0 +1,15 @@
+from .._core import _imprimir_disponibles
+
+_GRUPOS = [
+    "DragonDataset",
+    "DragonDatasetMulti",
+    # sequence
+    "DragonDatasetSequence",
+    # vision
+    "DragonDatasetVision",
+    "DragonDatasetSegmentation",
+    "DragonDatasetObjectDetection",
+]
+
+def info():
+    _imprimir_disponibles(_GRUPOS)
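Every converted package gains an _imprimir.py like this one: a list of its public names and an info() that prints them through the shared _imprimir_disponibles helper, replacing the per-module "def info(): _script_info(__all__)" deleted elsewhere in this diff. Assuming info is re-exported by the package __init__.py, discovery looks like:

from ml_tools import ML_datasetmaster

ML_datasetmaster.info()  # prints the names listed in _GRUPOS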
ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py}
@@ -2,19 +2,20 @@ import torch
 from torch.utils.data import Dataset
 import pandas
 import numpy
-from typing import Literal, Union, Tuple
+from typing import Literal, Union
 import matplotlib.pyplot as plt
 from pathlib import Path
 
-from ._path_manager import make_fullpath
-from ._logger import get_logger
-from ._script_info import _script_info
-from ._ML_scaler import DragonScaler
-from ._ML_datasetmaster import _PytorchDataset
-from ._keys import DatasetKeys, MLTaskKeys, SequenceDatasetKeys, ScalerKeys
+from ..ML_scaler import DragonScaler
 
+from ..path_manager import make_fullpath
+from .._core import get_logger
+from ..keys._keys import DatasetKeys, MLTaskKeys, SequenceDatasetKeys, ScalerKeys
 
-_LOGGER = get_logger("DragonDataset")
+from ._base_datasetmaster import _PytorchDataset
+
+
+_LOGGER = get_logger("DragonSequenceDataset")
 
 
 __all__ = [
@@ -202,8 +203,8 @@ class DragonDatasetSequence:
         if self.scaler is not None:
             for ds in [self._train_dataset, self._val_dataset, self._test_dataset]:
                 if ds is not None:
-                    ds._feature_scaler = self.scaler
-                    ds._target_scaler = self.scaler
+                    ds._feature_scaler = self.scaler # type: ignore
+                    ds._target_scaler = self.scaler # type: ignore
 
         self._are_windows_generated = True
         _LOGGER.info("Feature and label windows generated for train, validation, and test sets.")
@@ -291,7 +292,7 @@ class DragonDatasetSequence:
         _LOGGER.info(f"📈 Sequence data splits saved as '{full_path.name}'.")
         plt.close()
 
-    def get_datasets(self) -> Tuple[Dataset, Dataset, Dataset]:
+    def get_datasets(self) -> tuple[Dataset, Dataset, Dataset]:
         """Returns the final train, validation, and test datasets."""
         if not self._are_windows_generated:
             _LOGGER.error("Windows have not been generated. Call .generate_windows() first.")
@@ -349,7 +350,7 @@ class DragonDatasetSequence:
         start_idx = val_split_idx - self.sequence_length
         end_idx = val_split_idx
 
-        return self.sequence[start_idx:end_idx]
+        return self.sequence[start_idx:end_idx] # type: ignore
 
     @property
     def feature_names(self):
@@ -398,6 +399,3 @@ class DragonDatasetSequence:
 
         return s
 
-
-def info():
-    _script_info(__all__)