dragon-ml-toolbox 19.13.0__py3-none-any.whl → 20.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
  2. dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
  3. ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
  4. ml_tools/ETL_cleaning/_basic_clean.py +351 -0
  5. ml_tools/ETL_cleaning/_clean_tools.py +128 -0
  6. ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
  7. ml_tools/ETL_cleaning/_imprimir.py +13 -0
  8. ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
  9. ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
  10. ml_tools/ETL_engineering/_imprimir.py +24 -0
  11. ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
  12. ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
  13. ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
  14. ml_tools/GUI_tools/_imprimir.py +12 -0
  15. ml_tools/IO_tools/_IO_loggers.py +235 -0
  16. ml_tools/IO_tools/_IO_save_load.py +151 -0
  17. ml_tools/IO_tools/_IO_utils.py +140 -0
  18. ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
  19. ml_tools/IO_tools/_imprimir.py +14 -0
  20. ml_tools/MICE/_MICE_imputation.py +132 -0
  21. ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
  22. ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
  23. ml_tools/MICE/_imprimir.py +11 -0
  24. ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
  25. ml_tools/ML_callbacks/_base.py +101 -0
  26. ml_tools/ML_callbacks/_checkpoint.py +232 -0
  27. ml_tools/ML_callbacks/_early_stop.py +208 -0
  28. ml_tools/ML_callbacks/_imprimir.py +12 -0
  29. ml_tools/ML_callbacks/_scheduler.py +197 -0
  30. ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
  31. ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
  32. ml_tools/ML_chain/_dragon_chain.py +140 -0
  33. ml_tools/ML_chain/_imprimir.py +11 -0
  34. ml_tools/ML_configuration/__init__.py +90 -0
  35. ml_tools/ML_configuration/_base_model_config.py +69 -0
  36. ml_tools/ML_configuration/_finalize.py +366 -0
  37. ml_tools/ML_configuration/_imprimir.py +47 -0
  38. ml_tools/ML_configuration/_metrics.py +593 -0
  39. ml_tools/ML_configuration/_models.py +206 -0
  40. ml_tools/ML_configuration/_training.py +124 -0
  41. ml_tools/ML_datasetmaster/__init__.py +28 -0
  42. ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
  43. ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
  44. ml_tools/ML_datasetmaster/_imprimir.py +15 -0
  45. ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
  46. ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
  47. ml_tools/ML_evaluation/__init__.py +53 -0
  48. ml_tools/ML_evaluation/_classification.py +629 -0
  49. ml_tools/ML_evaluation/_feature_importance.py +409 -0
  50. ml_tools/ML_evaluation/_imprimir.py +25 -0
  51. ml_tools/ML_evaluation/_loss.py +92 -0
  52. ml_tools/ML_evaluation/_regression.py +273 -0
  53. ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
  54. ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
  55. ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
  56. ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
  57. ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
  58. ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
  59. ml_tools/ML_finalize_handler/__init__.py +10 -0
  60. ml_tools/ML_finalize_handler/_imprimir.py +8 -0
  61. ml_tools/ML_inference/__init__.py +22 -0
  62. ml_tools/ML_inference/_base_inference.py +166 -0
  63. ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
  64. ml_tools/ML_inference/_dragon_inference.py +332 -0
  65. ml_tools/ML_inference/_imprimir.py +11 -0
  66. ml_tools/ML_inference/_multi_inference.py +180 -0
  67. ml_tools/ML_inference_sequence/__init__.py +10 -0
  68. ml_tools/ML_inference_sequence/_imprimir.py +8 -0
  69. ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
  70. ml_tools/ML_inference_vision/__init__.py +10 -0
  71. ml_tools/ML_inference_vision/_imprimir.py +8 -0
  72. ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
  73. ml_tools/ML_models/__init__.py +32 -0
  74. ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
  75. ml_tools/ML_models/_base_mlp_attention.py +198 -0
  76. ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
  77. ml_tools/ML_models/_dragon_tabular.py +248 -0
  78. ml_tools/ML_models/_imprimir.py +18 -0
  79. ml_tools/ML_models/_mlp_attention.py +134 -0
  80. ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
  81. ml_tools/ML_models_sequence/__init__.py +10 -0
  82. ml_tools/ML_models_sequence/_imprimir.py +8 -0
  83. ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
  84. ml_tools/ML_models_vision/__init__.py +29 -0
  85. ml_tools/ML_models_vision/_base_wrapper.py +254 -0
  86. ml_tools/ML_models_vision/_image_classification.py +182 -0
  87. ml_tools/ML_models_vision/_image_segmentation.py +108 -0
  88. ml_tools/ML_models_vision/_imprimir.py +16 -0
  89. ml_tools/ML_models_vision/_object_detection.py +135 -0
  90. ml_tools/ML_optimization/__init__.py +21 -0
  91. ml_tools/ML_optimization/_imprimir.py +13 -0
  92. ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
  93. ml_tools/ML_optimization/_single_dragon.py +203 -0
  94. ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
  95. ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
  96. ml_tools/ML_scaler/__init__.py +10 -0
  97. ml_tools/ML_scaler/_imprimir.py +8 -0
  98. ml_tools/ML_trainer/__init__.py +20 -0
  99. ml_tools/ML_trainer/_base_trainer.py +297 -0
  100. ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
  101. ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
  102. ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
  103. ml_tools/ML_trainer/_imprimir.py +10 -0
  104. ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
  105. ml_tools/ML_utilities/_artifact_finder.py +382 -0
  106. ml_tools/ML_utilities/_imprimir.py +16 -0
  107. ml_tools/ML_utilities/_inspection.py +325 -0
  108. ml_tools/ML_utilities/_train_tools.py +205 -0
  109. ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
  110. ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
  111. ml_tools/ML_vision_transformers/_imprimir.py +14 -0
  112. ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
  113. ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
  114. ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
  115. ml_tools/PSO_optimization/_imprimir.py +10 -0
  116. ml_tools/SQL/__init__.py +7 -0
  117. ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
  118. ml_tools/SQL/_imprimir.py +8 -0
  119. ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
  120. ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
  121. ml_tools/VIF/_imprimir.py +10 -0
  122. ml_tools/_core/__init__.py +7 -1
  123. ml_tools/_core/_logger.py +8 -18
  124. ml_tools/_core/_schema_load_ops.py +43 -0
  125. ml_tools/_core/_script_info.py +2 -2
  126. ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
  127. ml_tools/data_exploration/_analysis.py +214 -0
  128. ml_tools/data_exploration/_cleaning.py +566 -0
  129. ml_tools/data_exploration/_features.py +583 -0
  130. ml_tools/data_exploration/_imprimir.py +32 -0
  131. ml_tools/data_exploration/_plotting.py +487 -0
  132. ml_tools/data_exploration/_schema_ops.py +176 -0
  133. ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
  134. ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
  135. ml_tools/ensemble_evaluation/_imprimir.py +14 -0
  136. ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
  137. ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
  138. ml_tools/ensemble_inference/_imprimir.py +9 -0
  139. ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
  140. ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
  141. ml_tools/ensemble_learning/_imprimir.py +10 -0
  142. ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
  143. ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
  144. ml_tools/excel_handler/_imprimir.py +13 -0
  145. ml_tools/{keys.py → keys/__init__.py} +4 -1
  146. ml_tools/keys/_imprimir.py +11 -0
  147. ml_tools/{_core → keys}/_keys.py +2 -0
  148. ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
  149. ml_tools/math_utilities/_imprimir.py +11 -0
  150. ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
  151. ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
  152. ml_tools/optimization_tools/_imprimir.py +13 -0
  153. ml_tools/optimization_tools/_optimization_bounds.py +236 -0
  154. ml_tools/optimization_tools/_optimization_plots.py +218 -0
  155. ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
  156. ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
  157. ml_tools/path_manager/_imprimir.py +15 -0
  158. ml_tools/path_manager/_path_tools.py +346 -0
  159. ml_tools/plot_fonts/__init__.py +8 -0
  160. ml_tools/plot_fonts/_imprimir.py +8 -0
  161. ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
  162. ml_tools/schema/__init__.py +15 -0
  163. ml_tools/schema/_feature_schema.py +223 -0
  164. ml_tools/schema/_gui_schema.py +191 -0
  165. ml_tools/schema/_imprimir.py +10 -0
  166. ml_tools/{serde.py → serde/__init__.py} +4 -2
  167. ml_tools/serde/_imprimir.py +10 -0
  168. ml_tools/{_core → serde}/_serde.py +3 -8
  169. ml_tools/{utilities.py → utilities/__init__.py} +11 -6
  170. ml_tools/utilities/_imprimir.py +18 -0
  171. ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
  172. ml_tools/utilities/_utility_tools.py +192 -0
  173. dragon_ml_toolbox-19.13.0.dist-info/RECORD +0 -111
  174. ml_tools/ML_chaining_inference.py +0 -8
  175. ml_tools/ML_configuration.py +0 -86
  176. ml_tools/ML_configuration_pytab.py +0 -14
  177. ml_tools/ML_datasetmaster.py +0 -10
  178. ml_tools/ML_evaluation.py +0 -16
  179. ml_tools/ML_evaluation_multi.py +0 -12
  180. ml_tools/ML_finalize_handler.py +0 -8
  181. ml_tools/ML_inference.py +0 -12
  182. ml_tools/ML_models.py +0 -14
  183. ml_tools/ML_models_advanced.py +0 -14
  184. ml_tools/ML_models_pytab.py +0 -14
  185. ml_tools/ML_optimization.py +0 -14
  186. ml_tools/ML_optimization_pareto.py +0 -8
  187. ml_tools/ML_scaler.py +0 -8
  188. ml_tools/ML_sequence_datasetmaster.py +0 -8
  189. ml_tools/ML_sequence_evaluation.py +0 -10
  190. ml_tools/ML_sequence_inference.py +0 -8
  191. ml_tools/ML_sequence_models.py +0 -8
  192. ml_tools/ML_trainer.py +0 -12
  193. ml_tools/ML_vision_datasetmaster.py +0 -12
  194. ml_tools/ML_vision_evaluation.py +0 -10
  195. ml_tools/ML_vision_inference.py +0 -8
  196. ml_tools/ML_vision_models.py +0 -18
  197. ml_tools/SQL.py +0 -8
  198. ml_tools/_core/_ETL_cleaning.py +0 -694
  199. ml_tools/_core/_IO_tools.py +0 -498
  200. ml_tools/_core/_ML_callbacks.py +0 -702
  201. ml_tools/_core/_ML_configuration.py +0 -1332
  202. ml_tools/_core/_ML_configuration_pytab.py +0 -102
  203. ml_tools/_core/_ML_evaluation.py +0 -867
  204. ml_tools/_core/_ML_evaluation_multi.py +0 -544
  205. ml_tools/_core/_ML_inference.py +0 -646
  206. ml_tools/_core/_ML_models.py +0 -668
  207. ml_tools/_core/_ML_models_pytab.py +0 -693
  208. ml_tools/_core/_ML_trainer.py +0 -2323
  209. ml_tools/_core/_ML_utilities.py +0 -886
  210. ml_tools/_core/_ML_vision_models.py +0 -644
  211. ml_tools/_core/_data_exploration.py +0 -1901
  212. ml_tools/_core/_optimization_tools.py +0 -493
  213. ml_tools/_core/_schema.py +0 -359
  214. ml_tools/plot_fonts.py +0 -8
  215. ml_tools/schema.py +0 -12
  216. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
  217. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
  218. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  219. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_configuration/_models.py
@@ -0,0 +1,206 @@
+ from typing import Optional, Literal
+
+ from ..schema import FeatureSchema
+
+ from ._base_model_config import _BaseModelParams
+
+
+ __all__ = [
+     # --- Model Parameter Configs ---
+     "DragonMLPParams",
+     "DragonAttentionMLPParams",
+     "DragonMultiHeadAttentionNetParams",
+     "DragonTabularTransformerParams",
+     "DragonGateParams",
+     "DragonNodeParams",
+     "DragonTabNetParams",
+     "DragonAutoIntParams",
+ ]
+
+
+ # ----------------------------
+ # Model Parameters Configurations
+ # ----------------------------
+
+ # --- Standard Models ---
+
+ class DragonMLPParams(_BaseModelParams):
+     def __init__(self,
+                  in_features: int,
+                  out_targets: int,
+                  hidden_layers: list[int],
+                  drop_out: float = 0.2) -> None:
+         self.in_features = in_features
+         self.out_targets = out_targets
+         self.hidden_layers = hidden_layers
+         self.drop_out = drop_out
+
+
+ class DragonAttentionMLPParams(_BaseModelParams):
+     def __init__(self,
+                  in_features: int,
+                  out_targets: int,
+                  hidden_layers: list[int],
+                  drop_out: float = 0.2) -> None:
+         self.in_features = in_features
+         self.out_targets = out_targets
+         self.hidden_layers = hidden_layers
+         self.drop_out = drop_out
+
+
+ class DragonMultiHeadAttentionNetParams(_BaseModelParams):
+     def __init__(self,
+                  in_features: int,
+                  out_targets: int,
+                  hidden_layers: list[int],
+                  drop_out: float = 0.2,
+                  num_heads: int = 4,
+                  attention_dropout: float = 0.1) -> None:
+         self.in_features = in_features
+         self.out_targets = out_targets
+         self.hidden_layers = hidden_layers
+         self.drop_out = drop_out
+         self.num_heads = num_heads
+         self.attention_dropout = attention_dropout
+
+
+ class DragonTabularTransformerParams(_BaseModelParams):
+     def __init__(self, *,
+                  schema: FeatureSchema,
+                  out_targets: int,
+                  embedding_dim: int = 256,
+                  num_heads: int = 8,
+                  num_layers: int = 6,
+                  dropout: float = 0.2) -> None:
+         self.schema = schema
+         self.out_targets = out_targets
+         self.embedding_dim = embedding_dim
+         self.num_heads = num_heads
+         self.num_layers = num_layers
+         self.dropout = dropout
+
+ # --- Advanced Models ---
+
+ class DragonGateParams(_BaseModelParams):
+     def __init__(self, *,
+                  schema: FeatureSchema,
+                  out_targets: int,
+                  embedding_dim: int = 16,
+                  gflu_stages: int = 6,
+                  gflu_dropout: float = 0.1,
+                  num_trees: int = 20,
+                  tree_depth: int = 4,
+                  tree_dropout: float = 0.1,
+                  chain_trees: bool = False,
+                  tree_wise_attention: bool = True,
+                  tree_wise_attention_dropout: float = 0.1,
+                  binning_activation: Literal['entmoid', 'sparsemoid', 'sigmoid'] = "entmoid",
+                  feature_mask_function: Literal['entmax', 'sparsemax', 'softmax', 't-softmax'] = "entmax",
+                  share_head_weights: bool = True,
+                  batch_norm_continuous: bool = True) -> None:
+         self.schema = schema
+         self.out_targets = out_targets
+         self.embedding_dim = embedding_dim
+         self.gflu_stages = gflu_stages
+         self.gflu_dropout = gflu_dropout
+         self.num_trees = num_trees
+         self.tree_depth = tree_depth
+         self.tree_dropout = tree_dropout
+         self.chain_trees = chain_trees
+         self.tree_wise_attention = tree_wise_attention
+         self.tree_wise_attention_dropout = tree_wise_attention_dropout
+         self.binning_activation = binning_activation
+         self.feature_mask_function = feature_mask_function
+         self.share_head_weights = share_head_weights
+         self.batch_norm_continuous = batch_norm_continuous
+
+
+ class DragonNodeParams(_BaseModelParams):
+     def __init__(self, *,
+                  schema: FeatureSchema,
+                  out_targets: int,
+                  embedding_dim: int = 24,
+                  num_trees: int = 1024,
+                  num_layers: int = 2,
+                  tree_depth: int = 6,
+                  additional_tree_output_dim: int = 3,
+                  max_features: Optional[int] = None,
+                  input_dropout: float = 0.0,
+                  embedding_dropout: float = 0.0,
+                  choice_function: Literal['entmax', 'sparsemax', 'softmax'] = 'entmax',
+                  bin_function: Literal['entmoid', 'sparsemoid', 'sigmoid'] = 'entmoid',
+                  batch_norm_continuous: bool = False) -> None:
+         self.schema = schema
+         self.out_targets = out_targets
+         self.embedding_dim = embedding_dim
+         self.num_trees = num_trees
+         self.num_layers = num_layers
+         self.tree_depth = tree_depth
+         self.additional_tree_output_dim = additional_tree_output_dim
+         self.max_features = max_features
+         self.input_dropout = input_dropout
+         self.embedding_dropout = embedding_dropout
+         self.choice_function = choice_function
+         self.bin_function = bin_function
+         self.batch_norm_continuous = batch_norm_continuous
+
+
+ class DragonAutoIntParams(_BaseModelParams):
+     def __init__(self, *,
+                  schema: FeatureSchema,
+                  out_targets: int,
+                  embedding_dim: int = 32,
+                  attn_embed_dim: int = 32,
+                  num_heads: int = 2,
+                  num_attn_blocks: int = 3,
+                  attn_dropout: float = 0.1,
+                  has_residuals: bool = True,
+                  attention_pooling: bool = True,
+                  deep_layers: bool = True,
+                  layers: str = "128-64-32",
+                  activation: str = "ReLU",
+                  embedding_dropout: float = 0.0,
+                  batch_norm_continuous: bool = False) -> None:
+         self.schema = schema
+         self.out_targets = out_targets
+         self.embedding_dim = embedding_dim
+         self.attn_embed_dim = attn_embed_dim
+         self.num_heads = num_heads
+         self.num_attn_blocks = num_attn_blocks
+         self.attn_dropout = attn_dropout
+         self.has_residuals = has_residuals
+         self.attention_pooling = attention_pooling
+         self.deep_layers = deep_layers
+         self.layers = layers
+         self.activation = activation
+         self.embedding_dropout = embedding_dropout
+         self.batch_norm_continuous = batch_norm_continuous
+
+
+ class DragonTabNetParams(_BaseModelParams):
+     def __init__(self, *,
+                  schema: FeatureSchema,
+                  out_targets: int,
+                  n_d: int = 8,
+                  n_a: int = 8,
+                  n_steps: int = 3,
+                  gamma: float = 1.3,
+                  n_independent: int = 2,
+                  n_shared: int = 2,
+                  virtual_batch_size: int = 128,
+                  momentum: float = 0.02,
+                  mask_type: Literal['sparsemax', 'entmax', 'softmax'] = 'sparsemax',
+                  batch_norm_continuous: bool = False) -> None:
+         self.schema = schema
+         self.out_targets = out_targets
+         self.n_d = n_d
+         self.n_a = n_a
+         self.n_steps = n_steps
+         self.gamma = gamma
+         self.n_independent = n_independent
+         self.n_shared = n_shared
+         self.virtual_batch_size = virtual_batch_size
+         self.momentum = momentum
+         self.mask_type = mask_type
+         self.batch_norm_continuous = batch_norm_continuous
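These parameter classes are plain typed containers: each __init__ simply records its arguments as attributes, with shared behavior presumably coming from _BaseModelParams. A minimal usage sketch follows, assuming the new ml_tools/ML_configuration/__init__.py re-exports the names in __all__ (the values are illustrative only):

from ml_tools.ML_configuration import DragonMLPParams  # assumed re-export

# Hidden layer widths plus a shared dropout rate. The schema-driven classes
# (DragonTabNetParams, DragonNodeParams, ...) are keyword-only and also
# require a FeatureSchema via their `schema` argument.
mlp_params = DragonMLPParams(
    in_features=32,
    out_targets=1,
    hidden_layers=[128, 64],
    drop_out=0.2,
)
print(mlp_params.drop_out)  # 0.2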
ml_tools/ML_configuration/_training.py
@@ -0,0 +1,124 @@
+ from typing import Union, Optional, Any, Literal
+ from pathlib import Path
+
+ from .._core import get_logger
+ from ..path_manager import make_fullpath
+
+ from ._base_model_config import _BaseModelParams
+
+
+ _LOGGER = get_logger("ML Configuration")
+
+
+ __all__ = [
+     # --- Training Config ---
+     "DragonTrainingConfig",
+     "DragonParetoConfig"
+ ]
+
+
+ class DragonTrainingConfig(_BaseModelParams):
+     """
+     Configuration object for the training process.
+
+     Can be unpacked as a dictionary for logging or accessed as an object.
+
+     Accepts arbitrary keyword arguments, which are set as instance attributes.
+     """
+     def __init__(self,
+                  validation_size: float,
+                  test_size: float,
+                  initial_learning_rate: float,
+                  batch_size: int,
+                  random_state: int = 101,
+                  **kwargs: Any) -> None:
+         """
+         Args:
+             validation_size (float): Proportion of the data used for the validation set.
+             test_size (float): Proportion of the data used for the test set.
+             initial_learning_rate (float): Starting learning rate.
+             batch_size (int): Number of samples per training batch.
+             random_state (int): Seed for reproducibility.
+             **kwargs: Additional training parameters as key-value pairs.
+         """
+         self.validation_size = validation_size
+         self.test_size = test_size
+         self.initial_learning_rate = initial_learning_rate
+         self.batch_size = batch_size
+         self.random_state = random_state
+
+         # Process kwargs with validation
+         for key, value in kwargs.items():
+             # Python guarantees 'key' is a string for **kwargs
+
+             # Allow None values
+             if value is None:
+                 setattr(self, key, value)
+                 continue
+
+             if isinstance(value, dict):
+                 _LOGGER.error("Nested dictionaries are not supported, unpack them first.")
+                 raise TypeError()
+
+             # Accept only JSON-serializable types (dict excluded)
+             if not isinstance(value, (str, int, float, bool, list, tuple)):
+                 _LOGGER.error(f"Invalid type for configuration '{key}': {type(value).__name__}")
+                 raise TypeError()
+
+             setattr(self, key, value)
+
+
+ class DragonParetoConfig(_BaseModelParams):
+     """
+     Configuration object for the Pareto optimization process.
+     """
+     def __init__(self,
+                  save_directory: Union[str, Path],
+                  target_objectives: dict[str, Literal["min", "max"]],
+                  continuous_bounds_map: Union[dict[str, tuple[float, float]], dict[str, list[float]], str, Path],
+                  columns_to_round: Optional[list[str]] = None,
+                  population_size: int = 500,
+                  generations: int = 1000,
+                  solutions_filename: str = "NonDominatedSolutions",
+                  float_precision: int = 4,
+                  log_interval: int = 10,
+                  plot_size: tuple[int, int] = (10, 7),
+                  plot_font_size: int = 16,
+                  discretize_start_at_zero: bool = True):
+         """
+         Configure the Pareto optimizer.
+
+         Args:
+             save_directory (str | Path): Directory to save artifacts.
+             target_objectives (dict[str, "min" | "max"]): Mapping of target names to their optimization direction.
+                 Example: {"price": "max", "error": "min"}
+             continuous_bounds_map (dict | str | Path): Bounds for continuous features as {name: (min, max)}, or a path to a directory containing the "optimization_bounds.json" file.
+             columns_to_round (list[str] | None): Continuous column names to round to the nearest integer.
+             population_size (int): Size of the genetic population.
+             generations (int): Number of generations to run.
+             solutions_filename (str): Filename for saving Pareto solutions.
+             float_precision (int): Number of decimal places to round standard float columns.
+             log_interval (int): Interval for logging progress.
+             plot_size (tuple[int, int]): Size of the 2D plots.
+             plot_font_size (int): Font size for plot text.
+             discretize_start_at_zero (bool): Whether categorical encoding starts at 0 (True) or at 1 (False).
+         """
+         # Validate string or Path
+         valid_save_dir = make_fullpath(save_directory, make=True, enforce="directory")
+
+         if isinstance(continuous_bounds_map, (str, Path)):
+             continuous_bounds_map = make_fullpath(continuous_bounds_map, make=False, enforce="directory")
+
+         self.save_directory = valid_save_dir
+         self.target_objectives = target_objectives
+         self.continuous_bounds_map = continuous_bounds_map
+         self.columns_to_round = columns_to_round
+         self.population_size = population_size
+         self.generations = generations
+         self.solutions_filename = solutions_filename
+         self.float_precision = float_precision
+         self.log_interval = log_interval
+         self.plot_size = plot_size
+         self.plot_font_size = plot_font_size
+         self.discretize_start_at_zero = discretize_start_at_zero
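DragonTrainingConfig validates its extra keyword arguments so that the whole object stays JSON-loggable. A minimal sketch of that behavior, again assuming the class is re-exported from ml_tools.ML_configuration:

from ml_tools.ML_configuration import DragonTrainingConfig  # assumed re-export

config = DragonTrainingConfig(
    validation_size=0.15,
    test_size=0.15,
    initial_learning_rate=1e-3,
    batch_size=64,
    weight_decay=1e-5,  # extra kwarg: JSON-friendly, stored as an attribute
)
print(config.weight_decay)  # 1e-5

# Nested dicts are rejected by the validation loop and raise TypeError:
# DragonTrainingConfig(0.15, 0.15, 1e-3, 64, optimizer={"name": "adam"})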
ml_tools/ML_datasetmaster/__init__.py
@@ -0,0 +1,28 @@
+ from ._datasetmaster import (
+     DragonDataset,
+     DragonDatasetMulti,
+ )
+
+ from ._sequence_datasetmaster import (
+     DragonDatasetSequence
+ )
+
+ from ._vision_datasetmaster import (
+     DragonDatasetVision,
+     DragonDatasetSegmentation,
+     DragonDatasetObjectDetection
+ )
+
+ from ._imprimir import info
+
+
+ __all__ = [
+     "DragonDataset",
+     "DragonDatasetMulti",
+     # sequence
+     "DragonDatasetSequence",
+     # vision
+     "DragonDatasetVision",
+     "DragonDatasetSegmentation",
+     "DragonDatasetObjectDetection",
+ ]
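This facade replaces the deleted flat module ml_tools/ML_datasetmaster.py (item 177 in the file list), so existing imports keep working while the implementation moves into submodules, e.g.:

from ml_tools.ML_datasetmaster import DragonDataset, DragonDatasetVision

The `info` helper imported from the new _imprimir module ("imprimir" is Spanish for "to print") presumably prints the package's public contents; that behavior is inferred from the naming pattern, not shown in this diff.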
ml_tools/ML_datasetmaster/_base_datasetmaster.py
@@ -0,0 +1,337 @@
+ import torch
+ from torch.utils.data import Dataset
+ import pandas
+ import numpy
+ from typing import Union, Optional
+ from abc import ABC
+ from pathlib import Path
+
+ from ..IO_tools import save_list_strings, custom_logger
+ from ..ML_scaler import DragonScaler
+ from ..schema import FeatureSchema
+
+ from ..path_manager import make_fullpath, sanitize_filename
+ from .._core import get_logger
+ from ..keys._keys import DatasetKeys, ScalerKeys
+
+
+ _LOGGER = get_logger("DragonDataset")
+
+
+ __all__ = [
+     "_BaseDatasetMaker",
+     "_PytorchDataset",
+ ]
+
+
+ # --- Internal Helper Class ---
+ class _PytorchDataset(Dataset):
+     """
+     Internal helper class to create a PyTorch Dataset.
+     Converts numpy/pandas data into tensors for model consumption.
+     """
+     def __init__(self, features: Union[numpy.ndarray, pandas.DataFrame],
+                  labels: Union[numpy.ndarray, pandas.Series, pandas.DataFrame],
+                  labels_dtype: torch.dtype,
+                  features_dtype: torch.dtype = torch.float32,
+                  feature_names: Optional[list[str]] = None,
+                  target_names: Optional[list[str]] = None):
+
+         if isinstance(features, numpy.ndarray):
+             self.features = torch.tensor(features, dtype=features_dtype)
+         else:  # It's a pandas.DataFrame
+             self.features = torch.tensor(features.to_numpy(), dtype=features_dtype)
+
+         if isinstance(labels, numpy.ndarray):
+             self.labels = torch.tensor(labels, dtype=labels_dtype)
+         elif isinstance(labels, (pandas.Series, pandas.DataFrame)):
+             self.labels = torch.tensor(labels.to_numpy(), dtype=labels_dtype)
+         else:
+             # Fallback: assume an array-like that torch.tensor can convert
+             self.labels = torch.tensor(labels, dtype=labels_dtype)
+
+         self._feature_names = feature_names
+         self._target_names = target_names
+         self._classes: list[str] = []
+         self._class_map: dict[str, int] = dict()
+         self._feature_scaler: Optional[DragonScaler] = None
+         self._target_scaler: Optional[DragonScaler] = None
+
+     def __len__(self):
+         return len(self.features)
+
+     def __getitem__(self, index):
+         return self.features[index], self.labels[index]
+
+     @property
+     def feature_names(self):
+         if self._feature_names is not None:
+             return self._feature_names
+         else:
+             _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any feature names.")
+             raise ValueError()
+
+     @property
+     def target_names(self):
+         if self._target_names is not None:
+             return self._target_names
+         else:
+             _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any target names.")
+             raise ValueError()
+
+     @property
+     def classes(self):
+         return self._classes
+
+     @property
+     def class_map(self):
+         return self._class_map
+
+     @property
+     def feature_scaler(self):
+         return self._feature_scaler
+
+     @property
+     def target_scaler(self):
+         return self._target_scaler
+
+
+ # --- Abstract Base Class ---
+ class _BaseDatasetMaker(ABC):
+     """
+     Abstract base class for dataset makers. Contains shared logic.
+     """
+     def __init__(self):
+         self._train_ds: Optional[Dataset] = None
+         self._val_ds: Optional[Dataset] = None
+         self._test_ds: Optional[Dataset] = None
+
+         self.feature_scaler: Optional[DragonScaler] = None
+         self.target_scaler: Optional[DragonScaler] = None
+
+         self._id: Optional[str] = None
+         self._feature_names: list[str] = []
+         self._target_names: list[str] = []
+         self._X_train_shape = (0, 0)
+         self._X_val_shape = (0, 0)
+         self._X_test_shape = (0, 0)
+         self._y_train_shape = (0,)
+         self._y_val_shape = (0,)
+         self._y_test_shape = (0,)
+         self.class_map: dict[str, int] = dict()
+         self.classes: list[str] = list()
+
+     def _prepare_feature_scaler(self,
+                                 X_train: pandas.DataFrame,
+                                 y_train: Union[pandas.Series, pandas.DataFrame],
+                                 X_val: pandas.DataFrame,
+                                 X_test: pandas.DataFrame,
+                                 label_dtype: torch.dtype,
+                                 schema: FeatureSchema) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
+         """Internal helper to fit and apply a DragonScaler for FEATURES using a FeatureSchema."""
+         continuous_feature_indices: Optional[list[int]] = None
+
+         # Get continuous feature indices *from the schema*
+         if schema.continuous_feature_names:
+             _LOGGER.info("Getting continuous feature indices from schema.")
+             try:
+                 # Convert columns to a standard list for .index()
+                 train_cols_list = X_train.columns.to_list()
+                 # Map names from schema to column indices in the training DataFrame
+                 continuous_feature_indices = [train_cols_list.index(name) for name in schema.continuous_feature_names]
+             except ValueError as e:
+                 _LOGGER.error(f"Feature name from schema not found in training data columns:\n{e}")
+                 raise ValueError()
+         else:
+             _LOGGER.info("No continuous features listed in schema. Feature scaler will not be fitted.")
+
+         X_train_values = X_train.to_numpy()
+         X_val_values = X_val.to_numpy()
+         X_test_values = X_test.to_numpy()
+
+         # continuous_feature_indices is derived
+         if self.feature_scaler is None and continuous_feature_indices:
+             _LOGGER.info("Fitting a new DragonScaler on training features.")
+             temp_train_ds = _PytorchDataset(X_train_values, y_train, label_dtype)
+             self.feature_scaler = DragonScaler.fit(temp_train_ds, continuous_feature_indices)
+
+         if self.feature_scaler and self.feature_scaler.mean_ is not None:
+             _LOGGER.info("Applying scaler transformation to train, validation, and test feature sets.")
+             X_train_tensor = self.feature_scaler.transform(torch.tensor(X_train_values, dtype=torch.float32))
+             X_val_tensor = self.feature_scaler.transform(torch.tensor(X_val_values, dtype=torch.float32))
+             X_test_tensor = self.feature_scaler.transform(torch.tensor(X_test_values, dtype=torch.float32))
+             return X_train_tensor.numpy(), X_val_tensor.numpy(), X_test_tensor.numpy()
+
+         return X_train_values, X_val_values, X_test_values
+
+     def _prepare_target_scaler(self,
+                                y_train: Union[pandas.Series, pandas.DataFrame],
+                                y_val: Union[pandas.Series, pandas.DataFrame],
+                                y_test: Union[pandas.Series, pandas.DataFrame]) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
+         """Internal helper to fit and apply a DragonScaler for TARGETS."""
+
+         y_train_arr = y_train.to_numpy() if isinstance(y_train, (pandas.Series, pandas.DataFrame)) else y_train
+         y_val_arr = y_val.to_numpy() if isinstance(y_val, (pandas.Series, pandas.DataFrame)) else y_val
+         y_test_arr = y_test.to_numpy() if isinstance(y_test, (pandas.Series, pandas.DataFrame)) else y_test
+
+         if self.target_scaler is None:
+             _LOGGER.info("Fitting a new DragonScaler on training targets.")
+             # Convert to float tensor for calculation
+             y_train_tensor = torch.tensor(y_train_arr, dtype=torch.float32)
+             self.target_scaler = DragonScaler.fit_tensor(y_train_tensor)
+
+         if self.target_scaler and self.target_scaler.mean_ is not None:
+             _LOGGER.info("Applying scaler transformation to train, validation, and test targets.")
+             y_train_tensor = self.target_scaler.transform(torch.tensor(y_train_arr, dtype=torch.float32))
+             y_val_tensor = self.target_scaler.transform(torch.tensor(y_val_arr, dtype=torch.float32))
+             y_test_tensor = self.target_scaler.transform(torch.tensor(y_test_arr, dtype=torch.float32))
+             return y_train_tensor.numpy(), y_val_tensor.numpy(), y_test_tensor.numpy()
+
+         return y_train_arr, y_val_arr, y_test_arr
+
+     def _attach_scalers_to_datasets(self):
+         """Helper to attach the master scalers to the child datasets."""
+         for ds in [self._train_ds, self._val_ds, self._test_ds]:
+             if ds is not None:
+                 ds._feature_scaler = self.feature_scaler  # type: ignore
+                 ds._target_scaler = self.target_scaler  # type: ignore
+
+     @property
+     def train_dataset(self) -> Dataset:
+         if self._train_ds is None:
+             _LOGGER.error("Train Dataset not yet created.")
+             raise RuntimeError()
+         return self._train_ds
+
+     @property
+     def validation_dataset(self) -> Dataset:
+         if self._val_ds is None:
+             _LOGGER.error("Validation Dataset not yet created.")
+             raise RuntimeError()
+         return self._val_ds
+
+     @property
+     def test_dataset(self) -> Dataset:
+         if self._test_ds is None:
+             _LOGGER.error("Test Dataset not yet created.")
+             raise RuntimeError()
+         return self._test_ds
+
+     @property
+     def feature_names(self) -> list[str]:
+         return self._feature_names
+
+     @property
+     def target_names(self) -> list[str]:
+         return self._target_names
+
+     @property
+     def number_of_features(self) -> int:
+         return len(self._feature_names)
+
+     @property
+     def number_of_targets(self) -> int:
+         return len(self._target_names)
+
+     @property
+     def id(self) -> Optional[str]:
+         return self._id
+
+     @id.setter
+     def id(self, dataset_id: str):
+         if not isinstance(dataset_id, str):
+             raise ValueError("ID must be a string.")
+         self._id = dataset_id
+
+     def dataframes_info(self) -> None:
+         print("--- DataFrame Shapes After Split ---")
+         print(f"  X_train shape: {self._X_train_shape}, y_train shape: {self._y_train_shape}")
+         print(f"  X_val shape:   {self._X_val_shape}, y_val shape:   {self._y_val_shape}")
+         print(f"  X_test shape:  {self._X_test_shape}, y_test shape:  {self._y_test_shape}")
+         print("------------------------------------")
+
+     def save_feature_names(self, directory: Union[str, Path], verbose: bool = True) -> None:
+         save_list_strings(list_strings=self._feature_names,
+                           directory=directory,
+                           filename=DatasetKeys.FEATURE_NAMES,
+                           verbose=verbose)
+
+     def save_target_names(self, directory: Union[str, Path], verbose: bool = True) -> None:
+         save_list_strings(list_strings=self._target_names,
+                           directory=directory,
+                           filename=DatasetKeys.TARGET_NAMES,
+                           verbose=verbose)
+
+     def save_scaler(self, directory: Union[str, Path], verbose: bool = True) -> None:
+         """
+         Saves both feature and target scalers (if they exist) to a single .pth file
+         using a dictionary structure.
+         """
+         if self.feature_scaler is None and self.target_scaler is None:
+             _LOGGER.warning("No scalers (feature or target) were fitted. Nothing to save.")
+             return
+
+         if not self.id:
+             _LOGGER.error("Must set the dataset `id` before saving scaler.")
+             raise ValueError()
+
+         save_path = make_fullpath(directory, make=True, enforce="directory")
+         sanitized_id = sanitize_filename(self.id)
+         filename = f"{DatasetKeys.SCALER_PREFIX}{sanitized_id}.pth"
+         filepath = save_path / filename
+
+         # Construct the consolidated dictionary
+         combined_state = {}
+
+         print_message = "Saved "
+
+         if self.feature_scaler:
+             combined_state[ScalerKeys.FEATURE_SCALER] = self.feature_scaler._get_state()
+             print_message += "feature scaler "
+
+         if self.target_scaler:
+             if self.feature_scaler:
+                 print_message += "and "
+             combined_state[ScalerKeys.TARGET_SCALER] = self.target_scaler._get_state()
+             print_message += "target scaler "
+
+         torch.save(combined_state, filepath)
+
+         if verbose:
+             _LOGGER.info(f"{print_message}to '{filepath.name}'.")
+
+     def save_class_map(self, directory: Union[str, Path], verbose: bool = True) -> None:
+         """
+         Saves the class map dictionary to a JSON file.
+
+         Args:
+             directory (str | Path): Directory to save the class map.
+             verbose (bool): Whether to print log messages.
+         """
+         if not self.class_map:
+             _LOGGER.warning("No class_map defined. Skipping.")
+             return
+
+         log_name = f"Class_to_Index_{self.id}" if self.id else "Class_to_Index"
+
+         custom_logger(data=self.class_map,
+                       save_directory=directory,
+                       log_name=log_name,
+                       add_timestamp=False,
+                       dict_as="json")
+         if verbose:
+             _LOGGER.info(f"Class map for '{self.id}' saved as '{log_name}.json'.")
+
+     def save_artifacts(self, directory: Union[str, Path], verbose: bool = True) -> None:
+         """
+         Saves all dataset artifacts: feature names, target names, scalers, and class map (if applicable).
+
+         Args:
+             directory (str | Path): Directory to save artifacts.
+             verbose (bool): Whether to print log messages.
+         """
+         self.save_feature_names(directory=directory, verbose=verbose)
+         self.save_target_names(directory=directory, verbose=verbose)
+         if self.feature_scaler is not None or self.target_scaler is not None:
+             self.save_scaler(directory=directory, verbose=verbose)
+         if self.class_map:
+             self.save_class_map(directory=directory, verbose=verbose)
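_PytorchDataset is internal (note the leading underscore and its place in __all__), but its conversion logic is easy to exercise directly. A minimal sketch with synthetic data, using the module path added in this diff; the shapes and names are illustrative:

import numpy
import torch
from torch.utils.data import DataLoader

from ml_tools.ML_datasetmaster._base_datasetmaster import _PytorchDataset

# Synthetic regression data: 100 rows, 8 features, 1 target.
X = numpy.random.rand(100, 8).astype(numpy.float32)
y = numpy.random.rand(100, 1).astype(numpy.float32)

ds = _PytorchDataset(X, y, labels_dtype=torch.float32,
                     feature_names=[f"f{i}" for i in range(8)],
                     target_names=["target"])

loader = DataLoader(ds, batch_size=16, shuffle=True)
xb, yb = next(iter(loader))
print(xb.shape, yb.shape)  # torch.Size([16, 8]) torch.Size([16, 1])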