dragon-ml-toolbox 19.14.0__py3-none-any.whl → 20.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
  2. dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
  3. ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
  4. ml_tools/ETL_cleaning/_basic_clean.py +351 -0
  5. ml_tools/ETL_cleaning/_clean_tools.py +128 -0
  6. ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
  7. ml_tools/ETL_cleaning/_imprimir.py +13 -0
  8. ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
  9. ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
  10. ml_tools/ETL_engineering/_imprimir.py +24 -0
  11. ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
  12. ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
  13. ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
  14. ml_tools/GUI_tools/_imprimir.py +12 -0
  15. ml_tools/IO_tools/_IO_loggers.py +235 -0
  16. ml_tools/IO_tools/_IO_save_load.py +151 -0
  17. ml_tools/IO_tools/_IO_utils.py +140 -0
  18. ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
  19. ml_tools/IO_tools/_imprimir.py +14 -0
  20. ml_tools/MICE/_MICE_imputation.py +132 -0
  21. ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
  22. ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
  23. ml_tools/MICE/_imprimir.py +11 -0
  24. ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
  25. ml_tools/ML_callbacks/_base.py +101 -0
  26. ml_tools/ML_callbacks/_checkpoint.py +232 -0
  27. ml_tools/ML_callbacks/_early_stop.py +208 -0
  28. ml_tools/ML_callbacks/_imprimir.py +12 -0
  29. ml_tools/ML_callbacks/_scheduler.py +197 -0
  30. ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
  31. ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
  32. ml_tools/ML_chain/_dragon_chain.py +140 -0
  33. ml_tools/ML_chain/_imprimir.py +11 -0
  34. ml_tools/ML_configuration/__init__.py +90 -0
  35. ml_tools/ML_configuration/_base_model_config.py +69 -0
  36. ml_tools/ML_configuration/_finalize.py +366 -0
  37. ml_tools/ML_configuration/_imprimir.py +47 -0
  38. ml_tools/ML_configuration/_metrics.py +593 -0
  39. ml_tools/ML_configuration/_models.py +206 -0
  40. ml_tools/ML_configuration/_training.py +124 -0
  41. ml_tools/ML_datasetmaster/__init__.py +28 -0
  42. ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
  43. ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
  44. ml_tools/ML_datasetmaster/_imprimir.py +15 -0
  45. ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
  46. ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
  47. ml_tools/ML_evaluation/__init__.py +53 -0
  48. ml_tools/ML_evaluation/_classification.py +629 -0
  49. ml_tools/ML_evaluation/_feature_importance.py +409 -0
  50. ml_tools/ML_evaluation/_imprimir.py +25 -0
  51. ml_tools/ML_evaluation/_loss.py +92 -0
  52. ml_tools/ML_evaluation/_regression.py +273 -0
  53. ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
  54. ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
  55. ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
  56. ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
  57. ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
  58. ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
  59. ml_tools/ML_finalize_handler/__init__.py +10 -0
  60. ml_tools/ML_finalize_handler/_imprimir.py +8 -0
  61. ml_tools/ML_inference/__init__.py +22 -0
  62. ml_tools/ML_inference/_base_inference.py +166 -0
  63. ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
  64. ml_tools/ML_inference/_dragon_inference.py +332 -0
  65. ml_tools/ML_inference/_imprimir.py +11 -0
  66. ml_tools/ML_inference/_multi_inference.py +180 -0
  67. ml_tools/ML_inference_sequence/__init__.py +10 -0
  68. ml_tools/ML_inference_sequence/_imprimir.py +8 -0
  69. ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
  70. ml_tools/ML_inference_vision/__init__.py +10 -0
  71. ml_tools/ML_inference_vision/_imprimir.py +8 -0
  72. ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
  73. ml_tools/ML_models/__init__.py +32 -0
  74. ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
  75. ml_tools/ML_models/_base_mlp_attention.py +198 -0
  76. ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
  77. ml_tools/ML_models/_dragon_tabular.py +248 -0
  78. ml_tools/ML_models/_imprimir.py +18 -0
  79. ml_tools/ML_models/_mlp_attention.py +134 -0
  80. ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
  81. ml_tools/ML_models_sequence/__init__.py +10 -0
  82. ml_tools/ML_models_sequence/_imprimir.py +8 -0
  83. ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
  84. ml_tools/ML_models_vision/__init__.py +29 -0
  85. ml_tools/ML_models_vision/_base_wrapper.py +254 -0
  86. ml_tools/ML_models_vision/_image_classification.py +182 -0
  87. ml_tools/ML_models_vision/_image_segmentation.py +108 -0
  88. ml_tools/ML_models_vision/_imprimir.py +16 -0
  89. ml_tools/ML_models_vision/_object_detection.py +135 -0
  90. ml_tools/ML_optimization/__init__.py +21 -0
  91. ml_tools/ML_optimization/_imprimir.py +13 -0
  92. ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
  93. ml_tools/ML_optimization/_single_dragon.py +203 -0
  94. ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
  95. ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
  96. ml_tools/ML_scaler/__init__.py +10 -0
  97. ml_tools/ML_scaler/_imprimir.py +8 -0
  98. ml_tools/ML_trainer/__init__.py +20 -0
  99. ml_tools/ML_trainer/_base_trainer.py +297 -0
  100. ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
  101. ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
  102. ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
  103. ml_tools/ML_trainer/_imprimir.py +10 -0
  104. ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
  105. ml_tools/ML_utilities/_artifact_finder.py +382 -0
  106. ml_tools/ML_utilities/_imprimir.py +16 -0
  107. ml_tools/ML_utilities/_inspection.py +325 -0
  108. ml_tools/ML_utilities/_train_tools.py +205 -0
  109. ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
  110. ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
  111. ml_tools/ML_vision_transformers/_imprimir.py +14 -0
  112. ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
  113. ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
  114. ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
  115. ml_tools/PSO_optimization/_imprimir.py +10 -0
  116. ml_tools/SQL/__init__.py +7 -0
  117. ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
  118. ml_tools/SQL/_imprimir.py +8 -0
  119. ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
  120. ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
  121. ml_tools/VIF/_imprimir.py +10 -0
  122. ml_tools/_core/__init__.py +7 -1
  123. ml_tools/_core/_logger.py +8 -18
  124. ml_tools/_core/_schema_load_ops.py +43 -0
  125. ml_tools/_core/_script_info.py +2 -2
  126. ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
  127. ml_tools/data_exploration/_analysis.py +214 -0
  128. ml_tools/data_exploration/_cleaning.py +566 -0
  129. ml_tools/data_exploration/_features.py +583 -0
  130. ml_tools/data_exploration/_imprimir.py +32 -0
  131. ml_tools/data_exploration/_plotting.py +487 -0
  132. ml_tools/data_exploration/_schema_ops.py +176 -0
  133. ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
  134. ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
  135. ml_tools/ensemble_evaluation/_imprimir.py +14 -0
  136. ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
  137. ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
  138. ml_tools/ensemble_inference/_imprimir.py +9 -0
  139. ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
  140. ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
  141. ml_tools/ensemble_learning/_imprimir.py +10 -0
  142. ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
  143. ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
  144. ml_tools/excel_handler/_imprimir.py +13 -0
  145. ml_tools/{keys.py → keys/__init__.py} +4 -1
  146. ml_tools/keys/_imprimir.py +11 -0
  147. ml_tools/{_core → keys}/_keys.py +2 -0
  148. ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
  149. ml_tools/math_utilities/_imprimir.py +11 -0
  150. ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
  151. ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
  152. ml_tools/optimization_tools/_imprimir.py +13 -0
  153. ml_tools/optimization_tools/_optimization_bounds.py +236 -0
  154. ml_tools/optimization_tools/_optimization_plots.py +218 -0
  155. ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
  156. ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
  157. ml_tools/path_manager/_imprimir.py +15 -0
  158. ml_tools/path_manager/_path_tools.py +346 -0
  159. ml_tools/plot_fonts/__init__.py +8 -0
  160. ml_tools/plot_fonts/_imprimir.py +8 -0
  161. ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
  162. ml_tools/schema/__init__.py +15 -0
  163. ml_tools/schema/_feature_schema.py +223 -0
  164. ml_tools/schema/_gui_schema.py +191 -0
  165. ml_tools/schema/_imprimir.py +10 -0
  166. ml_tools/{serde.py → serde/__init__.py} +4 -2
  167. ml_tools/serde/_imprimir.py +10 -0
  168. ml_tools/{_core → serde}/_serde.py +3 -8
  169. ml_tools/{utilities.py → utilities/__init__.py} +11 -6
  170. ml_tools/utilities/_imprimir.py +18 -0
  171. ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
  172. ml_tools/utilities/_utility_tools.py +192 -0
  173. dragon_ml_toolbox-19.14.0.dist-info/RECORD +0 -111
  174. ml_tools/ML_chaining_inference.py +0 -8
  175. ml_tools/ML_configuration.py +0 -86
  176. ml_tools/ML_configuration_pytab.py +0 -14
  177. ml_tools/ML_datasetmaster.py +0 -10
  178. ml_tools/ML_evaluation.py +0 -16
  179. ml_tools/ML_evaluation_multi.py +0 -12
  180. ml_tools/ML_finalize_handler.py +0 -8
  181. ml_tools/ML_inference.py +0 -12
  182. ml_tools/ML_models.py +0 -14
  183. ml_tools/ML_models_advanced.py +0 -14
  184. ml_tools/ML_models_pytab.py +0 -14
  185. ml_tools/ML_optimization.py +0 -14
  186. ml_tools/ML_optimization_pareto.py +0 -8
  187. ml_tools/ML_scaler.py +0 -8
  188. ml_tools/ML_sequence_datasetmaster.py +0 -8
  189. ml_tools/ML_sequence_evaluation.py +0 -10
  190. ml_tools/ML_sequence_inference.py +0 -8
  191. ml_tools/ML_sequence_models.py +0 -8
  192. ml_tools/ML_trainer.py +0 -12
  193. ml_tools/ML_vision_datasetmaster.py +0 -12
  194. ml_tools/ML_vision_evaluation.py +0 -10
  195. ml_tools/ML_vision_inference.py +0 -8
  196. ml_tools/ML_vision_models.py +0 -18
  197. ml_tools/SQL.py +0 -8
  198. ml_tools/_core/_ETL_cleaning.py +0 -694
  199. ml_tools/_core/_IO_tools.py +0 -498
  200. ml_tools/_core/_ML_callbacks.py +0 -702
  201. ml_tools/_core/_ML_configuration.py +0 -1332
  202. ml_tools/_core/_ML_configuration_pytab.py +0 -102
  203. ml_tools/_core/_ML_evaluation.py +0 -867
  204. ml_tools/_core/_ML_evaluation_multi.py +0 -544
  205. ml_tools/_core/_ML_inference.py +0 -646
  206. ml_tools/_core/_ML_models.py +0 -668
  207. ml_tools/_core/_ML_models_pytab.py +0 -693
  208. ml_tools/_core/_ML_trainer.py +0 -2323
  209. ml_tools/_core/_ML_utilities.py +0 -886
  210. ml_tools/_core/_ML_vision_models.py +0 -644
  211. ml_tools/_core/_data_exploration.py +0 -1909
  212. ml_tools/_core/_optimization_tools.py +0 -493
  213. ml_tools/_core/_schema.py +0 -359
  214. ml_tools/plot_fonts.py +0 -8
  215. ml_tools/schema.py +0 -12
  216. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
  217. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
  218. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  219. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
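
The bulk of this release is a structural refactor: each flat `ml_tools/<name>.py` module (typically backed by a private `_core/_<name>.py` implementation) becomes a `ml_tools/<name>/` package whose `__init__.py` re-exports the public API from private submodules, alongside a new `_imprimir.py` helper. Public import paths should therefore stay stable, though this diff only shows the file renames. A minimal sketch of the pattern using `ML_models` as the example; which submodule exports which class is an assumption, not confirmed by this diff:

```python
# Hypothetical sketch of the 20.0.0 package layout for ml_tools/ML_models/.
# The submodule file names appear in the list above; the exact re-exports
# in __init__.py are assumptions.

# ml_tools/ML_models/__init__.py (assumed contents):
# from ._mlp_attention import DragonMLP, DragonAttentionMLP
# from ._dragon_tabular import DragonTabularTransformer

# Caller code is presumably unaffected by the refactor in either version:
from ml_tools.ML_models import DragonMLP

model = DragonMLP(in_features=10, out_targets=2, hidden_layers=[64, 32])
```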
ml_tools/_core/_ML_models.py
@@ -1,668 +0,0 @@
- import torch
- from torch import nn
- from typing import List, Union, Tuple, Dict, Any
- from pathlib import Path
- import json
-
- from ._logger import get_logger
- from ._path_manager import make_fullpath
- from ._script_info import _script_info
- from ._keys import PytorchModelArchitectureKeys
- from ._schema import FeatureSchema
-
-
- _LOGGER = get_logger("DragonModel")
-
-
- __all__ = [
-     "DragonMLP",
-     "DragonAttentionMLP",
-     "DragonMultiHeadAttentionNet",
-     "DragonTabularTransformer"
- ]
-
-
- class _ArchitectureHandlerMixin:
-     """
-     A mixin class to provide save and load functionality for model architectures.
-     """
-     def save(self: nn.Module, directory: Union[str, Path], verbose: bool = True): # type: ignore
-         """Saves the model's architecture to a JSON file."""
-         if not hasattr(self, 'get_architecture_config'):
-             _LOGGER.error(f"Model '{self.__class__.__name__}' must have a 'get_architecture_config()' method to use this functionality.")
-             raise AttributeError()
-
-         path_dir = make_fullpath(directory, make=True, enforce="directory")
-
-         json_filename = PytorchModelArchitectureKeys.SAVENAME + ".json"
-
-         full_path = path_dir / json_filename
-
-         config = {
-             PytorchModelArchitectureKeys.MODEL: self.__class__.__name__,
-             PytorchModelArchitectureKeys.CONFIG: self.get_architecture_config() # type: ignore
-         }
-
-         with open(full_path, 'w') as f:
-             json.dump(config, f, indent=4)
-
-         if verbose:
-             _LOGGER.info(f"Architecture for '{self.__class__.__name__}' saved as '{full_path.name}'")
-
-     @classmethod
-     def load(cls: type, file_or_dir: Union[str, Path], verbose: bool = True) -> nn.Module:
-         """Loads a model architecture from a JSON file. If a directory is provided, the function will attempt to load a JSON file inside."""
-         user_path = make_fullpath(file_or_dir)
-
-         if user_path.is_dir():
-             json_filename = PytorchModelArchitectureKeys.SAVENAME + ".json"
-             target_path = make_fullpath(user_path / json_filename, enforce="file")
-         elif user_path.is_file():
-             target_path = user_path
-         else:
-             _LOGGER.error(f"Invalid path: '{file_or_dir}'")
-             raise IOError()
-
-         with open(target_path, 'r') as f:
-             saved_data = json.load(f)
-
-         saved_class_name = saved_data[PytorchModelArchitectureKeys.MODEL]
-         config = saved_data[PytorchModelArchitectureKeys.CONFIG]
-
-         if saved_class_name != cls.__name__:
-             _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{cls.__name__}' was expected.")
-             raise ValueError()
-
-         model = cls(**config)
-         if verbose:
-             _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
-         return model
-
-
- class _BaseMLP(nn.Module, _ArchitectureHandlerMixin):
-     """
-     A base class for Multilayer Perceptrons.
-
-     Handles validation, configuration, and the creation of the core MLP layers,
-     allowing subclasses to define their own pre-processing and forward pass.
-     """
-     def __init__(self,
-                  in_features: int,
-                  out_targets: int,
-                  hidden_layers: List[int],
-                  drop_out: float) -> None:
-         super().__init__()
-
-         # --- Validation ---
-         if not isinstance(in_features, int) or in_features < 1:
-             _LOGGER.error("'in_features' must be a positive integer.")
-             raise ValueError()
-         if not isinstance(out_targets, int) or out_targets < 1:
-             _LOGGER.error("'out_targets' must be a positive integer.")
-             raise ValueError()
-         if not isinstance(hidden_layers, list) or not all(isinstance(n, int) for n in hidden_layers):
-             _LOGGER.error("'hidden_layers' must be a list of integers.")
-             raise TypeError()
-         if not (0.0 <= drop_out < 1.0):
-             _LOGGER.error("'drop_out' must be a float between 0.0 and 1.0.")
-             raise ValueError()
-
-         # --- Save configuration ---
-         self.in_features = in_features
-         self.out_targets = out_targets
-         self.hidden_layers = hidden_layers
-         self.drop_out = drop_out
-
-         # --- Build the core MLP network ---
-         mlp_layers = []
-         current_features = in_features
-         for neurons in hidden_layers:
-             mlp_layers.extend([
-                 nn.Linear(current_features, neurons),
-                 nn.BatchNorm1d(neurons),
-                 nn.ReLU(),
-                 nn.Dropout(p=drop_out)
-             ])
-             current_features = neurons
-
-         self.mlp = nn.Sequential(*mlp_layers)
-         # Set a customizable Prediction Head for flexibility, specially in transfer learning and fine-tuning
-         self.output_layer = nn.Linear(current_features, out_targets)
-
-     def get_architecture_config(self) -> Dict[str, Any]:
-         """Returns the base configuration of the model."""
-         return {
-             'in_features': self.in_features,
-             'out_targets': self.out_targets,
-             'hidden_layers': self.hidden_layers,
-             'drop_out': self.drop_out
-         }
-
-     def _repr_helper(self, name: str, mlp_layers: list[str]):
-         last_layer = self.output_layer
-         if isinstance(last_layer, nn.Linear):
-             mlp_layers.append(str(last_layer.out_features))
-         else:
-             mlp_layers.append("Custom Prediction Head")
-
-         # Creates a string like: 10 -> 40 -> 80 -> 40 -> 2
-         arch_str = ' -> '.join(mlp_layers)
-
-         return f"{name}(arch: {arch_str})"
-
-
- class _BaseAttention(_BaseMLP):
-     """
-     Abstract base class for MLP models that incorporate an attention mechanism
-     before the main MLP layers.
-     """
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         # By default, models inheriting this do not have the flag.
-         self.attention = None
-         self.has_interpretable_attention = False
-
-     def forward(self, x: torch.Tensor) -> torch.Tensor:
-         """Defines the standard forward pass."""
-         logits, _attention_weights = self.forward_attention(x)
-         return logits
-
-     def forward_attention(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-         """Returns logits and attention weights."""
-         # This logic is now shared and defined in one place
-         x, attention_weights = self.attention(x) # type: ignore
-         x = self.mlp(x)
-         logits = self.output_layer(x)
-         return logits, attention_weights
-
-
- class DragonMLP(_BaseMLP):
-     """
-     Creates a versatile Multilayer Perceptron (MLP) for regression or classification tasks.
-     """
-     def __init__(self, in_features: int, out_targets: int,
-                  hidden_layers: List[int] = [256, 128], drop_out: float = 0.2) -> None:
-         """
-         Args:
-             in_features (int): The number of input features (e.g., columns in your data).
-             out_targets (int): The number of output targets. For regression, this is
-                 typically 1. For classification, it's the number of classes.
-             hidden_layers (list[int]): A list where each integer represents the
-                 number of neurons in a hidden layer.
-             drop_out (float): The dropout probability for neurons in each hidden
-                 layer. Must be between 0.0 and 1.0.
-
-         ### Rules of thumb:
-         - Choose a number of hidden neurons between the size of the input layer and the size of the output layer.
-         - The number of hidden neurons should be 2/3 the size of the input layer, plus the size of the output layer.
-         - The number of hidden neurons should be less than twice the size of the input layer.
-         """
-         super().__init__(in_features, out_targets, hidden_layers, drop_out)
-
-     def forward(self, x: torch.Tensor) -> torch.Tensor:
-         """Defines the forward pass of the model."""
-         x = self.mlp(x)
-         logits = self.output_layer(x)
-         return logits
-
-     def __repr__(self) -> str:
-         """Returns the developer-friendly string representation of the model."""
-         # Extracts the number of neurons from each nn.Linear layer
-         layer_sizes = [str(layer.in_features) for layer in self.mlp if isinstance(layer, nn.Linear)]
-
-         return self._repr_helper(name="DragonMLP", mlp_layers=layer_sizes)
-
-
- class DragonAttentionMLP(_BaseAttention):
-     """
-     A Multilayer Perceptron (MLP) that incorporates an Attention layer to dynamically weigh input features.
-
-     In inference mode use `forward_attention()` to get a tuple with `(output, attention_weights)`
-     """
-     def __init__(self, in_features: int, out_targets: int,
-                  hidden_layers: List[int] = [256, 128], drop_out: float = 0.2) -> None:
-         """
-         Args:
-             in_features (int): The number of input features (e.g., columns in your data).
-             out_targets (int): The number of output targets. For regression, this is
-                 typically 1. For classification, it's the number of classes.
-             hidden_layers (list[int]): A list where each integer represents the
-                 number of neurons in a hidden layer.
-             drop_out (float): The dropout probability for neurons in each hidden
-                 layer. Must be between 0.0 and 1.0.
-         """
-         super().__init__(in_features, out_targets, hidden_layers, drop_out)
-         # Attention
-         self.attention = _AttentionLayer(in_features)
-         self.has_interpretable_attention = True
-
-     def __repr__(self) -> str:
-         """Returns the developer-friendly string representation of the model."""
-         # Start with the input features and the attention marker
-         arch = [str(self.in_features), "[Attention]"]
-
-         # Find all other linear layers in the MLP
-         for layer in self.mlp[1:]:
-             if isinstance(layer, nn.Linear):
-                 arch.append(str(layer.in_features))
-
-         return self._repr_helper(name="DragonAttentionMLP", mlp_layers=arch)
-
-
- class DragonMultiHeadAttentionNet(_BaseAttention):
-     """
-     An MLP that incorporates a standard `nn.MultiheadAttention` layer to process
-     the input features.
-
-     In inference mode use `forward_attention()` to get a tuple with `(output, attention_weights)`.
-     """
-     def __init__(self, in_features: int, out_targets: int,
-                  hidden_layers: List[int] = [256, 128], drop_out: float = 0.2,
-                  num_heads: int = 4, attention_dropout: float = 0.1) -> None:
-         """
-         Args:
-             in_features (int): The number of input features.
-             out_targets (int): The number of output targets.
-             hidden_layers (list[int]): A list of neuron counts for each hidden layer.
-             drop_out (float): The dropout probability for the MLP layers.
-             num_heads (int): The number of attention heads.
-             attention_dropout (float): Dropout probability in the attention layer.
-         """
-         super().__init__(in_features, out_targets, hidden_layers, drop_out)
-         self.num_heads = num_heads
-         self.attention_dropout = attention_dropout
-
-         self.attention = _MultiHeadAttentionLayer(
-             num_features=in_features,
-             num_heads=num_heads,
-             dropout=attention_dropout
-         )
-
-     def get_architecture_config(self) -> Dict[str, Any]:
-         """Returns the full configuration of the model."""
-         config = super().get_architecture_config()
-         config['num_heads'] = self.num_heads
-         config['attention_dropout'] = self.attention_dropout
-         return config
-
-     def __repr__(self) -> str:
-         """Returns the developer-friendly string representation of the model."""
-         mlp_part = " -> ".join(
-             [str(self.in_features)] +
-             [str(h) for h in self.hidden_layers] +
-             [str(self.out_targets)]
-         )
-         arch_str = f"{self.in_features} -> [MultiHead(h={self.num_heads})] -> {mlp_part}"
-
-         return f"DragonMultiHeadAttentionNet(arch: {arch_str})"
-
-
- class DragonTabularTransformer(nn.Module, _ArchitectureHandlerMixin):
-     """
-     A Transformer-based model for tabular data tasks.
-
-     This model uses a Feature Tokenizer to convert all input features into a
-     sequence of embeddings, prepends a [CLS] token, and processes the
-     sequence with a standard Transformer Encoder.
-     """
-     def __init__(self, *,
-                  schema: FeatureSchema,
-                  out_targets: int,
-                  embedding_dim: int = 256,
-                  num_heads: int = 8,
-                  num_layers: int = 6,
-                  dropout: float = 0.2):
-         """
-         Args:
-             schema (FeatureSchema):
-                 The definitive schema object created by `data_exploration.finalize_feature_schema()`.
-             out_targets (int):
-                 Number of output targets (1 for regression).
-             embedding_dim (int):
-                 The dimension for all feature embeddings. Must be divisible by num_heads. Common values: (64, 128, 192, 256, etc.)
-             num_heads (int):
-                 The number of heads in the multi-head attention mechanism. Common values: (4, 8, 16)
-             num_layers (int):
-                 The number of sub-encoder-layers in the transformer encoder. Common values: (4, 8, 12)
-             dropout (float):
-                 The dropout value.
-
-         ## Note:
-
-         **Embedding Dimension:** "Width" of the model. It's the N-dimension vector that will be used to represent each one of the features.
-         - Each continuous feature gets its own learnable N-dimension vector.
-         - Each categorical feature gets an embedding table that maps every category (e.g., "color=red", "color=blue") to a unique N-dimension vector.
-
-         **Attention Heads:** Controls the "Multi-Head Attention" mechanism. Instead of looking at all the feature interactions at once, the model splits its attention into N parallel heads.
-         - Embedding Dimensions get divided by the number of Attention Heads, resulting in the dimensions assigned per head.
-
-         **Number of Layers:** "Depth" of the model. Number of identical `TransformerEncoderLayer` blocks that are stacked on top of each other.
-         - Layer 1: The attention heads find simple, direct interactions between the features.
-         - Layer 2: Takes the output of Layer 1 and finds interactions between those interactions and so on.
-         - Trade-off: More layers are more powerful but are slower to train and more prone to overfitting. If the training loss goes down but the validation loss goes up, you might have too many layers (or need more dropout).
-
-         """
-         super().__init__()
-
-         # --- Get info from schema ---
-         in_features = len(schema.feature_names)
-         categorical_index_map = schema.categorical_index_map
-
-         # --- Validation ---
-         if categorical_index_map and (max(categorical_index_map.keys()) >= in_features):
-             _LOGGER.error(f"A categorical index ({max(categorical_index_map.keys())}) is out of bounds for the provided input features ({in_features}).")
-             raise ValueError()
-
-         # --- Save configuration ---
-         self.schema = schema # <-- Save the whole schema
-         self.out_targets = out_targets
-         self.embedding_dim = embedding_dim
-         self.num_heads = num_heads
-         self.num_layers = num_layers
-         self.dropout = dropout
-
-         # --- 1. Feature Tokenizer (now takes the schema) ---
-         self.tokenizer = _FeatureTokenizer(
-             schema=schema,
-             embedding_dim=embedding_dim
-         )
-
-         # --- 2. CLS Token ---
-         self.cls_token = nn.Parameter(torch.randn(1, 1, embedding_dim))
-
-         # --- 3. Transformer Encoder ---
-         encoder_layer = nn.TransformerEncoderLayer(
-             d_model=embedding_dim,
-             nhead=num_heads,
-             dropout=dropout,
-             batch_first=True # Crucial for (batch, seq, feature) input
-         )
-         self.transformer_encoder = nn.TransformerEncoder(
-             encoder_layer=encoder_layer,
-             num_layers=num_layers
-         )
-
-         # --- 4. Prediction Head ---
-         self.output_layer = nn.Linear(embedding_dim, out_targets)
-
-     def forward(self, x: torch.Tensor) -> torch.Tensor:
-         """Defines the forward pass of the model."""
-         # Get the batch size for later use
-         batch_size = x.shape[0]
-
-         # 1. Get feature tokens from the tokenizer
-         # -> tokens shape: (batch_size, num_features, embedding_dim)
-         tokens = self.tokenizer(x)
-
-         # 2. Prepend the [CLS] token to the sequence
-         # -> cls_tokens shape: (batch_size, 1, embedding_dim)
-         cls_tokens = self.cls_token.expand(batch_size, -1, -1)
-         # -> full_sequence shape: (batch_size, num_features + 1, embedding_dim)
-         full_sequence = torch.cat([cls_tokens, tokens], dim=1)
-
-         # 3. Pass the full sequence through the Transformer Encoder
-         # -> transformer_out shape: (batch_size, num_features + 1, embedding_dim)
-         transformer_out = self.transformer_encoder(full_sequence)
-
-         # 4. Isolate the output of the [CLS] token (it's the first one)
-         # -> cls_output shape: (batch_size, embedding_dim)
-         cls_output = transformer_out[:, 0]
-
-         # 5. Pass the [CLS] token's output through the prediction head
-         # -> logits shape: (batch_size, out_targets)
-         logits = self.output_layer(cls_output)
-
-         return logits
-
-     def get_architecture_config(self) -> Dict[str, Any]:
-         """Returns the full configuration of the model."""
-         # Deconstruct schema into a JSON-friendly dict
-         # Tuples are saved as lists
-         schema_dict = {
-             'feature_names': self.schema.feature_names,
-             'continuous_feature_names': self.schema.continuous_feature_names,
-             'categorical_feature_names': self.schema.categorical_feature_names,
-             'categorical_index_map': self.schema.categorical_index_map,
-             'categorical_mappings': self.schema.categorical_mappings
-         }
-
-         return {
-             'schema_dict': schema_dict,
-             'out_targets': self.out_targets,
-             'embedding_dim': self.embedding_dim,
-             'num_heads': self.num_heads,
-             'num_layers': self.num_layers,
-             'dropout': self.dropout
-         }
-
-     @classmethod
-     def load(cls: type, file_or_dir: Union[str, Path], verbose: bool = True) -> nn.Module:
-         """Loads a model architecture from a JSON file."""
-         user_path = make_fullpath(file_or_dir)
-
-         if user_path.is_dir():
-             json_filename = PytorchModelArchitectureKeys.SAVENAME + ".json"
-             target_path = make_fullpath(user_path / json_filename, enforce="file")
-         elif user_path.is_file():
-             target_path = user_path
-         else:
-             _LOGGER.error(f"Invalid path: '{file_or_dir}'")
-             raise IOError()
-
-         with open(target_path, 'r') as f:
-             saved_data = json.load(f)
-
-         saved_class_name = saved_data[PytorchModelArchitectureKeys.MODEL]
-         config = saved_data[PytorchModelArchitectureKeys.CONFIG]
-
-         if saved_class_name != cls.__name__:
-             _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{cls.__name__}' was expected.")
-             raise ValueError()
-
-         # --- RECONSTRUCTION LOGIC ---
-         if 'schema_dict' not in config:
-             _LOGGER.error("Invalid architecture file: missing 'schema_dict'. This file may be from an older version.")
-             raise ValueError("Missing 'schema_dict' in config.")
-
-         schema_data = config.pop('schema_dict')
-
-         # Re-hydrate the categorical_index_map
-         # JSON saves all dict keys as strings, so we must convert them back to int.
-         raw_index_map = schema_data['categorical_index_map']
-         if raw_index_map is not None:
-             rehydrated_index_map = {int(k): v for k, v in raw_index_map.items()}
-         else:
-             rehydrated_index_map = None
-
-         # Re-hydrate the FeatureSchema object
-         # JSON deserializes tuples as lists, so we must convert them back.
-         schema = FeatureSchema(
-             feature_names=tuple(schema_data['feature_names']),
-             continuous_feature_names=tuple(schema_data['continuous_feature_names']),
-             categorical_feature_names=tuple(schema_data['categorical_feature_names']),
-             categorical_index_map=rehydrated_index_map,
-             categorical_mappings=schema_data['categorical_mappings']
-         )
-
-         config['schema'] = schema
-         # --- End Reconstruction ---
-
-         model = cls(**config)
-         if verbose:
-             _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
-         return model
-
-     def __repr__(self) -> str:
-         """Returns the developer-friendly string representation of the model."""
-         # Build the architecture string part-by-part
-         parts = [
-             f"Tokenizer(features={len(self.schema.feature_names)}, dim={self.embedding_dim})",
-             "[CLS]",
-             f"TransformerEncoder(layers={self.num_layers}, heads={self.num_heads})",
-             f"PredictionHead(outputs={self.out_targets})"
-         ]
-
-         arch_str = " -> ".join(parts)
-
-         return f"DragonTabularTransformer(arch: {arch_str})"
-
-
- class _FeatureTokenizer(nn.Module):
-     """
-     Transforms raw numerical and categorical features from any column order
-     into a sequence of embeddings.
-     """
-     def __init__(self,
-                  schema: FeatureSchema,
-                  embedding_dim: int):
-         """
-         Args:
-             schema (FeatureSchema):
-                 The definitive schema object from data_exploration.
-             embedding_dim (int):
-                 The dimension for all feature embeddings.
-         """
-         super().__init__()
-
-         # --- Get info from schema ---
-         categorical_map = schema.categorical_index_map
-
-         if categorical_map:
-             # Unpack the dictionary into separate lists
-             self.categorical_indices = list(categorical_map.keys())
-             cardinalities = list(categorical_map.values())
-         else:
-             self.categorical_indices = []
-             cardinalities = []
-
-         # Derive numerical indices by finding what's not categorical
-         all_indices = set(range(len(schema.feature_names)))
-         categorical_indices_set = set(self.categorical_indices)
-         self.numerical_indices = sorted(list(all_indices - categorical_indices_set))
-
-         self.embedding_dim = embedding_dim
-
-         # A learnable embedding for each numerical feature
-         self.numerical_embeddings = nn.Parameter(torch.randn(len(self.numerical_indices), embedding_dim))
-
-         # A standard embedding layer for each categorical feature
-         self.categorical_embeddings = nn.ModuleList(
-             [nn.Embedding(num_embeddings=c, embedding_dim=embedding_dim) for c in cardinalities]
-         )
-
-     def forward(self, x: torch.Tensor) -> torch.Tensor:
-         """
-         Processes features from a single input tensor and concatenates them
-         into a sequence of tokens.
-         """
-         # Select the correct columns for each type using the stored indices
-         x_numerical = x[:, self.numerical_indices].float()
-         x_categorical = x[:, self.categorical_indices].long()
-
-         # Process numerical features
-         numerical_tokens = x_numerical.unsqueeze(-1) * self.numerical_embeddings
-
-         # Process categorical features
-         categorical_tokens = []
-         for i, embed_layer in enumerate(self.categorical_embeddings):
-             # x_categorical[:, i] selects the i-th categorical column
-             # (e.g., all values for the 'color' feature)
-             token = embed_layer(x_categorical[:, i]).unsqueeze(1)
-             categorical_tokens.append(token)
-
-         # Concatenate all tokens into a single sequence
-         if not self.categorical_indices:
-             all_tokens = numerical_tokens
-         elif not self.numerical_indices:
-             all_tokens = torch.cat(categorical_tokens, dim=1)
-         else:
-             all_categorical_tokens = torch.cat(categorical_tokens, dim=1)
-             all_tokens = torch.cat([numerical_tokens, all_categorical_tokens], dim=1)
-
-         return all_tokens
-
-
- class _AttentionLayer(nn.Module):
-     """
-     Calculates attention weights and applies them to the input features, incorporating a residual connection for improved stability and performance.
-
-     Returns both the final output and the weights for interpretability.
-     """
-     def __init__(self, num_features: int):
-         super().__init__()
-         # The hidden layer size is a hyperparameter
-         hidden_size = max(16, num_features // 4)
-
-         # Learn to produce attention scores
-         self.attention_net = nn.Sequential(
-             nn.Linear(num_features, hidden_size),
-             nn.Tanh(),
-             nn.Linear(hidden_size, num_features) # Output one score per feature
-         )
-         self.softmax = nn.Softmax(dim=1)
-
-     def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
-         # x shape: (batch_size, num_features)
-
-         # Get one raw "importance" score per feature
-         attention_scores = self.attention_net(x)
-
-         # Apply the softmax module to get weights that sum to 1
-         attention_weights = self.softmax(attention_scores)
-
-         # Weighted features (attention mechanism's output)
-         weighted_features = x * attention_weights
-
-         # Residual connection
-         residual_connection = x + weighted_features
-
-         return residual_connection, attention_weights
-
-
- class _MultiHeadAttentionLayer(nn.Module):
-     """
-     A wrapper for the standard `torch.nn.MultiheadAttention` layer.
-
-     This layer treats the entire input feature vector as a single item in a
-     sequence and applies self-attention to it. It is followed by a residual
-     connection and layer normalization, which is a standard block in
-     Transformer-style models.
-     """
-     def __init__(self, num_features: int, num_heads: int, dropout: float):
-         super().__init__()
-         self.attention = nn.MultiheadAttention(
-             embed_dim=num_features,
-             num_heads=num_heads,
-             dropout=dropout,
-             batch_first=True # Crucial for (batch, seq, feature) input
-         )
-         self.layer_norm = nn.LayerNorm(num_features)
-
-     def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
-         # x shape: (batch_size, num_features)
-
-         # nn.MultiheadAttention expects a sequence dimension.
-         # We add a sequence dimension of length 1.
-         # x_reshaped shape: (batch_size, 1, num_features)
-         x_reshaped = x.unsqueeze(1)
-
-         # Apply self-attention. query, key, and value are all the same.
-         # attn_output shape: (batch_size, 1, num_features)
-         # attn_weights shape: (batch_size, 1, 1)
-         attn_output, attn_weights = self.attention(
-             query=x_reshaped,
-             key=x_reshaped,
-             value=x_reshaped,
-             need_weights=True,
-             average_attn_weights=True # Average weights across heads
-         )
-
-         # Add residual connection and apply layer normalization (Post-LN)
-         out = self.layer_norm(x + attn_output.squeeze(1))
-
-         # Squeeze weights for a consistent output shape
-         return out, attn_weights.squeeze()
-
-
- def info():
-     _script_info(__all__)
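
For reference, the deleted `_ML_models.py` above centralized architecture persistence in `_ArchitectureHandlerMixin`: `save()` writes a JSON file pairing the class name with the constructor kwargs from `get_architecture_config()`, and `load()` verifies the class name before re-instantiating with those kwargs. A minimal, self-contained sketch of that round trip; the literal key names and file stem below stand in for the `PytorchModelArchitectureKeys` constants, which this diff does not show:

```python
# Standalone sketch of the architecture JSON round trip implemented by the
# removed _ArchitectureHandlerMixin. "model", "config", and "architecture.json"
# are placeholders for the PytorchModelArchitectureKeys constants.
import json
from pathlib import Path

def save_architecture(model, directory: str) -> Path:
    """Write {class name, constructor config} as a JSON file."""
    path = Path(directory) / "architecture.json"
    payload = {
        "model": model.__class__.__name__,
        "config": model.get_architecture_config(),
    }
    path.write_text(json.dumps(payload, indent=4))
    return path

def load_architecture(cls, file: str):
    """Re-instantiate cls(**config) after checking the saved class name."""
    payload = json.loads(Path(file).read_text())
    if payload["model"] != cls.__name__:
        raise ValueError(f"Expected '{cls.__name__}', file has '{payload['model']}'.")
    return cls(**payload["config"])
```

In 20.0.0 this persistence logic presumably moves to `ml_tools/ML_models/_base_save_load.py` (entry 76 in the file list), though only the deletion is shown in this hunk.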