dragon-ml-toolbox 20.2.0__py3-none-any.whl → 20.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/METADATA +1 -1
- dragon_ml_toolbox-20.3.0.dist-info/RECORD +143 -0
- ml_tools/ETL_cleaning/__init__.py +5 -1
- ml_tools/ETL_cleaning/_basic_clean.py +1 -1
- ml_tools/ETL_engineering/__init__.py +5 -1
- ml_tools/GUI_tools/__init__.py +5 -1
- ml_tools/IO_tools/_IO_loggers.py +12 -4
- ml_tools/IO_tools/__init__.py +5 -1
- ml_tools/MICE/__init__.py +8 -2
- ml_tools/MICE/_dragon_mice.py +1 -1
- ml_tools/ML_callbacks/__init__.py +5 -1
- ml_tools/ML_chain/__init__.py +5 -1
- ml_tools/ML_configuration/__init__.py +7 -1
- ml_tools/ML_configuration/_training.py +65 -1
- ml_tools/ML_datasetmaster/__init__.py +5 -1
- ml_tools/ML_datasetmaster/_base_datasetmaster.py +31 -20
- ml_tools/ML_datasetmaster/_datasetmaster.py +26 -9
- ml_tools/ML_datasetmaster/_sequence_datasetmaster.py +38 -23
- ml_tools/ML_evaluation/__init__.py +5 -1
- ml_tools/ML_evaluation_captum/__init__.py +5 -1
- ml_tools/ML_finalize_handler/__init__.py +5 -1
- ml_tools/ML_inference/__init__.py +5 -1
- ml_tools/ML_inference_sequence/__init__.py +5 -1
- ml_tools/ML_inference_vision/__init__.py +5 -1
- ml_tools/ML_models/__init__.py +21 -6
- ml_tools/ML_models/_dragon_autoint.py +302 -0
- ml_tools/ML_models/_dragon_gate.py +358 -0
- ml_tools/ML_models/_dragon_node.py +268 -0
- ml_tools/ML_models/_dragon_tabnet.py +255 -0
- ml_tools/ML_models_sequence/__init__.py +5 -1
- ml_tools/ML_models_vision/__init__.py +5 -1
- ml_tools/ML_optimization/__init__.py +11 -3
- ml_tools/ML_optimization/_multi_dragon.py +2 -2
- ml_tools/ML_optimization/_single_dragon.py +47 -67
- ml_tools/ML_optimization/_single_manual.py +1 -1
- ml_tools/ML_scaler/_ML_scaler.py +12 -7
- ml_tools/ML_scaler/__init__.py +5 -1
- ml_tools/ML_trainer/__init__.py +5 -1
- ml_tools/ML_trainer/_base_trainer.py +136 -13
- ml_tools/ML_trainer/_dragon_detection_trainer.py +31 -91
- ml_tools/ML_trainer/_dragon_sequence_trainer.py +24 -74
- ml_tools/ML_trainer/_dragon_trainer.py +24 -85
- ml_tools/ML_utilities/__init__.py +5 -1
- ml_tools/ML_utilities/_inspection.py +44 -30
- ml_tools/ML_vision_transformers/__init__.py +8 -2
- ml_tools/PSO_optimization/__init__.py +5 -1
- ml_tools/SQL/__init__.py +8 -2
- ml_tools/VIF/__init__.py +5 -1
- ml_tools/data_exploration/__init__.py +4 -1
- ml_tools/data_exploration/_cleaning.py +4 -2
- ml_tools/ensemble_evaluation/__init__.py +5 -1
- ml_tools/ensemble_inference/__init__.py +5 -1
- ml_tools/ensemble_learning/__init__.py +5 -1
- ml_tools/excel_handler/__init__.py +5 -1
- ml_tools/keys/__init__.py +5 -1
- ml_tools/math_utilities/__init__.py +5 -1
- ml_tools/optimization_tools/__init__.py +5 -1
- ml_tools/path_manager/__init__.py +8 -2
- ml_tools/plot_fonts/__init__.py +8 -2
- ml_tools/schema/__init__.py +8 -2
- ml_tools/schema/_feature_schema.py +3 -3
- ml_tools/serde/__init__.py +5 -1
- ml_tools/utilities/__init__.py +5 -1
- ml_tools/utilities/_utility_save_load.py +38 -20
- dragon_ml_toolbox-20.2.0.dist-info/RECORD +0 -179
- ml_tools/ETL_cleaning/_imprimir.py +0 -13
- ml_tools/ETL_engineering/_imprimir.py +0 -24
- ml_tools/GUI_tools/_imprimir.py +0 -12
- ml_tools/IO_tools/_imprimir.py +0 -14
- ml_tools/MICE/_imprimir.py +0 -11
- ml_tools/ML_callbacks/_imprimir.py +0 -12
- ml_tools/ML_chain/_imprimir.py +0 -12
- ml_tools/ML_configuration/_imprimir.py +0 -47
- ml_tools/ML_datasetmaster/_imprimir.py +0 -15
- ml_tools/ML_evaluation/_imprimir.py +0 -25
- ml_tools/ML_evaluation_captum/_imprimir.py +0 -10
- ml_tools/ML_finalize_handler/_imprimir.py +0 -8
- ml_tools/ML_inference/_imprimir.py +0 -11
- ml_tools/ML_inference_sequence/_imprimir.py +0 -8
- ml_tools/ML_inference_vision/_imprimir.py +0 -8
- ml_tools/ML_models/_advanced_models.py +0 -1086
- ml_tools/ML_models/_imprimir.py +0 -18
- ml_tools/ML_models_sequence/_imprimir.py +0 -8
- ml_tools/ML_models_vision/_imprimir.py +0 -16
- ml_tools/ML_optimization/_imprimir.py +0 -13
- ml_tools/ML_scaler/_imprimir.py +0 -8
- ml_tools/ML_trainer/_imprimir.py +0 -10
- ml_tools/ML_utilities/_imprimir.py +0 -16
- ml_tools/ML_vision_transformers/_imprimir.py +0 -14
- ml_tools/PSO_optimization/_imprimir.py +0 -10
- ml_tools/SQL/_imprimir.py +0 -8
- ml_tools/VIF/_imprimir.py +0 -10
- ml_tools/data_exploration/_imprimir.py +0 -32
- ml_tools/ensemble_evaluation/_imprimir.py +0 -14
- ml_tools/ensemble_inference/_imprimir.py +0 -9
- ml_tools/ensemble_learning/_imprimir.py +0 -10
- ml_tools/excel_handler/_imprimir.py +0 -13
- ml_tools/keys/_imprimir.py +0 -11
- ml_tools/math_utilities/_imprimir.py +0 -11
- ml_tools/optimization_tools/_imprimir.py +0 -13
- ml_tools/path_manager/_imprimir.py +0 -15
- ml_tools/plot_fonts/_imprimir.py +0 -8
- ml_tools/schema/_imprimir.py +0 -10
- ml_tools/serde/_imprimir.py +0 -10
- ml_tools/utilities/_imprimir.py +0 -18
- {dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.nn as nn
|
|
3
|
+
import torch.nn.functional as F
|
|
4
|
+
from typing import Any, Optional, Literal
|
|
5
|
+
|
|
6
|
+
from ..schema import FeatureSchema
|
|
7
|
+
from .._core import get_logger
|
|
8
|
+
from ..keys._keys import SchemaKeys
|
|
9
|
+
|
|
10
|
+
from ._base_save_load import _ArchitectureBuilder
|
|
11
|
+
from ._models_advanced_helpers import (
|
|
12
|
+
Embedding1dLayer,
|
|
13
|
+
entmax15,
|
|
14
|
+
entmoid15,
|
|
15
|
+
sparsemax,
|
|
16
|
+
sparsemoid,
|
|
17
|
+
DenseODSTBlock,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
_LOGGER = get_logger("DragonNodeModel")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"DragonNodeModel",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
# SOURCE CODE: Adapted and modified from:
|
|
29
|
+
# https://github.com/manujosephv/pytorch_tabular/blob/main/LICENSE
|
|
30
|
+
# https://github.com/Qwicen/node/blob/master/LICENSE.md
|
|
31
|
+
# https://github.com/jrzaurin/pytorch-widedeep?tab=readme-ov-file#license
|
|
32
|
+
# https://github.com/rixwew/pytorch-fm/blob/master/LICENSE
|
|
33
|
+
# https://arxiv.org/abs/1705.08741v2
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class DragonNodeModel(_ArchitectureBuilder):
    """
    Native implementation of Neural Oblivious Decision Ensembles (NODE).

    The 'Dense' architecture concatenates the outputs of previous layers to the
    features of subsequent layers, allowing for deep feature interaction learning.
    """
    # Maps user-facing names to the feature-selection activations used inside
    # the oblivious trees ('entmax'/'sparsemax' can zero out features; 'softmax' cannot).
    ACTIVATION_MAP = {
        "entmax": entmax15,
        "sparsemax": sparsemax,
        "softmax": F.softmax,
    }

    # Maps user-facing names to the soft binary threshold functions used for
    # the tree split (binning) decisions.
    BINARY_ACTIVATION_MAP = {
        "entmoid": entmoid15,
        "sparsemoid": sparsemoid,
        "sigmoid": torch.sigmoid,
    }

    def __init__(self, *,
                 schema: FeatureSchema,
                 out_targets: int,
                 embedding_dim: int = 24,
                 num_trees: int = 1024,
                 num_layers: int = 2,
                 tree_depth: int = 6,
                 additional_tree_output_dim: int = 3,
                 max_features: Optional[int] = None,
                 input_dropout: float = 0.0,
                 embedding_dropout: float = 0.0,
                 choice_function: Literal['entmax', 'sparsemax', 'softmax'] = 'entmax',
                 bin_function: Literal['entmoid', 'sparsemoid', 'sigmoid'] = 'entmoid',
                 batch_norm_continuous: bool = False):
        """
        Args:
            schema (FeatureSchema):
                Schema object containing feature names and types.
            out_targets (int):
                Number of output targets.
            embedding_dim (int, optional):
                Embedding dimension for categorical features.
                Suggested: 16 to 64.
            num_trees (int, optional):
                Number of Oblivious Decision Trees per layer. NODE relies on a large number
                of trees (wider layers) compared to standard forests.
                Suggested: 512 to 2048.
            num_layers (int, optional):
                Number of DenseODST layers. Since layers are densely connected, deeper
                networks increase memory usage significantly.
                Suggested: 2 to 5.
            tree_depth (int, optional):
                Depth of the oblivious trees. Oblivious trees are symmetric, so
                parameters scale with 2^depth.
                Suggested: 4 to 8.
            additional_tree_output_dim (int, optional):
                Extra output channels per tree. These are used for internal representation
                in deeper layers but discarded for the final prediction.
                Suggested: 1 to 5.
            max_features (int, optional):
                Max features to keep in the dense connection to prevent explosion in
                feature dimension for deeper layers. If None, keeps all.
            input_dropout (float, optional):
                Dropout applied to the input of the Dense Block.
                Suggested: 0.0 to 0.2.
            embedding_dropout (float, optional):
                Dropout applied specifically to embeddings.
                Suggested: 0.0 to 0.2.
            choice_function (str, optional):
                Activation for feature selection. 'entmax' allows sparse feature selection.
                Options: 'entmax', 'sparsemax', 'softmax'.
            bin_function (str, optional):
                Activation for the soft binning steps.
                Options: 'entmoid', 'sparsemoid', 'sigmoid'.
            batch_norm_continuous (bool, optional):
                If True, applies Batch Normalization to continuous features.
        """
        super().__init__()
        self.schema = schema
        self.out_targets = out_targets

        # -- Configuration for saving --
        # Persisted verbatim so get_architecture_config() can rebuild the model.
        self.model_hparams = {
            'embedding_dim': embedding_dim,
            'num_trees': num_trees,
            'num_layers': num_layers,
            'tree_depth': tree_depth,
            'additional_tree_output_dim': additional_tree_output_dim,
            'max_features': max_features,
            'input_dropout': input_dropout,
            'embedding_dropout': embedding_dropout,
            'choice_function': choice_function,
            'bin_function': bin_function,
            'batch_norm_continuous': batch_norm_continuous
        }

        # -- 1. Setup Embeddings --
        # Column positions (into the raw input tensor) and cardinalities of
        # categorical features; empty lists when the schema has none.
        self.categorical_indices = []
        self.cardinalities = []
        if schema.categorical_index_map:
            self.categorical_indices = list(schema.categorical_index_map.keys())
            self.cardinalities = list(schema.categorical_index_map.values())

        # Every feature that is not categorical is treated as continuous.
        all_indices = set(range(len(schema.feature_names)))
        self.numerical_indices = sorted(list(all_indices - set(self.categorical_indices)))

        # All categorical features share the same embedding width.
        embedding_dims = [(c, embedding_dim) for c in self.cardinalities]
        n_continuous = len(self.numerical_indices)

        self.embedding_layer = Embedding1dLayer(
            continuous_dim=n_continuous,
            categorical_embedding_dims=embedding_dims,
            embedding_dropout=embedding_dropout,
            batch_norm_continuous_input=batch_norm_continuous
        )

        # Width of the concatenated (continuous + embedded categorical) vector.
        total_embedded_dim = n_continuous + sum([d for _, d in embedding_dims])

        # -- 2. Backbone (Dense ODST) --
        # The tree output dim includes the target dim + auxiliary dims for deep learning
        self.tree_dim = out_targets + additional_tree_output_dim

        self.backbone = DenseODSTBlock(
            input_dim=total_embedded_dim,
            num_trees=num_trees,
            num_layers=num_layers,
            tree_output_dim=self.tree_dim,
            max_features=max_features,
            input_dropout=input_dropout,
            flatten_output=False,  # We want (Batch, Num_Layers * Num_Trees, Tree_Dim)
            depth=tree_depth,
            # Activations
            choice_function=self.ACTIVATION_MAP[choice_function],
            bin_function=self.BINARY_ACTIVATION_MAP[bin_function],
            # Init strategies (defaults)
            initialize_response_=nn.init.normal_,
            initialize_selection_logits_=nn.init.uniform_,
        )

        # Note: NODE has a fixed Head (averaging) which is defined in forward()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Split inputs
        # NOTE(review): assumes x columns are ordered exactly as schema.feature_names
        # — confirm against the dataset pipeline.
        x_cont = x[:, self.numerical_indices].float()
        x_cat = x[:, self.categorical_indices].long()

        # 1. Embeddings
        x = self.embedding_layer(x_cont, x_cat)

        # 2. Backbone
        # Output shape: (Batch, Total_Trees, Tree_Dim)
        x = self.backbone(x)

        # 3. Head (Averaging)
        # We take the first 'out_targets' channels and average them across all trees
        # subset: x[..., :out_targets]
        # mean: .mean(dim=-2) -> average over Total_Trees dimension
        return x[..., :self.out_targets].mean(dim=-2)

    def data_aware_initialization(self, train_dataset, num_samples: int = 2000, verbose: int = 3) -> None:
        """
        Performs data-aware initialization for the ODST trees using a dataset.
        Crucial for NODE convergence.
        """
        # 1. Prepare Data
        if verbose >= 2:
            _LOGGER.info(f"Performing NODE data-aware initialization on up to {num_samples} samples...")
        # Run the init on whichever device the model currently lives on.
        device = next(self.parameters()).device

        # 2. Extract Features
        # Fast path: If the dataset exposes the full feature tensor (like _PytorchDataset)
        if hasattr(train_dataset, "features") and isinstance(train_dataset.features, torch.Tensor):
            # Slice directly
            limit = min(len(train_dataset.features), num_samples)
            x_input = train_dataset.features[:limit]
        else:
            # Slow path: Iterate and stack (Generic Dataset)
            indices = range(min(len(train_dataset), num_samples))
            x_accum = []
            for i in indices:
                # Expecting (features, targets) tuple from standard datasets
                sample = train_dataset[i]
                if isinstance(sample, (tuple, list)):
                    x_accum.append(sample[0])
                elif isinstance(sample, dict) and 'features' in sample:
                    x_accum.append(sample['features'])
                elif isinstance(sample, dict) and 'x' in sample:
                    x_accum.append(sample['x'])
                else:
                    # Fallback: assume the sample itself is the feature
                    x_accum.append(sample)

            if not x_accum:
                if verbose >= 1:
                    _LOGGER.warning("Dataset empty or format unrecognized. Skipping NODE initialization.")
                return

            # NOTE(review): assumes each accumulated sample is a tensor of the same
            # shape; torch.stack will raise otherwise — confirm for custom datasets.
            x_input = torch.stack(x_accum)

        x_input = x_input.to(device).float()

        # 3. Process features (Split -> Embed)
        x_cont = x_input[:, self.numerical_indices].float()
        x_cat = x_input[:, self.categorical_indices].long()

        # No gradients needed: this only sets initial thresholds/responses.
        with torch.no_grad():
            x_embedded = self.embedding_layer(x_cont, x_cat)

        # 4. Initialize Backbone
        if hasattr(self.backbone, 'initialize'):
            self.backbone.initialize(x_embedded)
            if verbose >= 2:
                _LOGGER.info("NODE Initialization Complete. Ready to train.")
        else:
            if verbose >= 1:
                _LOGGER.warning("NODE Backbone does not have an 'initialize' method. Skipping.")

    def get_architecture_config(self) -> dict[str, Any]:
        """Returns the full configuration of the model."""
        # Serialize the schema alongside hyperparameters so the exact model can
        # be reconstructed from this dict alone.
        schema_dict = {
            'feature_names': self.schema.feature_names,
            'continuous_feature_names': self.schema.continuous_feature_names,
            'categorical_feature_names': self.schema.categorical_feature_names,
            'categorical_index_map': self.schema.categorical_index_map,
            'categorical_mappings': self.schema.categorical_mappings
        }

        config = {
            SchemaKeys.SCHEMA_DICT: schema_dict,
            'out_targets': self.out_targets,
            **self.model_hparams
        }
        return config
|
|
268
|
+
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.nn as nn
|
|
3
|
+
from typing import Any, Literal
|
|
4
|
+
|
|
5
|
+
from ..schema import FeatureSchema
|
|
6
|
+
from .._core import get_logger
|
|
7
|
+
from ..keys._keys import SchemaKeys
|
|
8
|
+
|
|
9
|
+
from ._base_save_load import _ArchitectureBuilder
|
|
10
|
+
from ._models_advanced_helpers import (
|
|
11
|
+
FeatTransformer,
|
|
12
|
+
AttentiveTransformer,
|
|
13
|
+
initialize_non_glu,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
_LOGGER = get_logger("DragonTabNet")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"DragonTabNet"
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
# SOURCE CODE: Adapted and modified from:
|
|
25
|
+
# https://github.com/manujosephv/pytorch_tabular/blob/main/LICENSE
|
|
26
|
+
# https://github.com/Qwicen/node/blob/master/LICENSE.md
|
|
27
|
+
# https://github.com/jrzaurin/pytorch-widedeep?tab=readme-ov-file#license
|
|
28
|
+
# https://github.com/rixwew/pytorch-fm/blob/master/LICENSE
|
|
29
|
+
# https://arxiv.org/abs/1705.08741v2
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DragonTabNet(_ArchitectureBuilder):
    """
    Native implementation of TabNet (Attentive Interpretable Tabular Learning).

    Includes the Initial Splitter, Ghost Batch Norm, and GLU scaling.

    Side effect: each ``forward`` call stores the sparsity regularization term
    in ``self.regularization_loss`` for the training loop to consume.
    """
    def __init__(self, *,
                 schema: FeatureSchema,
                 out_targets: int,
                 n_d: int = 8,
                 n_a: int = 8,
                 n_steps: int = 3,
                 gamma: float = 1.3,
                 n_independent: int = 2,
                 n_shared: int = 2,
                 virtual_batch_size: int = 128,
                 momentum: float = 0.02,
                 mask_type: Literal['sparsemax', 'entmax', 'softmax'] = 'sparsemax',
                 batch_norm_continuous: bool = False):
        """
        Args:
            schema (FeatureSchema):
                Schema object containing feature names and types.
            out_targets (int):
                Number of output targets.
            n_d (int, optional):
                Dimension of the prediction layer (decision step).
                Suggested: 8 to 64.
            n_a (int, optional):
                Dimension of the attention layer (masking step).
                Suggested: 8 to 64.
            n_steps (int, optional):
                Number of sequential attention steps (architecture depth).
                Suggested: 3 to 10.
            gamma (float, optional):
                Relaxation parameter for sparsity in the mask.
                Suggested: 1.0 to 2.0.
            n_independent (int, optional):
                Number of independent Gated Linear Unit (GLU) layers in each block.
                Suggested: 1 to 5.
            n_shared (int, optional):
                Number of shared GLU layers across all blocks.
                Suggested: 1 to 5.
            virtual_batch_size (int, optional):
                Batch size for Ghost Batch Normalization.
                Suggested: 128 to 1024.
            momentum (float, optional):
                Momentum for Batch Normalization.
                Suggested: 0.01 to 0.4.
            mask_type (str, optional):
                Masking function to use. 'sparsemax' enforces sparsity.
                Options: 'sparsemax', 'entmax', 'softmax'.
            batch_norm_continuous (bool, optional):
                If True, applies Batch Normalization to continuous features before processing.
        """
        super().__init__()
        self.schema = schema
        self.out_targets = out_targets

        # Save config (persisted verbatim for get_architecture_config()).
        self.model_hparams = {
            'n_d': n_d,
            'n_a': n_a,
            'n_steps': n_steps,
            'gamma': gamma,
            'n_independent': n_independent,
            'n_shared': n_shared,
            'virtual_batch_size': virtual_batch_size,
            'momentum': momentum,
            'mask_type': mask_type,
            'batch_norm_continuous': batch_norm_continuous
        }

        # -- 1. Setup Input Features --
        # Column positions (into the raw input tensor) and cardinalities of
        # categorical features; empty when the schema has none.
        self.categorical_indices = []
        self.cardinalities = []
        if schema.categorical_index_map:
            self.categorical_indices = list(schema.categorical_index_map.keys())
            self.cardinalities = list(schema.categorical_index_map.values())

        # Every feature not listed as categorical is treated as continuous.
        all_indices = set(range(len(schema.feature_names)))
        self.numerical_indices = sorted(list(all_indices - set(self.categorical_indices)))

        # Standard TabNet Embeddings:
        # We use a simple embedding for each categorical feature and concat with continuous.
        self.cat_embeddings = nn.ModuleList([
            nn.Embedding(card, 1) for card in self.cardinalities
        ])

        self.n_continuous = len(self.numerical_indices)
        self.input_dim = self.n_continuous + len(self.cardinalities)

        # -- 2. TabNet Backbone Components --
        self.n_d = n_d
        self.n_a = n_a
        self.n_steps = n_steps
        self.gamma = gamma
        # Numerical floor inside log() of the sparsity loss.
        self.epsilon = 1e-15

        # Initial BN.
        # NOTE(review): momentum is intentionally fixed at 0.01 here (matching
        # reference TabNet implementations); the `momentum` argument applies to
        # the Ghost Batch Norm inside the transformers — confirm this is intended.
        self.initial_bn = nn.BatchNorm1d(self.input_dim, momentum=0.01)

        # Shared GLU Layers: the first projects from input_dim, the rest are
        # (n_d + n_a) -> 2*(n_d + n_a) GLU pre-activations reused by every step.
        if n_shared > 0:
            self.shared_feat_transform = nn.ModuleList()
            for i in range(n_shared):
                if i == 0:
                    self.shared_feat_transform.append(
                        nn.Linear(self.input_dim, 2 * (n_d + n_a), bias=False)
                    )
                else:
                    self.shared_feat_transform.append(
                        nn.Linear(n_d + n_a, 2 * (n_d + n_a), bias=False)
                    )
        else:
            self.shared_feat_transform = None

        # Initial Splitter
        # This processes the input BEFORE the first step to generate the initial attention vector 'a'
        self.initial_splitter = FeatTransformer(
            self.input_dim,
            n_d + n_a,
            self.shared_feat_transform,
            n_glu_independent=n_independent,
            virtual_batch_size=virtual_batch_size,
            momentum=momentum,
        )

        # Steps: one feature transformer + one attentive transformer per step.
        self.feat_transformers = nn.ModuleList()
        self.att_transformers = nn.ModuleList()

        for step in range(n_steps):
            transformer = FeatTransformer(
                self.input_dim,
                n_d + n_a,
                self.shared_feat_transform,
                n_glu_independent=n_independent,
                virtual_batch_size=virtual_batch_size,
                momentum=momentum,
            )
            attention = AttentiveTransformer(
                n_a,
                self.input_dim,  # We assume group_dim = input_dim (no grouping)
                virtual_batch_size=virtual_batch_size,
                momentum=momentum,
                mask_type=mask_type,
            )
            self.feat_transformers.append(transformer)
            self.att_transformers.append(attention)

        # -- 3. Final Mapping Head --
        self.final_mapping = nn.Linear(n_d, out_targets, bias=False)
        initialize_non_glu(self.final_mapping, n_d, out_targets)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # -- Preprocessing --
        # NOTE(review): assumes x columns are ordered per schema.feature_names.
        x_cont = x[:, self.numerical_indices].float()
        x_cat = x[:, self.categorical_indices].long()

        cat_list = []
        for i, embed in enumerate(self.cat_embeddings):
            cat_list.append(embed(x_cat[:, i]))  # (B, 1)

        if cat_list:
            x_in = torch.cat([x_cont, *cat_list], dim=1)
        else:
            x_in = x_cont

        # -- TabNet Encoder Pass --
        x_bn = self.initial_bn(x_in)
        # Initial Split
        # The splitter produces [d, a]. We only need 'a' to start the loop.
        att = self.initial_splitter(x_bn)[:, self.n_d:]
        # FIX: use ones_like so the prior matches x_bn's dtype as well as device
        # (torch.ones(..., device=...) always produced default-dtype float32).
        priors = torch.ones_like(x_bn)
        out_accumulated = 0
        self.regularization_loss = 0

        for step in range(self.n_steps):
            # 1. Attention
            mask = self.att_transformers[step](priors, att)
            # 2. Accumulate sparsity loss matching original implementation
            loss = torch.sum(torch.mul(mask, torch.log(mask + self.epsilon)), dim=1)
            self.regularization_loss += torch.mean(loss)
            # 3. Update Prior (gamma relaxation discourages re-using features)
            priors = torch.mul(self.gamma - mask, priors)
            # 4. Masking
            masked_x = torch.mul(mask, x_bn)
            # 5. Feature Transformer
            out = self.feat_transformers[step](masked_x)
            # 6. Split Output
            # FIX: use the functional relu instead of instantiating a fresh
            # nn.ReLU() module on every step of every forward pass.
            d = torch.relu(out[:, :self.n_d])
            att = out[:, self.n_d:]
            # 7. Accumulate Decision
            out_accumulated = out_accumulated + d

        self.regularization_loss /= self.n_steps
        return self.final_mapping(out_accumulated)

    def data_aware_initialization(self, train_dataset, num_samples: int = 2000, verbose: int = 3) -> None:
        """
        TabNet does not require data-aware initialization. Method Implemented for compatibility.
        """
        if verbose >= 2:
            _LOGGER.info("TabNet does not require data-aware initialization. Skipping.")

    def get_architecture_config(self) -> dict[str, Any]:
        """Returns the full configuration of the model."""
        # Serialize the schema alongside hyperparameters so the exact model can
        # be reconstructed from this dict alone.
        schema_dict = {
            'feature_names': self.schema.feature_names,
            'continuous_feature_names': self.schema.continuous_feature_names,
            'categorical_feature_names': self.schema.categorical_feature_names,
            'categorical_index_map': self.schema.categorical_index_map,
            'categorical_mappings': self.schema.categorical_mappings
        }

        config = {
            SchemaKeys.SCHEMA_DICT: schema_dict,
            'out_targets': self.out_targets,
            **self.model_hparams
        }
        return config
|
|
255
|
+
|
|
@@ -13,7 +13,7 @@ from ._object_detection import (
|
|
|
13
13
|
DragonFastRCNN,
|
|
14
14
|
)
|
|
15
15
|
|
|
16
|
-
from
|
|
16
|
+
from .._core import _imprimir_disponibles
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
__all__ = [
|
|
@@ -27,3 +27,7 @@ __all__ = [
|
|
|
27
27
|
# Object Detection
|
|
28
28
|
"DragonFastRCNN",
|
|
29
29
|
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def info():
|
|
33
|
+
_imprimir_disponibles(__all__)
|
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
from ._multi_dragon import
|
|
1
|
+
from ._multi_dragon import (
|
|
2
|
+
DragonParetoOptimizer
|
|
3
|
+
)
|
|
2
4
|
|
|
3
|
-
from ._single_dragon import
|
|
5
|
+
from ._single_dragon import (
|
|
6
|
+
DragonOptimizer
|
|
7
|
+
)
|
|
4
8
|
|
|
5
9
|
from ._single_manual import (
|
|
6
10
|
FitnessEvaluator,
|
|
@@ -8,7 +12,7 @@ from ._single_manual import (
|
|
|
8
12
|
run_optimization,
|
|
9
13
|
)
|
|
10
14
|
|
|
11
|
-
from
|
|
15
|
+
from .._core import _imprimir_disponibles
|
|
12
16
|
|
|
13
17
|
|
|
14
18
|
__all__ = [
|
|
@@ -19,3 +23,7 @@ __all__ = [
|
|
|
19
23
|
"create_pytorch_problem",
|
|
20
24
|
"run_optimization",
|
|
21
25
|
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def info():
|
|
29
|
+
_imprimir_disponibles(__all__)
|
|
@@ -378,8 +378,8 @@ class DragonParetoOptimizer:
|
|
|
378
378
|
sanitized_filename = sanitize_filename(filename)
|
|
379
379
|
csv_filename = sanitized_filename if sanitized_filename.lower().endswith(".csv") else f"{sanitized_filename}.csv"
|
|
380
380
|
|
|
381
|
-
save_dataframe_filename(df=df_to_save, save_dir=save_path, filename=csv_filename)
|
|
382
|
-
_LOGGER.info(f"💾 Pareto solutions saved to CSV: '{save_path.name}/{csv_filename}'")
|
|
381
|
+
save_dataframe_filename(df=df_to_save, save_dir=save_path, filename=csv_filename, verbose=1)
|
|
382
|
+
_LOGGER.info(f"💾 Pareto solutions saved to CSV: '{save_path.name}/{csv_filename}'. Shape: {df_to_save.shape}")
|
|
383
383
|
|
|
384
384
|
# Save optimization bounds as JSON for reference (debug mode)
|
|
385
385
|
if self._debug:
|