dragon-ml-toolbox 19.13.0__py3-none-any.whl → 20.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only.
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
- dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
- ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
- ml_tools/ETL_cleaning/_basic_clean.py +351 -0
- ml_tools/ETL_cleaning/_clean_tools.py +128 -0
- ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
- ml_tools/ETL_cleaning/_imprimir.py +13 -0
- ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
- ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
- ml_tools/ETL_engineering/_imprimir.py +24 -0
- ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
- ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
- ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
- ml_tools/GUI_tools/_imprimir.py +12 -0
- ml_tools/IO_tools/_IO_loggers.py +235 -0
- ml_tools/IO_tools/_IO_save_load.py +151 -0
- ml_tools/IO_tools/_IO_utils.py +140 -0
- ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
- ml_tools/IO_tools/_imprimir.py +14 -0
- ml_tools/MICE/_MICE_imputation.py +132 -0
- ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
- ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
- ml_tools/MICE/_imprimir.py +11 -0
- ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
- ml_tools/ML_callbacks/_base.py +101 -0
- ml_tools/ML_callbacks/_checkpoint.py +232 -0
- ml_tools/ML_callbacks/_early_stop.py +208 -0
- ml_tools/ML_callbacks/_imprimir.py +12 -0
- ml_tools/ML_callbacks/_scheduler.py +197 -0
- ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
- ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
- ml_tools/ML_chain/_dragon_chain.py +140 -0
- ml_tools/ML_chain/_imprimir.py +11 -0
- ml_tools/ML_configuration/__init__.py +90 -0
- ml_tools/ML_configuration/_base_model_config.py +69 -0
- ml_tools/ML_configuration/_finalize.py +366 -0
- ml_tools/ML_configuration/_imprimir.py +47 -0
- ml_tools/ML_configuration/_metrics.py +593 -0
- ml_tools/ML_configuration/_models.py +206 -0
- ml_tools/ML_configuration/_training.py +124 -0
- ml_tools/ML_datasetmaster/__init__.py +28 -0
- ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
- ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
- ml_tools/ML_datasetmaster/_imprimir.py +15 -0
- ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
- ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
- ml_tools/ML_evaluation/__init__.py +53 -0
- ml_tools/ML_evaluation/_classification.py +629 -0
- ml_tools/ML_evaluation/_feature_importance.py +409 -0
- ml_tools/ML_evaluation/_imprimir.py +25 -0
- ml_tools/ML_evaluation/_loss.py +92 -0
- ml_tools/ML_evaluation/_regression.py +273 -0
- ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
- ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
- ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
- ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
- ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
- ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
- ml_tools/ML_finalize_handler/__init__.py +10 -0
- ml_tools/ML_finalize_handler/_imprimir.py +8 -0
- ml_tools/ML_inference/__init__.py +22 -0
- ml_tools/ML_inference/_base_inference.py +166 -0
- ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
- ml_tools/ML_inference/_dragon_inference.py +332 -0
- ml_tools/ML_inference/_imprimir.py +11 -0
- ml_tools/ML_inference/_multi_inference.py +180 -0
- ml_tools/ML_inference_sequence/__init__.py +10 -0
- ml_tools/ML_inference_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
- ml_tools/ML_inference_vision/__init__.py +10 -0
- ml_tools/ML_inference_vision/_imprimir.py +8 -0
- ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
- ml_tools/ML_models/__init__.py +32 -0
- ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
- ml_tools/ML_models/_base_mlp_attention.py +198 -0
- ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
- ml_tools/ML_models/_dragon_tabular.py +248 -0
- ml_tools/ML_models/_imprimir.py +18 -0
- ml_tools/ML_models/_mlp_attention.py +134 -0
- ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
- ml_tools/ML_models_sequence/__init__.py +10 -0
- ml_tools/ML_models_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
- ml_tools/ML_models_vision/__init__.py +29 -0
- ml_tools/ML_models_vision/_base_wrapper.py +254 -0
- ml_tools/ML_models_vision/_image_classification.py +182 -0
- ml_tools/ML_models_vision/_image_segmentation.py +108 -0
- ml_tools/ML_models_vision/_imprimir.py +16 -0
- ml_tools/ML_models_vision/_object_detection.py +135 -0
- ml_tools/ML_optimization/__init__.py +21 -0
- ml_tools/ML_optimization/_imprimir.py +13 -0
- ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
- ml_tools/ML_optimization/_single_dragon.py +203 -0
- ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
- ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
- ml_tools/ML_scaler/__init__.py +10 -0
- ml_tools/ML_scaler/_imprimir.py +8 -0
- ml_tools/ML_trainer/__init__.py +20 -0
- ml_tools/ML_trainer/_base_trainer.py +297 -0
- ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
- ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
- ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
- ml_tools/ML_trainer/_imprimir.py +10 -0
- ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
- ml_tools/ML_utilities/_artifact_finder.py +382 -0
- ml_tools/ML_utilities/_imprimir.py +16 -0
- ml_tools/ML_utilities/_inspection.py +325 -0
- ml_tools/ML_utilities/_train_tools.py +205 -0
- ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
- ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
- ml_tools/ML_vision_transformers/_imprimir.py +14 -0
- ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
- ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
- ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
- ml_tools/PSO_optimization/_imprimir.py +10 -0
- ml_tools/SQL/__init__.py +7 -0
- ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
- ml_tools/SQL/_imprimir.py +8 -0
- ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
- ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
- ml_tools/VIF/_imprimir.py +10 -0
- ml_tools/_core/__init__.py +7 -1
- ml_tools/_core/_logger.py +8 -18
- ml_tools/_core/_schema_load_ops.py +43 -0
- ml_tools/_core/_script_info.py +2 -2
- ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
- ml_tools/data_exploration/_analysis.py +214 -0
- ml_tools/data_exploration/_cleaning.py +566 -0
- ml_tools/data_exploration/_features.py +583 -0
- ml_tools/data_exploration/_imprimir.py +32 -0
- ml_tools/data_exploration/_plotting.py +487 -0
- ml_tools/data_exploration/_schema_ops.py +176 -0
- ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
- ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
- ml_tools/ensemble_evaluation/_imprimir.py +14 -0
- ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
- ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
- ml_tools/ensemble_inference/_imprimir.py +9 -0
- ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
- ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
- ml_tools/ensemble_learning/_imprimir.py +10 -0
- ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
- ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
- ml_tools/excel_handler/_imprimir.py +13 -0
- ml_tools/{keys.py → keys/__init__.py} +4 -1
- ml_tools/keys/_imprimir.py +11 -0
- ml_tools/{_core → keys}/_keys.py +2 -0
- ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
- ml_tools/math_utilities/_imprimir.py +11 -0
- ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
- ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
- ml_tools/optimization_tools/_imprimir.py +13 -0
- ml_tools/optimization_tools/_optimization_bounds.py +236 -0
- ml_tools/optimization_tools/_optimization_plots.py +218 -0
- ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
- ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
- ml_tools/path_manager/_imprimir.py +15 -0
- ml_tools/path_manager/_path_tools.py +346 -0
- ml_tools/plot_fonts/__init__.py +8 -0
- ml_tools/plot_fonts/_imprimir.py +8 -0
- ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
- ml_tools/schema/__init__.py +15 -0
- ml_tools/schema/_feature_schema.py +223 -0
- ml_tools/schema/_gui_schema.py +191 -0
- ml_tools/schema/_imprimir.py +10 -0
- ml_tools/{serde.py → serde/__init__.py} +4 -2
- ml_tools/serde/_imprimir.py +10 -0
- ml_tools/{_core → serde}/_serde.py +3 -8
- ml_tools/{utilities.py → utilities/__init__.py} +11 -6
- ml_tools/utilities/_imprimir.py +18 -0
- ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
- ml_tools/utilities/_utility_tools.py +192 -0
- dragon_ml_toolbox-19.13.0.dist-info/RECORD +0 -111
- ml_tools/ML_chaining_inference.py +0 -8
- ml_tools/ML_configuration.py +0 -86
- ml_tools/ML_configuration_pytab.py +0 -14
- ml_tools/ML_datasetmaster.py +0 -10
- ml_tools/ML_evaluation.py +0 -16
- ml_tools/ML_evaluation_multi.py +0 -12
- ml_tools/ML_finalize_handler.py +0 -8
- ml_tools/ML_inference.py +0 -12
- ml_tools/ML_models.py +0 -14
- ml_tools/ML_models_advanced.py +0 -14
- ml_tools/ML_models_pytab.py +0 -14
- ml_tools/ML_optimization.py +0 -14
- ml_tools/ML_optimization_pareto.py +0 -8
- ml_tools/ML_scaler.py +0 -8
- ml_tools/ML_sequence_datasetmaster.py +0 -8
- ml_tools/ML_sequence_evaluation.py +0 -10
- ml_tools/ML_sequence_inference.py +0 -8
- ml_tools/ML_sequence_models.py +0 -8
- ml_tools/ML_trainer.py +0 -12
- ml_tools/ML_vision_datasetmaster.py +0 -12
- ml_tools/ML_vision_evaluation.py +0 -10
- ml_tools/ML_vision_inference.py +0 -8
- ml_tools/ML_vision_models.py +0 -18
- ml_tools/SQL.py +0 -8
- ml_tools/_core/_ETL_cleaning.py +0 -694
- ml_tools/_core/_IO_tools.py +0 -498
- ml_tools/_core/_ML_callbacks.py +0 -702
- ml_tools/_core/_ML_configuration.py +0 -1332
- ml_tools/_core/_ML_configuration_pytab.py +0 -102
- ml_tools/_core/_ML_evaluation.py +0 -867
- ml_tools/_core/_ML_evaluation_multi.py +0 -544
- ml_tools/_core/_ML_inference.py +0 -646
- ml_tools/_core/_ML_models.py +0 -668
- ml_tools/_core/_ML_models_pytab.py +0 -693
- ml_tools/_core/_ML_trainer.py +0 -2323
- ml_tools/_core/_ML_utilities.py +0 -886
- ml_tools/_core/_ML_vision_models.py +0 -644
- ml_tools/_core/_data_exploration.py +0 -1901
- ml_tools/_core/_optimization_tools.py +0 -493
- ml_tools/_core/_schema.py +0 -359
- ml_tools/plot_fonts.py +0 -8
- ml_tools/schema.py +0 -12
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
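
Taken together, the renames above show the shape of the 20.0.0 refactor: each flat top-level module (e.g. `ml_tools/ETL_cleaning.py`) becomes a package whose `__init__.py` re-exports its public API from private `_*.py` submodules, and the old `ml_tools/_core/` implementation modules are deleted or relocated into those packages. Assuming each new `__init__.py` re-exports the same public names, existing `from ml_tools.<module> import ...` statements should keep working; a minimal smoke test sketch (module names taken from the file list above, nothing else guaranteed):

```python
# Sketch: check that the package refactor preserved top-level import paths.
# Assumes dragon-ml-toolbox 20.0.0 is installed; the module names are taken
# from the rename list above, and the re-exported contents are not guaranteed.
import importlib

for module_name in (
    "ml_tools.ETL_cleaning",      # was ml_tools/ETL_cleaning.py
    "ml_tools.data_exploration",  # was ml_tools/data_exploration.py
    "ml_tools.path_manager",      # was ml_tools/path_manager.py
):
    mod = importlib.import_module(module_name)
    # Each package's __init__.py should still expose its public names.
    public = [name for name in dir(mod) if not name.startswith("_")]
    print(f"{module_name}: {len(public)} public names")
```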
ml_tools/_core/_ML_models.py
DELETED
@@ -1,668 +0,0 @@

```python
import torch
from torch import nn
from typing import List, Union, Tuple, Dict, Any
from pathlib import Path
import json

from ._logger import get_logger
from ._path_manager import make_fullpath
from ._script_info import _script_info
from ._keys import PytorchModelArchitectureKeys
from ._schema import FeatureSchema


_LOGGER = get_logger("DragonModel")


__all__ = [
    "DragonMLP",
    "DragonAttentionMLP",
    "DragonMultiHeadAttentionNet",
    "DragonTabularTransformer"
]


class _ArchitectureHandlerMixin:
    """
    A mixin class to provide save and load functionality for model architectures.
    """
    def save(self: nn.Module, directory: Union[str, Path], verbose: bool = True): # type: ignore
        """Saves the model's architecture to a JSON file."""
        if not hasattr(self, 'get_architecture_config'):
            _LOGGER.error(f"Model '{self.__class__.__name__}' must have a 'get_architecture_config()' method to use this functionality.")
            raise AttributeError()

        path_dir = make_fullpath(directory, make=True, enforce="directory")

        json_filename = PytorchModelArchitectureKeys.SAVENAME + ".json"

        full_path = path_dir / json_filename

        config = {
            PytorchModelArchitectureKeys.MODEL: self.__class__.__name__,
            PytorchModelArchitectureKeys.CONFIG: self.get_architecture_config() # type: ignore
        }

        with open(full_path, 'w') as f:
            json.dump(config, f, indent=4)

        if verbose:
            _LOGGER.info(f"Architecture for '{self.__class__.__name__}' saved as '{full_path.name}'")

    @classmethod
    def load(cls: type, file_or_dir: Union[str, Path], verbose: bool = True) -> nn.Module:
        """Loads a model architecture from a JSON file. If a directory is provided, the function will attempt to load a JSON file inside."""
        user_path = make_fullpath(file_or_dir)

        if user_path.is_dir():
            json_filename = PytorchModelArchitectureKeys.SAVENAME + ".json"
            target_path = make_fullpath(user_path / json_filename, enforce="file")
        elif user_path.is_file():
            target_path = user_path
        else:
            _LOGGER.error(f"Invalid path: '{file_or_dir}'")
            raise IOError()

        with open(target_path, 'r') as f:
            saved_data = json.load(f)

        saved_class_name = saved_data[PytorchModelArchitectureKeys.MODEL]
        config = saved_data[PytorchModelArchitectureKeys.CONFIG]

        if saved_class_name != cls.__name__:
            _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{cls.__name__}' was expected.")
            raise ValueError()

        model = cls(**config)
        if verbose:
            _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
        return model


class _BaseMLP(nn.Module, _ArchitectureHandlerMixin):
    """
    A base class for Multilayer Perceptrons.

    Handles validation, configuration, and the creation of the core MLP layers,
    allowing subclasses to define their own pre-processing and forward pass.
    """
    def __init__(self,
                 in_features: int,
                 out_targets: int,
                 hidden_layers: List[int],
                 drop_out: float) -> None:
        super().__init__()

        # --- Validation ---
        if not isinstance(in_features, int) or in_features < 1:
            _LOGGER.error("'in_features' must be a positive integer.")
            raise ValueError()
        if not isinstance(out_targets, int) or out_targets < 1:
            _LOGGER.error("'out_targets' must be a positive integer.")
            raise ValueError()
        if not isinstance(hidden_layers, list) or not all(isinstance(n, int) for n in hidden_layers):
            _LOGGER.error("'hidden_layers' must be a list of integers.")
            raise TypeError()
        if not (0.0 <= drop_out < 1.0):
            _LOGGER.error("'drop_out' must be a float between 0.0 and 1.0.")
            raise ValueError()

        # --- Save configuration ---
        self.in_features = in_features
        self.out_targets = out_targets
        self.hidden_layers = hidden_layers
        self.drop_out = drop_out

        # --- Build the core MLP network ---
        mlp_layers = []
        current_features = in_features
        for neurons in hidden_layers:
            mlp_layers.extend([
                nn.Linear(current_features, neurons),
                nn.BatchNorm1d(neurons),
                nn.ReLU(),
                nn.Dropout(p=drop_out)
            ])
            current_features = neurons

        self.mlp = nn.Sequential(*mlp_layers)
        # Set a customizable Prediction Head for flexibility, specially in transfer learning and fine-tuning
        self.output_layer = nn.Linear(current_features, out_targets)

    def get_architecture_config(self) -> Dict[str, Any]:
        """Returns the base configuration of the model."""
        return {
            'in_features': self.in_features,
            'out_targets': self.out_targets,
            'hidden_layers': self.hidden_layers,
            'drop_out': self.drop_out
        }

    def _repr_helper(self, name: str, mlp_layers: list[str]):
        last_layer = self.output_layer
        if isinstance(last_layer, nn.Linear):
            mlp_layers.append(str(last_layer.out_features))
        else:
            mlp_layers.append("Custom Prediction Head")

        # Creates a string like: 10 -> 40 -> 80 -> 40 -> 2
        arch_str = ' -> '.join(mlp_layers)

        return f"{name}(arch: {arch_str})"


class _BaseAttention(_BaseMLP):
    """
    Abstract base class for MLP models that incorporate an attention mechanism
    before the main MLP layers.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # By default, models inheriting this do not have the flag.
        self.attention = None
        self.has_interpretable_attention = False

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Defines the standard forward pass."""
        logits, _attention_weights = self.forward_attention(x)
        return logits

    def forward_attention(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Returns logits and attention weights."""
        # This logic is now shared and defined in one place
        x, attention_weights = self.attention(x) # type: ignore
        x = self.mlp(x)
        logits = self.output_layer(x)
        return logits, attention_weights


class DragonMLP(_BaseMLP):
    """
    Creates a versatile Multilayer Perceptron (MLP) for regression or classification tasks.
    """
    def __init__(self, in_features: int, out_targets: int,
                 hidden_layers: List[int] = [256, 128], drop_out: float = 0.2) -> None:
        """
        Args:
            in_features (int): The number of input features (e.g., columns in your data).
            out_targets (int): The number of output targets. For regression, this is
                typically 1. For classification, it's the number of classes.
            hidden_layers (list[int]): A list where each integer represents the
                number of neurons in a hidden layer.
            drop_out (float): The dropout probability for neurons in each hidden
                layer. Must be between 0.0 and 1.0.

        ### Rules of thumb:
        - Choose a number of hidden neurons between the size of the input layer and the size of the output layer.
        - The number of hidden neurons should be 2/3 the size of the input layer, plus the size of the output layer.
        - The number of hidden neurons should be less than twice the size of the input layer.
        """
        super().__init__(in_features, out_targets, hidden_layers, drop_out)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Defines the forward pass of the model."""
        x = self.mlp(x)
        logits = self.output_layer(x)
        return logits

    def __repr__(self) -> str:
        """Returns the developer-friendly string representation of the model."""
        # Extracts the number of neurons from each nn.Linear layer
        layer_sizes = [str(layer.in_features) for layer in self.mlp if isinstance(layer, nn.Linear)]

        return self._repr_helper(name="DragonMLP", mlp_layers=layer_sizes)


class DragonAttentionMLP(_BaseAttention):
    """
    A Multilayer Perceptron (MLP) that incorporates an Attention layer to dynamically weigh input features.

    In inference mode use `forward_attention()` to get a tuple with `(output, attention_weights)`
    """
    def __init__(self, in_features: int, out_targets: int,
                 hidden_layers: List[int] = [256, 128], drop_out: float = 0.2) -> None:
        """
        Args:
            in_features (int): The number of input features (e.g., columns in your data).
            out_targets (int): The number of output targets. For regression, this is
                typically 1. For classification, it's the number of classes.
            hidden_layers (list[int]): A list where each integer represents the
                number of neurons in a hidden layer.
            drop_out (float): The dropout probability for neurons in each hidden
                layer. Must be between 0.0 and 1.0.
        """
        super().__init__(in_features, out_targets, hidden_layers, drop_out)
        # Attention
        self.attention = _AttentionLayer(in_features)
        self.has_interpretable_attention = True

    def __repr__(self) -> str:
        """Returns the developer-friendly string representation of the model."""
        # Start with the input features and the attention marker
        arch = [str(self.in_features), "[Attention]"]

        # Find all other linear layers in the MLP
        for layer in self.mlp[1:]:
            if isinstance(layer, nn.Linear):
                arch.append(str(layer.in_features))

        return self._repr_helper(name="DragonAttentionMLP", mlp_layers=arch)


class DragonMultiHeadAttentionNet(_BaseAttention):
    """
    An MLP that incorporates a standard `nn.MultiheadAttention` layer to process
    the input features.

    In inference mode use `forward_attention()` to get a tuple with `(output, attention_weights)`.
    """
    def __init__(self, in_features: int, out_targets: int,
                 hidden_layers: List[int] = [256, 128], drop_out: float = 0.2,
                 num_heads: int = 4, attention_dropout: float = 0.1) -> None:
        """
        Args:
            in_features (int): The number of input features.
            out_targets (int): The number of output targets.
            hidden_layers (list[int]): A list of neuron counts for each hidden layer.
            drop_out (float): The dropout probability for the MLP layers.
            num_heads (int): The number of attention heads.
            attention_dropout (float): Dropout probability in the attention layer.
        """
        super().__init__(in_features, out_targets, hidden_layers, drop_out)
        self.num_heads = num_heads
        self.attention_dropout = attention_dropout

        self.attention = _MultiHeadAttentionLayer(
            num_features=in_features,
            num_heads=num_heads,
            dropout=attention_dropout
        )

    def get_architecture_config(self) -> Dict[str, Any]:
        """Returns the full configuration of the model."""
        config = super().get_architecture_config()
        config['num_heads'] = self.num_heads
        config['attention_dropout'] = self.attention_dropout
        return config

    def __repr__(self) -> str:
        """Returns the developer-friendly string representation of the model."""
        mlp_part = " -> ".join(
            [str(self.in_features)] +
            [str(h) for h in self.hidden_layers] +
            [str(self.out_targets)]
        )
        arch_str = f"{self.in_features} -> [MultiHead(h={self.num_heads})] -> {mlp_part}"

        return f"DragonMultiHeadAttentionNet(arch: {arch_str})"


class DragonTabularTransformer(nn.Module, _ArchitectureHandlerMixin):
    """
    A Transformer-based model for tabular data tasks.

    This model uses a Feature Tokenizer to convert all input features into a
    sequence of embeddings, prepends a [CLS] token, and processes the
    sequence with a standard Transformer Encoder.
    """
    def __init__(self, *,
                 schema: FeatureSchema,
                 out_targets: int,
                 embedding_dim: int = 256,
                 num_heads: int = 8,
                 num_layers: int = 6,
                 dropout: float = 0.2):
        """
        Args:
            schema (FeatureSchema):
                The definitive schema object created by `data_exploration.finalize_feature_schema()`.
            out_targets (int):
                Number of output targets (1 for regression).
            embedding_dim (int):
                The dimension for all feature embeddings. Must be divisible by num_heads. Common values: (64, 128, 192, 256, etc.)
            num_heads (int):
                The number of heads in the multi-head attention mechanism. Common values: (4, 8, 16)
            num_layers (int):
                The number of sub-encoder-layers in the transformer encoder. Common values: (4, 8, 12)
            dropout (float):
                The dropout value.

        ## Note:

        **Embedding Dimension:** "Width" of the model. It's the N-dimension vector that will be used to represent each one of the features.
        - Each continuous feature gets its own learnable N-dimension vector.
        - Each categorical feature gets an embedding table that maps every category (e.g., "color=red", "color=blue") to a unique N-dimension vector.

        **Attention Heads:** Controls the "Multi-Head Attention" mechanism. Instead of looking at all the feature interactions at once, the model splits its attention into N parallel heads.
        - Embedding Dimensions get divided by the number of Attention Heads, resulting in the dimensions assigned per head.

        **Number of Layers:** "Depth" of the model. Number of identical `TransformerEncoderLayer` blocks that are stacked on top of each other.
        - Layer 1: The attention heads find simple, direct interactions between the features.
        - Layer 2: Takes the output of Layer 1 and finds interactions between those interactions and so on.
        - Trade-off: More layers are more powerful but are slower to train and more prone to overfitting. If the training loss goes down but the validation loss goes up, you might have too many layers (or need more dropout).
        """
        super().__init__()

        # --- Get info from schema ---
        in_features = len(schema.feature_names)
        categorical_index_map = schema.categorical_index_map

        # --- Validation ---
        if categorical_index_map and (max(categorical_index_map.keys()) >= in_features):
            _LOGGER.error(f"A categorical index ({max(categorical_index_map.keys())}) is out of bounds for the provided input features ({in_features}).")
            raise ValueError()

        # --- Save configuration ---
        self.schema = schema # <-- Save the whole schema
        self.out_targets = out_targets
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.dropout = dropout

        # --- 1. Feature Tokenizer (now takes the schema) ---
        self.tokenizer = _FeatureTokenizer(
            schema=schema,
            embedding_dim=embedding_dim
        )

        # --- 2. CLS Token ---
        self.cls_token = nn.Parameter(torch.randn(1, 1, embedding_dim))

        # --- 3. Transformer Encoder ---
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embedding_dim,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True # Crucial for (batch, seq, feature) input
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer=encoder_layer,
            num_layers=num_layers
        )

        # --- 4. Prediction Head ---
        self.output_layer = nn.Linear(embedding_dim, out_targets)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Defines the forward pass of the model."""
        # Get the batch size for later use
        batch_size = x.shape[0]

        # 1. Get feature tokens from the tokenizer
        # -> tokens shape: (batch_size, num_features, embedding_dim)
        tokens = self.tokenizer(x)

        # 2. Prepend the [CLS] token to the sequence
        # -> cls_tokens shape: (batch_size, 1, embedding_dim)
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        # -> full_sequence shape: (batch_size, num_features + 1, embedding_dim)
        full_sequence = torch.cat([cls_tokens, tokens], dim=1)

        # 3. Pass the full sequence through the Transformer Encoder
        # -> transformer_out shape: (batch_size, num_features + 1, embedding_dim)
        transformer_out = self.transformer_encoder(full_sequence)

        # 4. Isolate the output of the [CLS] token (it's the first one)
        # -> cls_output shape: (batch_size, embedding_dim)
        cls_output = transformer_out[:, 0]

        # 5. Pass the [CLS] token's output through the prediction head
        # -> logits shape: (batch_size, out_targets)
        logits = self.output_layer(cls_output)

        return logits

    def get_architecture_config(self) -> Dict[str, Any]:
        """Returns the full configuration of the model."""
        # Deconstruct schema into a JSON-friendly dict
        # Tuples are saved as lists
        schema_dict = {
            'feature_names': self.schema.feature_names,
            'continuous_feature_names': self.schema.continuous_feature_names,
            'categorical_feature_names': self.schema.categorical_feature_names,
            'categorical_index_map': self.schema.categorical_index_map,
            'categorical_mappings': self.schema.categorical_mappings
        }

        return {
            'schema_dict': schema_dict,
            'out_targets': self.out_targets,
            'embedding_dim': self.embedding_dim,
            'num_heads': self.num_heads,
            'num_layers': self.num_layers,
            'dropout': self.dropout
        }

    @classmethod
    def load(cls: type, file_or_dir: Union[str, Path], verbose: bool = True) -> nn.Module:
        """Loads a model architecture from a JSON file."""
        user_path = make_fullpath(file_or_dir)

        if user_path.is_dir():
            json_filename = PytorchModelArchitectureKeys.SAVENAME + ".json"
            target_path = make_fullpath(user_path / json_filename, enforce="file")
        elif user_path.is_file():
            target_path = user_path
        else:
            _LOGGER.error(f"Invalid path: '{file_or_dir}'")
            raise IOError()

        with open(target_path, 'r') as f:
            saved_data = json.load(f)

        saved_class_name = saved_data[PytorchModelArchitectureKeys.MODEL]
        config = saved_data[PytorchModelArchitectureKeys.CONFIG]

        if saved_class_name != cls.__name__:
            _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{cls.__name__}' was expected.")
            raise ValueError()

        # --- RECONSTRUCTION LOGIC ---
        if 'schema_dict' not in config:
            _LOGGER.error("Invalid architecture file: missing 'schema_dict'. This file may be from an older version.")
            raise ValueError("Missing 'schema_dict' in config.")

        schema_data = config.pop('schema_dict')

        # Re-hydrate the categorical_index_map
        # JSON saves all dict keys as strings, so we must convert them back to int.
        raw_index_map = schema_data['categorical_index_map']
        if raw_index_map is not None:
            rehydrated_index_map = {int(k): v for k, v in raw_index_map.items()}
        else:
            rehydrated_index_map = None

        # Re-hydrate the FeatureSchema object
        # JSON deserializes tuples as lists, so we must convert them back.
        schema = FeatureSchema(
            feature_names=tuple(schema_data['feature_names']),
            continuous_feature_names=tuple(schema_data['continuous_feature_names']),
            categorical_feature_names=tuple(schema_data['categorical_feature_names']),
            categorical_index_map=rehydrated_index_map,
            categorical_mappings=schema_data['categorical_mappings']
        )

        config['schema'] = schema
        # --- End Reconstruction ---

        model = cls(**config)
        if verbose:
            _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
        return model

    def __repr__(self) -> str:
        """Returns the developer-friendly string representation of the model."""
        # Build the architecture string part-by-part
        parts = [
            f"Tokenizer(features={len(self.schema.feature_names)}, dim={self.embedding_dim})",
            "[CLS]",
            f"TransformerEncoder(layers={self.num_layers}, heads={self.num_heads})",
            f"PredictionHead(outputs={self.out_targets})"
        ]

        arch_str = " -> ".join(parts)

        return f"DragonTabularTransformer(arch: {arch_str})"


class _FeatureTokenizer(nn.Module):
    """
    Transforms raw numerical and categorical features from any column order
    into a sequence of embeddings.
    """
    def __init__(self,
                 schema: FeatureSchema,
                 embedding_dim: int):
        """
        Args:
            schema (FeatureSchema):
                The definitive schema object from data_exploration.
            embedding_dim (int):
                The dimension for all feature embeddings.
        """
        super().__init__()

        # --- Get info from schema ---
        categorical_map = schema.categorical_index_map

        if categorical_map:
            # Unpack the dictionary into separate lists
            self.categorical_indices = list(categorical_map.keys())
            cardinalities = list(categorical_map.values())
        else:
            self.categorical_indices = []
            cardinalities = []

        # Derive numerical indices by finding what's not categorical
        all_indices = set(range(len(schema.feature_names)))
        categorical_indices_set = set(self.categorical_indices)
        self.numerical_indices = sorted(list(all_indices - categorical_indices_set))

        self.embedding_dim = embedding_dim

        # A learnable embedding for each numerical feature
        self.numerical_embeddings = nn.Parameter(torch.randn(len(self.numerical_indices), embedding_dim))

        # A standard embedding layer for each categorical feature
        self.categorical_embeddings = nn.ModuleList(
            [nn.Embedding(num_embeddings=c, embedding_dim=embedding_dim) for c in cardinalities]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Processes features from a single input tensor and concatenates them
        into a sequence of tokens.
        """
        # Select the correct columns for each type using the stored indices
        x_numerical = x[:, self.numerical_indices].float()
        x_categorical = x[:, self.categorical_indices].long()

        # Process numerical features
        numerical_tokens = x_numerical.unsqueeze(-1) * self.numerical_embeddings

        # Process categorical features
        categorical_tokens = []
        for i, embed_layer in enumerate(self.categorical_embeddings):
            # x_categorical[:, i] selects the i-th categorical column
            # (e.g., all values for the 'color' feature)
            token = embed_layer(x_categorical[:, i]).unsqueeze(1)
            categorical_tokens.append(token)

        # Concatenate all tokens into a single sequence
        if not self.categorical_indices:
            all_tokens = numerical_tokens
        elif not self.numerical_indices:
            all_tokens = torch.cat(categorical_tokens, dim=1)
        else:
            all_categorical_tokens = torch.cat(categorical_tokens, dim=1)
            all_tokens = torch.cat([numerical_tokens, all_categorical_tokens], dim=1)

        return all_tokens


class _AttentionLayer(nn.Module):
    """
    Calculates attention weights and applies them to the input features, incorporating a residual connection for improved stability and performance.

    Returns both the final output and the weights for interpretability.
    """
    def __init__(self, num_features: int):
        super().__init__()
        # The hidden layer size is a hyperparameter
        hidden_size = max(16, num_features // 4)

        # Learn to produce attention scores
        self.attention_net = nn.Sequential(
            nn.Linear(num_features, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, num_features) # Output one score per feature
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        # x shape: (batch_size, num_features)

        # Get one raw "importance" score per feature
        attention_scores = self.attention_net(x)

        # Apply the softmax module to get weights that sum to 1
        attention_weights = self.softmax(attention_scores)

        # Weighted features (attention mechanism's output)
        weighted_features = x * attention_weights

        # Residual connection
        residual_connection = x + weighted_features

        return residual_connection, attention_weights


class _MultiHeadAttentionLayer(nn.Module):
    """
    A wrapper for the standard `torch.nn.MultiheadAttention` layer.

    This layer treats the entire input feature vector as a single item in a
    sequence and applies self-attention to it. It is followed by a residual
    connection and layer normalization, which is a standard block in
    Transformer-style models.
    """
    def __init__(self, num_features: int, num_heads: int, dropout: float):
        super().__init__()
        self.attention = nn.MultiheadAttention(
            embed_dim=num_features,
            num_heads=num_heads,
            dropout=dropout,
            batch_first=True # Crucial for (batch, seq, feature) input
        )
        self.layer_norm = nn.LayerNorm(num_features)

    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        # x shape: (batch_size, num_features)

        # nn.MultiheadAttention expects a sequence dimension.
        # We add a sequence dimension of length 1.
        # x_reshaped shape: (batch_size, 1, num_features)
        x_reshaped = x.unsqueeze(1)

        # Apply self-attention. query, key, and value are all the same.
        # attn_output shape: (batch_size, 1, num_features)
        # attn_weights shape: (batch_size, 1, 1)
        attn_output, attn_weights = self.attention(
            query=x_reshaped,
            key=x_reshaped,
            value=x_reshaped,
            need_weights=True,
            average_attn_weights=True # Average weights across heads
        )

        # Add residual connection and apply layer normalization (Post-LN)
        out = self.layer_norm(x + attn_output.squeeze(1))

        # Squeeze weights for a consistent output shape
        return out, attn_weights.squeeze()


def info():
    _script_info(__all__)
```