dragon-ml-toolbox 19.14.0__py3-none-any.whl → 20.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
- dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
- ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
- ml_tools/ETL_cleaning/_basic_clean.py +351 -0
- ml_tools/ETL_cleaning/_clean_tools.py +128 -0
- ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
- ml_tools/ETL_cleaning/_imprimir.py +13 -0
- ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
- ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
- ml_tools/ETL_engineering/_imprimir.py +24 -0
- ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
- ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
- ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
- ml_tools/GUI_tools/_imprimir.py +12 -0
- ml_tools/IO_tools/_IO_loggers.py +235 -0
- ml_tools/IO_tools/_IO_save_load.py +151 -0
- ml_tools/IO_tools/_IO_utils.py +140 -0
- ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
- ml_tools/IO_tools/_imprimir.py +14 -0
- ml_tools/MICE/_MICE_imputation.py +132 -0
- ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
- ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
- ml_tools/MICE/_imprimir.py +11 -0
- ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
- ml_tools/ML_callbacks/_base.py +101 -0
- ml_tools/ML_callbacks/_checkpoint.py +232 -0
- ml_tools/ML_callbacks/_early_stop.py +208 -0
- ml_tools/ML_callbacks/_imprimir.py +12 -0
- ml_tools/ML_callbacks/_scheduler.py +197 -0
- ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
- ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
- ml_tools/ML_chain/_dragon_chain.py +140 -0
- ml_tools/ML_chain/_imprimir.py +11 -0
- ml_tools/ML_configuration/__init__.py +90 -0
- ml_tools/ML_configuration/_base_model_config.py +69 -0
- ml_tools/ML_configuration/_finalize.py +366 -0
- ml_tools/ML_configuration/_imprimir.py +47 -0
- ml_tools/ML_configuration/_metrics.py +593 -0
- ml_tools/ML_configuration/_models.py +206 -0
- ml_tools/ML_configuration/_training.py +124 -0
- ml_tools/ML_datasetmaster/__init__.py +28 -0
- ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
- ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
- ml_tools/ML_datasetmaster/_imprimir.py +15 -0
- ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
- ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
- ml_tools/ML_evaluation/__init__.py +53 -0
- ml_tools/ML_evaluation/_classification.py +629 -0
- ml_tools/ML_evaluation/_feature_importance.py +409 -0
- ml_tools/ML_evaluation/_imprimir.py +25 -0
- ml_tools/ML_evaluation/_loss.py +92 -0
- ml_tools/ML_evaluation/_regression.py +273 -0
- ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
- ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
- ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
- ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
- ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
- ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
- ml_tools/ML_finalize_handler/__init__.py +10 -0
- ml_tools/ML_finalize_handler/_imprimir.py +8 -0
- ml_tools/ML_inference/__init__.py +22 -0
- ml_tools/ML_inference/_base_inference.py +166 -0
- ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
- ml_tools/ML_inference/_dragon_inference.py +332 -0
- ml_tools/ML_inference/_imprimir.py +11 -0
- ml_tools/ML_inference/_multi_inference.py +180 -0
- ml_tools/ML_inference_sequence/__init__.py +10 -0
- ml_tools/ML_inference_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
- ml_tools/ML_inference_vision/__init__.py +10 -0
- ml_tools/ML_inference_vision/_imprimir.py +8 -0
- ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
- ml_tools/ML_models/__init__.py +32 -0
- ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
- ml_tools/ML_models/_base_mlp_attention.py +198 -0
- ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
- ml_tools/ML_models/_dragon_tabular.py +248 -0
- ml_tools/ML_models/_imprimir.py +18 -0
- ml_tools/ML_models/_mlp_attention.py +134 -0
- ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
- ml_tools/ML_models_sequence/__init__.py +10 -0
- ml_tools/ML_models_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
- ml_tools/ML_models_vision/__init__.py +29 -0
- ml_tools/ML_models_vision/_base_wrapper.py +254 -0
- ml_tools/ML_models_vision/_image_classification.py +182 -0
- ml_tools/ML_models_vision/_image_segmentation.py +108 -0
- ml_tools/ML_models_vision/_imprimir.py +16 -0
- ml_tools/ML_models_vision/_object_detection.py +135 -0
- ml_tools/ML_optimization/__init__.py +21 -0
- ml_tools/ML_optimization/_imprimir.py +13 -0
- ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
- ml_tools/ML_optimization/_single_dragon.py +203 -0
- ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
- ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
- ml_tools/ML_scaler/__init__.py +10 -0
- ml_tools/ML_scaler/_imprimir.py +8 -0
- ml_tools/ML_trainer/__init__.py +20 -0
- ml_tools/ML_trainer/_base_trainer.py +297 -0
- ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
- ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
- ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
- ml_tools/ML_trainer/_imprimir.py +10 -0
- ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
- ml_tools/ML_utilities/_artifact_finder.py +382 -0
- ml_tools/ML_utilities/_imprimir.py +16 -0
- ml_tools/ML_utilities/_inspection.py +325 -0
- ml_tools/ML_utilities/_train_tools.py +205 -0
- ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
- ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
- ml_tools/ML_vision_transformers/_imprimir.py +14 -0
- ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
- ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
- ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
- ml_tools/PSO_optimization/_imprimir.py +10 -0
- ml_tools/SQL/__init__.py +7 -0
- ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
- ml_tools/SQL/_imprimir.py +8 -0
- ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
- ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
- ml_tools/VIF/_imprimir.py +10 -0
- ml_tools/_core/__init__.py +7 -1
- ml_tools/_core/_logger.py +8 -18
- ml_tools/_core/_schema_load_ops.py +43 -0
- ml_tools/_core/_script_info.py +2 -2
- ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
- ml_tools/data_exploration/_analysis.py +214 -0
- ml_tools/data_exploration/_cleaning.py +566 -0
- ml_tools/data_exploration/_features.py +583 -0
- ml_tools/data_exploration/_imprimir.py +32 -0
- ml_tools/data_exploration/_plotting.py +487 -0
- ml_tools/data_exploration/_schema_ops.py +176 -0
- ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
- ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
- ml_tools/ensemble_evaluation/_imprimir.py +14 -0
- ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
- ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
- ml_tools/ensemble_inference/_imprimir.py +9 -0
- ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
- ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
- ml_tools/ensemble_learning/_imprimir.py +10 -0
- ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
- ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
- ml_tools/excel_handler/_imprimir.py +13 -0
- ml_tools/{keys.py → keys/__init__.py} +4 -1
- ml_tools/keys/_imprimir.py +11 -0
- ml_tools/{_core → keys}/_keys.py +2 -0
- ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
- ml_tools/math_utilities/_imprimir.py +11 -0
- ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
- ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
- ml_tools/optimization_tools/_imprimir.py +13 -0
- ml_tools/optimization_tools/_optimization_bounds.py +236 -0
- ml_tools/optimization_tools/_optimization_plots.py +218 -0
- ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
- ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
- ml_tools/path_manager/_imprimir.py +15 -0
- ml_tools/path_manager/_path_tools.py +346 -0
- ml_tools/plot_fonts/__init__.py +8 -0
- ml_tools/plot_fonts/_imprimir.py +8 -0
- ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
- ml_tools/schema/__init__.py +15 -0
- ml_tools/schema/_feature_schema.py +223 -0
- ml_tools/schema/_gui_schema.py +191 -0
- ml_tools/schema/_imprimir.py +10 -0
- ml_tools/{serde.py → serde/__init__.py} +4 -2
- ml_tools/serde/_imprimir.py +10 -0
- ml_tools/{_core → serde}/_serde.py +3 -8
- ml_tools/{utilities.py → utilities/__init__.py} +11 -6
- ml_tools/utilities/_imprimir.py +18 -0
- ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
- ml_tools/utilities/_utility_tools.py +192 -0
- dragon_ml_toolbox-19.14.0.dist-info/RECORD +0 -111
- ml_tools/ML_chaining_inference.py +0 -8
- ml_tools/ML_configuration.py +0 -86
- ml_tools/ML_configuration_pytab.py +0 -14
- ml_tools/ML_datasetmaster.py +0 -10
- ml_tools/ML_evaluation.py +0 -16
- ml_tools/ML_evaluation_multi.py +0 -12
- ml_tools/ML_finalize_handler.py +0 -8
- ml_tools/ML_inference.py +0 -12
- ml_tools/ML_models.py +0 -14
- ml_tools/ML_models_advanced.py +0 -14
- ml_tools/ML_models_pytab.py +0 -14
- ml_tools/ML_optimization.py +0 -14
- ml_tools/ML_optimization_pareto.py +0 -8
- ml_tools/ML_scaler.py +0 -8
- ml_tools/ML_sequence_datasetmaster.py +0 -8
- ml_tools/ML_sequence_evaluation.py +0 -10
- ml_tools/ML_sequence_inference.py +0 -8
- ml_tools/ML_sequence_models.py +0 -8
- ml_tools/ML_trainer.py +0 -12
- ml_tools/ML_vision_datasetmaster.py +0 -12
- ml_tools/ML_vision_evaluation.py +0 -10
- ml_tools/ML_vision_inference.py +0 -8
- ml_tools/ML_vision_models.py +0 -18
- ml_tools/SQL.py +0 -8
- ml_tools/_core/_ETL_cleaning.py +0 -694
- ml_tools/_core/_IO_tools.py +0 -498
- ml_tools/_core/_ML_callbacks.py +0 -702
- ml_tools/_core/_ML_configuration.py +0 -1332
- ml_tools/_core/_ML_configuration_pytab.py +0 -102
- ml_tools/_core/_ML_evaluation.py +0 -867
- ml_tools/_core/_ML_evaluation_multi.py +0 -544
- ml_tools/_core/_ML_inference.py +0 -646
- ml_tools/_core/_ML_models.py +0 -668
- ml_tools/_core/_ML_models_pytab.py +0 -693
- ml_tools/_core/_ML_trainer.py +0 -2323
- ml_tools/_core/_ML_utilities.py +0 -886
- ml_tools/_core/_ML_vision_models.py +0 -644
- ml_tools/_core/_data_exploration.py +0 -1909
- ml_tools/_core/_optimization_tools.py +0 -493
- ml_tools/_core/_schema.py +0 -359
- ml_tools/plot_fonts.py +0 -8
- ml_tools/schema.py +0 -12
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
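
The pattern throughout this release: each flat module `ml_tools/X.py` becomes a package `ml_tools/X/` whose `__init__.py` re-exports the public names from private `_`-prefixed submodules (each package also gains an `_imprimir.py`), and the old centralized `_core/_X.py` implementations move into those packages. Assuming the new `__init__.py` files re-export the same names the flat modules did, public import paths are unchanged; a hypothetical sketch, using `SQL` to stand in for any restructured module:

# Hypothetical sketch, not part of the diff.
# 19.14.0: ml_tools/SQL.py (flat module)
# 20.0.0:  ml_tools/SQL/__init__.py + ml_tools/SQL/_dragon_SQL.py
from ml_tools import SQL  # spelled the same against either layout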

ml_tools/ML_configuration/_models.py
@@ -0,0 +1,206 @@
+from typing import Optional, Literal
+
+from ..schema import FeatureSchema
+
+from ._base_model_config import _BaseModelParams
+
+
+__all__ = [
+    # --- Model Parameter Configs ---
+    "DragonMLPParams",
+    "DragonAttentionMLPParams",
+    "DragonMultiHeadAttentionNetParams",
+    "DragonTabularTransformerParams",
+    "DragonGateParams",
+    "DragonNodeParams",
+    "DragonTabNetParams",
+    "DragonAutoIntParams",
+]
+
+
+# ----------------------------
+# Model Parameters Configurations
+# ----------------------------
+
+# --- Standard Models ---
+
+class DragonMLPParams(_BaseModelParams):
+    def __init__(self,
+                 in_features: int,
+                 out_targets: int,
+                 hidden_layers: list[int],
+                 drop_out: float = 0.2) -> None:
+        self.in_features = in_features
+        self.out_targets = out_targets
+        self.hidden_layers = hidden_layers
+        self.drop_out = drop_out
+
+
+class DragonAttentionMLPParams(_BaseModelParams):
+    def __init__(self,
+                 in_features: int,
+                 out_targets: int,
+                 hidden_layers: list[int],
+                 drop_out: float = 0.2) -> None:
+        self.in_features = in_features
+        self.out_targets = out_targets
+        self.hidden_layers = hidden_layers
+        self.drop_out = drop_out
+
+
+class DragonMultiHeadAttentionNetParams(_BaseModelParams):
+    def __init__(self,
+                 in_features: int,
+                 out_targets: int,
+                 hidden_layers: list[int],
+                 drop_out: float = 0.2,
+                 num_heads: int = 4,
+                 attention_dropout: float = 0.1) -> None:
+        self.in_features = in_features
+        self.out_targets = out_targets
+        self.hidden_layers = hidden_layers
+        self.drop_out = drop_out
+        self.num_heads = num_heads
+        self.attention_dropout = attention_dropout
+
+
+class DragonTabularTransformerParams(_BaseModelParams):
+    def __init__(self, *,
+                 schema: FeatureSchema,
+                 out_targets: int,
+                 embedding_dim: int = 256,
+                 num_heads: int = 8,
+                 num_layers: int = 6,
+                 dropout: float = 0.2) -> None:
+        self.schema = schema
+        self.out_targets = out_targets
+        self.embedding_dim = embedding_dim
+        self.num_heads = num_heads
+        self.num_layers = num_layers
+        self.dropout = dropout
+
+# --- Advanced Models ---
+
+class DragonGateParams(_BaseModelParams):
+    def __init__(self, *,
+                 schema: FeatureSchema,
+                 out_targets: int,
+                 embedding_dim: int = 16,
+                 gflu_stages: int = 6,
+                 gflu_dropout: float = 0.1,
+                 num_trees: int = 20,
+                 tree_depth: int = 4,
+                 tree_dropout: float = 0.1,
+                 chain_trees: bool = False,
+                 tree_wise_attention: bool = True,
+                 tree_wise_attention_dropout: float = 0.1,
+                 binning_activation: Literal['entmoid', 'sparsemoid', 'sigmoid'] = "entmoid",
+                 feature_mask_function: Literal['entmax', 'sparsemax', 'softmax', 't-softmax'] = "entmax",
+                 share_head_weights: bool = True,
+                 batch_norm_continuous: bool = True) -> None:
+        self.schema = schema
+        self.out_targets = out_targets
+        self.embedding_dim = embedding_dim
+        self.gflu_stages = gflu_stages
+        self.gflu_dropout = gflu_dropout
+        self.num_trees = num_trees
+        self.tree_depth = tree_depth
+        self.tree_dropout = tree_dropout
+        self.chain_trees = chain_trees
+        self.tree_wise_attention = tree_wise_attention
+        self.tree_wise_attention_dropout = tree_wise_attention_dropout
+        self.binning_activation = binning_activation
+        self.feature_mask_function = feature_mask_function
+        self.share_head_weights = share_head_weights
+        self.batch_norm_continuous = batch_norm_continuous
+
+
+class DragonNodeParams(_BaseModelParams):
+    def __init__(self, *,
+                 schema: FeatureSchema,
+                 out_targets: int,
+                 embedding_dim: int = 24,
+                 num_trees: int = 1024,
+                 num_layers: int = 2,
+                 tree_depth: int = 6,
+                 additional_tree_output_dim: int = 3,
+                 max_features: Optional[int] = None,
+                 input_dropout: float = 0.0,
+                 embedding_dropout: float = 0.0,
+                 choice_function: Literal['entmax', 'sparsemax', 'softmax'] = 'entmax',
+                 bin_function: Literal['entmoid', 'sparsemoid', 'sigmoid'] = 'entmoid',
+                 batch_norm_continuous: bool = False) -> None:
+        self.schema = schema
+        self.out_targets = out_targets
+        self.embedding_dim = embedding_dim
+        self.num_trees = num_trees
+        self.num_layers = num_layers
+        self.tree_depth = tree_depth
+        self.additional_tree_output_dim = additional_tree_output_dim
+        self.max_features = max_features
+        self.input_dropout = input_dropout
+        self.embedding_dropout = embedding_dropout
+        self.choice_function = choice_function
+        self.bin_function = bin_function
+        self.batch_norm_continuous = batch_norm_continuous
+
+
+class DragonAutoIntParams(_BaseModelParams):
+    def __init__(self, *,
+                 schema: FeatureSchema,
+                 out_targets: int,
+                 embedding_dim: int = 32,
+                 attn_embed_dim: int = 32,
+                 num_heads: int = 2,
+                 num_attn_blocks: int = 3,
+                 attn_dropout: float = 0.1,
+                 has_residuals: bool = True,
+                 attention_pooling: bool = True,
+                 deep_layers: bool = True,
+                 layers: str = "128-64-32",
+                 activation: str = "ReLU",
+                 embedding_dropout: float = 0.0,
+                 batch_norm_continuous: bool = False) -> None:
+        self.schema = schema
+        self.out_targets = out_targets
+        self.embedding_dim = embedding_dim
+        self.attn_embed_dim = attn_embed_dim
+        self.num_heads = num_heads
+        self.num_attn_blocks = num_attn_blocks
+        self.attn_dropout = attn_dropout
+        self.has_residuals = has_residuals
+        self.attention_pooling = attention_pooling
+        self.deep_layers = deep_layers
+        self.layers = layers
+        self.activation = activation
+        self.embedding_dropout = embedding_dropout
+        self.batch_norm_continuous = batch_norm_continuous
+
+
+class DragonTabNetParams(_BaseModelParams):
+    def __init__(self, *,
+                 schema: FeatureSchema,
+                 out_targets: int,
+                 n_d: int = 8,
+                 n_a: int = 8,
+                 n_steps: int = 3,
+                 gamma: float = 1.3,
+                 n_independent: int = 2,
+                 n_shared: int = 2,
+                 virtual_batch_size: int = 128,
+                 momentum: float = 0.02,
+                 mask_type: Literal['sparsemax', 'entmax', 'softmax'] = 'sparsemax',
+                 batch_norm_continuous: bool = False) -> None:
+        self.schema = schema
+        self.out_targets = out_targets
+        self.n_d = n_d
+        self.n_a = n_a
+        self.n_steps = n_steps
+        self.gamma = gamma
+        self.n_independent = n_independent
+        self.n_shared = n_shared
+        self.virtual_batch_size = virtual_batch_size
+        self.momentum = momentum
+        self.mask_type = mask_type
+        self.batch_norm_continuous = batch_norm_continuous
+
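Each of these parameter classes is a plain container: `__init__` records its arguments as attributes, and `_BaseModelParams` supplies the shared behavior. A minimal usage sketch follows; the import path assumes these classes are re-exported by the new `ml_tools/ML_configuration/__init__.py`, and the dimensions are made up:

# Hypothetical sketch, not part of the diff.
from ml_tools.ML_configuration import DragonMLPParams

# Standard-model configs take plain dimensions.
mlp_params = DragonMLPParams(in_features=20,
                             out_targets=1,
                             hidden_layers=[256, 128],
                             drop_out=0.2)

# Schema-driven configs (Transformer, GATE, NODE, AutoInt, TabNet) are
# keyword-only (note the bare `*` in their signatures), e.g.:
# tabnet_params = DragonTabNetParams(schema=my_schema, out_targets=1, n_steps=3)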

ml_tools/ML_configuration/_training.py
@@ -0,0 +1,124 @@
+from typing import Union, Optional, Any, Literal
+from pathlib import Path
+
+from .._core import get_logger
+from ..path_manager import make_fullpath
+
+from ._base_model_config import _BaseModelParams
+
+
+_LOGGER = get_logger("ML Configuration")
+
+
+__all__ = [
+    # --- Training Config ---
+    "DragonTrainingConfig",
+    "DragonParetoConfig"
+]
+
+
+class DragonTrainingConfig(_BaseModelParams):
+    """
+    Configuration object for the training process.
+
+    Can be unpacked as a dictionary for logging or accessed as an object.
+
+    Accepts arbitrary keyword arguments which are set as instance attributes.
+    """
+    def __init__(self,
+                 validation_size: float,
+                 test_size: float,
+                 initial_learning_rate: float,
+                 batch_size: int,
+                 random_state: int = 101,
+                 **kwargs: Any) -> None:
+        """
+        Args:
+            validation_size (float): Proportion of data for validation set.
+            test_size (float): Proportion of data for test set.
+            initial_learning_rate (float): Starting learning rate.
+            batch_size (int): Number of samples per training batch.
+            random_state (int): Seed for reproducibility.
+            **kwargs: Additional training parameters as key-value pairs.
+        """
+        self.validation_size = validation_size
+        self.test_size = test_size
+        self.initial_learning_rate = initial_learning_rate
+        self.batch_size = batch_size
+        self.random_state = random_state
+
+        # Process kwargs with validation
+        for key, value in kwargs.items():
+            # Python guarantees 'key' is a string for **kwargs
+
+            # Allow None in value
+            if value is None:
+                setattr(self, key, value)
+                continue
+
+            if isinstance(value, dict):
+                _LOGGER.error("Nested dictionaries are not supported, unpack them first.")
+                raise TypeError()
+
+            # Check if value is a number or a string or a JSON supported type, except dict
+            if not isinstance(value, (str, int, float, bool, list, tuple)):
+                _LOGGER.error(f"Invalid type for configuration '{key}': {type(value).__name__}")
+                raise TypeError()
+
+            setattr(self, key, value)
+
+
+class DragonParetoConfig(_BaseModelParams):
+    """
+    Configuration object for the Pareto Optimization process.
+    """
+    def __init__(self,
+                 save_directory: Union[str, Path],
+                 target_objectives: dict[str, Literal["min", "max"]],
+                 continuous_bounds_map: Union[dict[str, tuple[float, float]], dict[str, list[float]], str, Path],
+                 columns_to_round: Optional[list[str]] = None,
+                 population_size: int = 500,
+                 generations: int = 1000,
+                 solutions_filename: str = "NonDominatedSolutions",
+                 float_precision: int = 4,
+                 log_interval: int = 10,
+                 plot_size: tuple[int, int] = (10, 7),
+                 plot_font_size: int = 16,
+                 discretize_start_at_zero: bool = True):
+        """
+        Configure the Pareto Optimizer.
+
+        Args:
+            save_directory (str | Path): Directory to save artifacts.
+            target_objectives (Dict[str, "min"|"max"]): Dictionary mapping target names to optimization direction.
+                Example: {"price": "max", "error": "min"}
+            continuous_bounds_map (Dict): Bounds for continuous features {name: (min, max)}. Or a path/str to a directory containing the "optimization_bounds.json" file.
+            columns_to_round (List[str] | None): List of continuous column names that should be rounded to the nearest integer.
+            population_size (int): Size of the genetic population.
+            generations (int): Number of generations to run.
+            solutions_filename (str): Filename for saving Pareto solutions.
+            float_precision (int): Number of decimal places to round standard float columns.
+            log_interval (int): Interval for logging progress.
+            plot_size (Tuple[int, int]): Size of the 2D plots.
+            plot_font_size (int): Font size for plot text.
+            discretize_start_at_zero (bool): Categorical encoding start index. True=0, False=1.
+        """
+        # Validate string or Path
+        valid_save_dir = make_fullpath(save_directory, make=True, enforce="directory")
+
+        if isinstance(continuous_bounds_map, (str, Path)):
+            continuous_bounds_map = make_fullpath(continuous_bounds_map, make=False, enforce="directory")
+
+        self.save_directory = valid_save_dir
+        self.target_objectives = target_objectives
+        self.continuous_bounds_map = continuous_bounds_map
+        self.columns_to_round = columns_to_round
+        self.population_size = population_size
+        self.generations = generations
+        self.solutions_filename = solutions_filename
+        self.float_precision = float_precision
+        self.log_interval = log_interval
+        self.plot_size = plot_size
+        self.plot_font_size = plot_font_size
+        self.discretize_start_at_zero = discretize_start_at_zero
+
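The kwargs loop in `DragonTrainingConfig` only admits JSON-friendly values (str, int, float, bool, list, tuple, or None), which keeps a config serializable for logging. A sketch of what passes and what raises, with the same assumed import path as above:

# Hypothetical sketch, not part of the diff.
from ml_tools.ML_configuration import DragonTrainingConfig

config = DragonTrainingConfig(validation_size=0.15,
                              test_size=0.15,
                              initial_learning_rate=1e-3,
                              batch_size=64,
                              epochs=300,      # extra kwarg, stored as an attribute
                              scheduler=None)  # None is explicitly allowed

# Nested dicts (and any other non-JSON-like type) raise TypeError:
# DragonTrainingConfig(0.15, 0.15, 1e-3, 64, optimizer={"name": "adam"})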

ml_tools/ML_datasetmaster/__init__.py
@@ -0,0 +1,28 @@
+from ._datasetmaster import (
+    DragonDataset,
+    DragonDatasetMulti,
+)
+
+from ._sequence_datasetmaster import (
+    DragonDatasetSequence
+)
+
+from ._vision_datasetmaster import (
+    DragonDatasetVision,
+    DragonDatasetSegmentation,
+    DragonDatasetObjectDetection
+)
+
+from ._imprimir import info
+
+
+__all__ = [
+    "DragonDataset",
+    "DragonDatasetMulti",
+    # sequence
+    "DragonDatasetSequence",
+    # vision
+    "DragonDatasetVision",
+    "DragonDatasetSegmentation",
+    "DragonDatasetObjectDetection",
+]
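This `__init__.py` replaces the deleted flat `ml_tools/ML_datasetmaster.py`, so the dataset classes now resolve from the package root; assuming the old module exposed the same names, imports of this form are unaffected:

from ml_tools.ML_datasetmaster import DragonDataset, DragonDatasetVision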

ml_tools/ML_datasetmaster/_base_datasetmaster.py
@@ -0,0 +1,337 @@
+import torch
+from torch.utils.data import Dataset
+import pandas
+import numpy
+from typing import Union, Optional
+from abc import ABC
+from pathlib import Path
+
+from ..IO_tools import save_list_strings, custom_logger
+from ..ML_scaler import DragonScaler
+from ..schema import FeatureSchema
+
+from ..path_manager import make_fullpath, sanitize_filename
+from .._core import get_logger
+from ..keys._keys import DatasetKeys, ScalerKeys
+
+
+_LOGGER = get_logger("DragonDataset")
+
+
+__all__ = [
+    "_BaseDatasetMaker",
+    "_PytorchDataset",
+]
+
+
+# --- Internal Helper Class ---
+class _PytorchDataset(Dataset):
+    """
+    Internal helper class to create a PyTorch Dataset.
+    Converts numpy/pandas data into tensors for model consumption.
+    """
+    def __init__(self, features: Union[numpy.ndarray, pandas.DataFrame],
+                 labels: Union[numpy.ndarray, pandas.Series, pandas.DataFrame],
+                 labels_dtype: torch.dtype,
+                 features_dtype: torch.dtype = torch.float32,
+                 feature_names: Optional[list[str]] = None,
+                 target_names: Optional[list[str]] = None):
+
+        if isinstance(features, numpy.ndarray):
+            self.features = torch.tensor(features, dtype=features_dtype)
+        else:  # It's a pandas.DataFrame
+            self.features = torch.tensor(features.to_numpy(), dtype=features_dtype)
+
+        if isinstance(labels, numpy.ndarray):
+            self.labels = torch.tensor(labels, dtype=labels_dtype)
+        elif isinstance(labels, (pandas.Series, pandas.DataFrame)):
+            self.labels = torch.tensor(labels.to_numpy(), dtype=labels_dtype)
+        else:
+            self.labels = torch.tensor(labels, dtype=labels_dtype)
+
+        self._feature_names = feature_names
+        self._target_names = target_names
+        self._classes: list[str] = []
+        self._class_map: dict[str,int] = dict()
+        self._feature_scaler: Optional[DragonScaler] = None
+        self._target_scaler: Optional[DragonScaler] = None
+
+    def __len__(self):
+        return len(self.features)
+
+    def __getitem__(self, index):
+        return self.features[index], self.labels[index]
+
+    @property
+    def feature_names(self):
+        if self._feature_names is not None:
+            return self._feature_names
+        else:
+            _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any feature names.")
+            raise ValueError()
+
+    @property
+    def target_names(self):
+        if self._target_names is not None:
+            return self._target_names
+        else:
+            _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any target names.")
+            raise ValueError()
+
+    @property
+    def classes(self):
+        return self._classes
+
+    @property
+    def class_map(self):
+        return self._class_map
+
+    @property
+    def feature_scaler(self):
+        return self._feature_scaler
+
+    @property
+    def target_scaler(self):
+        return self._target_scaler
+
+
+# --- Abstract Base Class ---
+class _BaseDatasetMaker(ABC):
+    """
+    Abstract base class for dataset makers. Contains shared logic.
+    """
+    def __init__(self):
+        self._train_ds: Optional[Dataset] = None
+        self._val_ds: Optional[Dataset] = None
+        self._test_ds: Optional[Dataset] = None
+
+        self.feature_scaler: Optional[DragonScaler] = None
+        self.target_scaler: Optional[DragonScaler] = None
+
+        self._id: Optional[str] = None
+        self._feature_names: list[str] = []
+        self._target_names: list[str] = []
+        self._X_train_shape = (0,0)
+        self._X_val_shape = (0,0)
+        self._X_test_shape = (0,0)
+        self._y_train_shape = (0,)
+        self._y_val_shape = (0,)
+        self._y_test_shape = (0,)
+        self.class_map: dict[str, int] = dict()
+        self.classes: list[str] = list()
+
+    def _prepare_feature_scaler(self,
+                                X_train: pandas.DataFrame,
+                                y_train: Union[pandas.Series, pandas.DataFrame],
+                                X_val: pandas.DataFrame,
+                                X_test: pandas.DataFrame,
+                                label_dtype: torch.dtype,
+                                schema: FeatureSchema) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
+        """Internal helper to fit and apply a DragonScaler for FEATURES using a FeatureSchema."""
+        continuous_feature_indices: Optional[list[int]] = None
+
+        # Get continuous feature indices *from the schema*
+        if schema.continuous_feature_names:
+            _LOGGER.info("Getting continuous feature indices from schema.")
+            try:
+                # Convert columns to a standard list for .index()
+                train_cols_list = X_train.columns.to_list()
+                # Map names from schema to column indices in the training DataFrame
+                continuous_feature_indices = [train_cols_list.index(name) for name in schema.continuous_feature_names]
+            except ValueError as e:
+                _LOGGER.error(f"Feature name from schema not found in training data columns:\n{e}")
+                raise ValueError()
+        else:
+            _LOGGER.info("No continuous features listed in schema. Feature scaler will not be fitted.")
+
+        X_train_values = X_train.to_numpy()
+        X_val_values = X_val.to_numpy()
+        X_test_values = X_test.to_numpy()
+
+        # continuous_feature_indices is derived
+        if self.feature_scaler is None and continuous_feature_indices:
+            _LOGGER.info("Fitting a new DragonScaler on training features.")
+            temp_train_ds = _PytorchDataset(X_train_values, y_train, label_dtype)
+            self.feature_scaler = DragonScaler.fit(temp_train_ds, continuous_feature_indices)
+
+        if self.feature_scaler and self.feature_scaler.mean_ is not None:
+            _LOGGER.info("Applying scaler transformation to train, validation, and test feature sets.")
+            X_train_tensor = self.feature_scaler.transform(torch.tensor(X_train_values, dtype=torch.float32))
+            X_val_tensor = self.feature_scaler.transform(torch.tensor(X_val_values, dtype=torch.float32))
+            X_test_tensor = self.feature_scaler.transform(torch.tensor(X_test_values, dtype=torch.float32))
+            return X_train_tensor.numpy(), X_val_tensor.numpy(), X_test_tensor.numpy()
+
+        return X_train_values, X_val_values, X_test_values
+
+    def _prepare_target_scaler(self,
+                               y_train: Union[pandas.Series, pandas.DataFrame],
+                               y_val: Union[pandas.Series, pandas.DataFrame],
+                               y_test: Union[pandas.Series, pandas.DataFrame]) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
+        """Internal helper to fit and apply a DragonScaler for TARGETS."""
+
+        y_train_arr = y_train.to_numpy() if isinstance(y_train, (pandas.Series, pandas.DataFrame)) else y_train
+        y_val_arr = y_val.to_numpy() if isinstance(y_val, (pandas.Series, pandas.DataFrame)) else y_val
+        y_test_arr = y_test.to_numpy() if isinstance(y_test, (pandas.Series, pandas.DataFrame)) else y_test
+
+        if self.target_scaler is None:
+            _LOGGER.info("Fitting a new DragonScaler on training targets.")
+            # Convert to float tensor for calculation
+            y_train_tensor = torch.tensor(y_train_arr, dtype=torch.float32)
+            self.target_scaler = DragonScaler.fit_tensor(y_train_tensor)
+
+        if self.target_scaler and self.target_scaler.mean_ is not None:
+            _LOGGER.info("Applying scaler transformation to train, validation, and test targets.")
+            y_train_tensor = self.target_scaler.transform(torch.tensor(y_train_arr, dtype=torch.float32))
+            y_val_tensor = self.target_scaler.transform(torch.tensor(y_val_arr, dtype=torch.float32))
+            y_test_tensor = self.target_scaler.transform(torch.tensor(y_test_arr, dtype=torch.float32))
+            return y_train_tensor.numpy(), y_val_tensor.numpy(), y_test_tensor.numpy()
+
+        return y_train_arr, y_val_arr, y_test_arr
+
+    def _attach_scalers_to_datasets(self):
+        """Helper to attach the master scalers to the child datasets."""
+        for ds in [self._train_ds, self._val_ds, self._test_ds]:
+            if ds is not None:
+                ds._feature_scaler = self.feature_scaler # type: ignore
+                ds._target_scaler = self.target_scaler # type: ignore
+
+    @property
+    def train_dataset(self) -> Dataset:
+        if self._train_ds is None:
+            _LOGGER.error("Train Dataset not yet created.")
+            raise RuntimeError()
+        return self._train_ds
+
+    @property
+    def validation_dataset(self) -> Dataset:
+        if self._val_ds is None:
+            _LOGGER.error("Validation Dataset not yet created.")
+            raise RuntimeError()
+        return self._val_ds
+
+    @property
+    def test_dataset(self) -> Dataset:
+        if self._test_ds is None:
+            _LOGGER.error("Test Dataset not yet created.")
+            raise RuntimeError()
+        return self._test_ds
+
+    @property
+    def feature_names(self) -> list[str]:
+        return self._feature_names
+
+    @property
+    def target_names(self) -> list[str]:
+        return self._target_names
+
+    @property
+    def number_of_features(self) -> int:
+        return len(self._feature_names)
+
+    @property
+    def number_of_targets(self) -> int:
+        return len(self._target_names)
+
+    @property
+    def id(self) -> Optional[str]:
+        return self._id
+
+    @id.setter
+    def id(self, dataset_id: str):
+        if not isinstance(dataset_id, str): raise ValueError("ID must be a string.")
+        self._id = dataset_id
+
+    def dataframes_info(self) -> None:
+        print("--- DataFrame Shapes After Split ---")
+        print(f" X_train shape: {self._X_train_shape}, y_train shape: {self._y_train_shape}")
+        print(f" X_val shape: {self._X_val_shape}, y_val shape: {self._y_val_shape}")
+        print(f" X_test shape: {self._X_test_shape}, y_test shape: {self._y_test_shape}")
+        print("------------------------------------")
+
+    def save_feature_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        save_list_strings(list_strings=self._feature_names,
+                          directory=directory,
+                          filename=DatasetKeys.FEATURE_NAMES,
+                          verbose=verbose)
+
+    def save_target_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        save_list_strings(list_strings=self._target_names,
+                          directory=directory,
+                          filename=DatasetKeys.TARGET_NAMES,
+                          verbose=verbose)
+
+    def save_scaler(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """
+        Saves both feature and target scalers (if they exist) to a single .pth file
+        using a dictionary structure.
+        """
+        if self.feature_scaler is None and self.target_scaler is None:
+            _LOGGER.warning("No scalers (feature or target) were fitted. Nothing to save.")
+            return
+
+        if not self.id:
+            _LOGGER.error("Must set the dataset `id` before saving scaler.")
+            raise ValueError()
+
+        save_path = make_fullpath(directory, make=True, enforce="directory")
+        sanitized_id = sanitize_filename(self.id)
+        filename = f"{DatasetKeys.SCALER_PREFIX}{sanitized_id}.pth"
+        filepath = save_path / filename
+
+        # Construct the consolidated dictionary
+        combined_state = {}
+
+        print_message = "Saved "
+
+        if self.feature_scaler:
+            combined_state[ScalerKeys.FEATURE_SCALER] = self.feature_scaler._get_state()
+            print_message += "feature scaler "
+
+        if self.target_scaler:
+            if self.feature_scaler:
+                print_message += "and "
+            combined_state[ScalerKeys.TARGET_SCALER] = self.target_scaler._get_state()
+            print_message += "target scaler "
+
+        torch.save(combined_state, filepath)
+
+        if verbose:
+            _LOGGER.info(f"{print_message}to '{filepath.name}'.")
+
+    def save_class_map(self, directory: Union[str,Path], verbose: bool=True) -> None:
+        """
+        Saves the class map dictionary to a JSON file.
+
+        Args:
+            directory (str | Path): Directory to save the class map.
+            verbose (bool): Whether to print log messages.
+        """
+        if not self.class_map:
+            _LOGGER.warning(f"No class_map defined. Skipping.")
+            return
+
+        log_name = f"Class_to_Index_{self.id}" if self.id else "Class_to_Index"
+
+        custom_logger(data=self.class_map,
+                      save_directory=directory,
+                      log_name=log_name,
+                      add_timestamp=False,
+                      dict_as="json")
+        if verbose:
+            _LOGGER.info(f"Class map for '{self.id}' saved as '{log_name}.json'.")
+
+    def save_artifacts(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """
+        Saves all dataset artifacts: feature names, target names, scalers, and class map (if applicable).
+
+        Args:
+            directory (str | Path): Directory to save artifacts.
+            verbose (bool): Whether to print log messages.
+        """
+        self.save_feature_names(directory=directory, verbose=verbose)
+        self.save_target_names(directory=directory, verbose=verbose)
+        if self.feature_scaler is not None or self.target_scaler is not None:
+            self.save_scaler(directory=directory, verbose=verbose)
+        if self.class_map:
+            self.save_class_map(directory=directory, verbose=verbose)
+
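Both classes here are private building blocks for the concrete dataset makers, though the hunk exports them via `__all__` for the sibling modules. A minimal sketch of `_PytorchDataset` on its own, with made-up data:

# Hypothetical sketch, not part of the diff.
import numpy
import torch

from ml_tools.ML_datasetmaster._base_datasetmaster import _PytorchDataset

X = numpy.random.rand(100, 5).astype(numpy.float32)  # features
y = numpy.random.randint(0, 2, size=100)             # integer class labels

ds = _PytorchDataset(X, y, labels_dtype=torch.int64,
                     feature_names=[f"f{i}" for i in range(5)])

xb, yb = ds[0]         # __getitem__ returns (features tensor, label tensor)
assert len(ds) == 100  # __len__ counts rows
# ds.target_names      # would raise ValueError: no target names were given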