autogluon.tabular 1.3.2b20250712__py3-none-any.whl → 1.3.2b20250714__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
- autogluon/tabular/models/__init__.py +1 -0
- autogluon/tabular/models/mitra/__init__.py +0 -0
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +190 -0
- autogluon/tabular/models/mitra/_internal/config/config_run.py +32 -0
- autogluon/tabular/models/mitra/_internal/config/enums.py +145 -0
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +94 -0
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +55 -0
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +108 -0
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +67 -0
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +134 -0
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +367 -0
- autogluon/tabular/models/mitra/_internal/data/collator.py +46 -0
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +132 -0
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +53 -0
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +420 -0
- autogluon/tabular/models/mitra/_internal/models/base.py +21 -0
- autogluon/tabular/models/mitra/_internal/models/embedding.py +182 -0
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +667 -0
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +15 -0
- autogluon/tabular/models/mitra/mitra_model.py +214 -0
- autogluon/tabular/models/mitra/sklearn_interface.py +462 -0
- autogluon/tabular/registry/_ag_model_registry.py +2 -0
- autogluon/tabular/version.py +1 -1
- {autogluon.tabular-1.3.2b20250712.dist-info → autogluon.tabular-1.3.2b20250714.dist-info}/METADATA +19 -10
- {autogluon.tabular-1.3.2b20250712.dist-info → autogluon.tabular-1.3.2b20250714.dist-info}/RECORD +32 -12
- /autogluon.tabular-1.3.2b20250712-py3.9-nspkg.pth → /autogluon.tabular-1.3.2b20250714-py3.9-nspkg.pth +0 -0
- {autogluon.tabular-1.3.2b20250712.dist-info → autogluon.tabular-1.3.2b20250714.dist-info}/LICENSE +0 -0
- {autogluon.tabular-1.3.2b20250712.dist-info → autogluon.tabular-1.3.2b20250714.dist-info}/NOTICE +0 -0
- {autogluon.tabular-1.3.2b20250712.dist-info → autogluon.tabular-1.3.2b20250714.dist-info}/WHEEL +0 -0
- {autogluon.tabular-1.3.2b20250712.dist-info → autogluon.tabular-1.3.2b20250714.dist-info}/namespace_packages.txt +0 -0
- {autogluon.tabular-1.3.2b20250712.dist-info → autogluon.tabular-1.3.2b20250714.dist-info}/top_level.txt +0 -0
- {autogluon.tabular-1.3.2b20250712.dist-info → autogluon.tabular-1.3.2b20250714.dist-info}/zip-safe +0 -0
autogluon/tabular/models/mitra/mitra_model.py
ADDED
@@ -0,0 +1,214 @@

import pandas as pd
from typing import Optional, List
from autogluon.common.utils.resource_utils import ResourceManager
from autogluon.core.models import AbstractModel
import os


# TODO: Needs memory usage estimate method
class MitraModel(AbstractModel):
    ag_key = "MITRA"
    ag_name = "Mitra"
    weights_file_name = "model.pt"
    ag_priority = 55

    def __init__(self, problem_type=None, **kwargs):
        super().__init__(**kwargs)
        self.problem_type = problem_type
        self._weights_saved = False

    def get_model_cls(self):
        from .sklearn_interface import MitraClassifier
        if self.problem_type in ['binary', 'multiclass']:
            model_cls = MitraClassifier
        elif self.problem_type == 'regression':
            from .sklearn_interface import MitraRegressor
            model_cls = MitraRegressor
        else:
            raise AssertionError(f"Unsupported problem_type: {self.problem_type}")
        return model_cls

    def _fit(
        self,
        X: pd.DataFrame,
        y: pd.Series,
        X_val: pd.DataFrame = None,
        y_val: pd.Series = None,
        time_limit: float = None,
        num_cpus: int = 1,
        **kwargs,
    ):
        model_cls = self.get_model_cls()

        hyp = self._get_model_params()
        if "state_dict_classification" in hyp:
            state_dict_classification = hyp.pop("state_dict_classification")
            if self.problem_type in ["binary", "multiclass"]:
                hyp["state_dict"] = state_dict_classification
        if "state_dict_regression" in hyp:
            state_dict_regression = hyp.pop("state_dict_regression")
            if self.problem_type in ["regression"]:
                hyp["state_dict"] = state_dict_regression

        self.model = model_cls(
            **hyp,
        )

        X = self.preprocess(X)
        if X_val is not None:
            X_val = self.preprocess(X_val)

        self.model = self.model.fit(
            X=X,
            y=y,
            X_val=X_val,
            y_val=y_val,
            time_limit=time_limit,
        )

    def _set_default_params(self):
        default_params = {
            "device": "cuda",  # "cpu"
            "n_estimators": 1,
        }
        for param, val in default_params.items():
            self._set_default_param_value(param, val)

    def _get_default_auxiliary_params(self) -> dict:
        default_auxiliary_params = super()._get_default_auxiliary_params()
        default_auxiliary_params.update(
            {
                "max_rows": 10000,
                "max_features": 500,
                "max_classes": 10,
            }
        )
        return default_auxiliary_params

    @property
    def weights_path(self) -> str:
        return os.path.join(self.path, self.weights_file_name)

    def save(self, path: str = None, verbose=True) -> str:
        _model_weights_list = None
        if self.model is not None:
            _model_weights_list = []
            for i in range(len(self.model.trainers)):
                _model_weights_list.append(self.model.trainers[i].model)
                self.model.trainers[i].checkpoint = None
                self.model.trainers[i].model = None
                self.model.trainers[i].optimizer = None
                self.model.trainers[i].scheduler_warmup = None
                self.model.trainers[i].scheduler_reduce_on_plateau = None
            self._weights_saved = True
        path = super().save(path=path, verbose=verbose)
        if _model_weights_list is not None:
            import torch
            os.makedirs(self.path, exist_ok=True)
            torch.save(_model_weights_list, self.weights_path)
            for i in range(len(self.model.trainers)):
                self.model.trainers[i].model = _model_weights_list[i]
        return path

    @classmethod
    def load(cls, path: str, reset_paths=False, verbose=True):
        model: MitraModel = super().load(path=path, reset_paths=reset_paths, verbose=verbose)

        if model._weights_saved:
            import torch
            model_weights_list = torch.load(model.weights_path, weights_only=False)  # nosec B614
            for i in range(len(model.model.trainers)):
                model.model.trainers[i].model = model_weights_list[i]
            model._weights_saved = False
        return model

    @classmethod
    def supported_problem_types(cls) -> Optional[List[str]]:
        return ["binary", "multiclass", "regression"]

    @classmethod
    def _get_default_ag_args_ensemble(cls, **kwargs) -> dict:
        default_ag_args_ensemble = super()._get_default_ag_args_ensemble(**kwargs)
        # FIXME: Test if it works with parallel, need to enable n_cpus support
        extra_ag_args_ensemble = {
            "fold_fitting_strategy": "sequential_local",  # FIXME: Comment out after debugging for large speedup
        }
        default_ag_args_ensemble.update(extra_ag_args_ensemble)
        return default_ag_args_ensemble

    def _get_default_resources(self) -> tuple[int, int]:
        # logical=False is faster in training
        num_cpus = ResourceManager.get_cpu_count_psutil(logical=False)
        num_gpus = 1
        return num_cpus, num_gpus

    def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
        return self.estimate_memory_usage_static(X=X, problem_type=self.problem_type, num_classes=self.num_classes, **kwargs)

    @classmethod
    def _estimate_memory_usage_static(
        cls,
        *,
        X: pd.DataFrame,
        **kwargs,
    ) -> int:
        return max(
            cls._estimate_memory_usage_static_cpu_icl(X=X, **kwargs),
            cls._estimate_memory_usage_static_cpu_ft_icl(X=X, **kwargs),
            cls._estimate_memory_usage_static_gpu_cpu(X=X, **kwargs),
            cls._estimate_memory_usage_static_gpu_gpu(X=X, **kwargs),
        )

    @classmethod
    def _estimate_memory_usage_static_cpu_icl(
        cls,
        *,
        X: pd.DataFrame,
        **kwargs,
    ) -> int:
        cpu_memory_kb = 1.3 * (0.001748 * (X.shape[0] ** 2) * X.shape[1]
                               + 0.001206 * X.shape[0] * (X.shape[1] ** 2)
                               + 10.3482 * X.shape[0] * X.shape[1]
                               + 6409698)
        return int(cpu_memory_kb * 1e3)

    @classmethod
    def _estimate_memory_usage_static_cpu_ft_icl(
        cls,
        *,
        X: pd.DataFrame,
        **kwargs,
    ) -> int:
        cpu_memory_kb = 1.3 * (0.001 * (X.shape[0] ** 2) * X.shape[1]
                               + 0.004541 * X.shape[0] * (X.shape[1] ** 2)
                               + 46.2974 * X.shape[0] * X.shape[1]
                               + 5605681)
        return int(cpu_memory_kb * 1e3)

    @classmethod
    def _estimate_memory_usage_static_gpu_cpu(
        cls,
        *,
        X: pd.DataFrame,
        **kwargs,
    ) -> int:
        return int(5 * 1e9)

    @classmethod
    def _estimate_memory_usage_static_gpu_gpu(
        cls,
        *,
        X: pd.DataFrame,
        **kwargs,
    ) -> int:
        gpu_memory_mb = 1.3 * (0.05676 * X.shape[0] * X.shape[1] + 3901)
        return int(gpu_memory_mb * 1e6)

    @classmethod
    def _class_tags(cls) -> dict:
        return {
            "can_estimate_memory_usage_static": True,
        }

    def _more_tags(self) -> dict:
        tags = {"can_refit_full": True}
        return tags
autogluon/tabular/models/mitra/sklearn_interface.py
ADDED
@@ -0,0 +1,462 @@

import numpy as np
import time
import torch
import pandas as pd

from pathlib import Path
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin

from ._internal.data.dataset_split import make_stratified_dataset_split
from ._internal.config.config_run import ConfigRun
from ._internal.core.trainer_finetune import TrainerFinetune
from ._internal.models.tab2d import Tab2D
from ._internal.config.enums import ModelName

# Hyperparameter search space
DEFAULT_FINE_TUNE = True  # [True, False]
DEFAULT_FINE_TUNE_STEPS = 50  # [50, 60, 70, 80, 90, 100]
DEFAULT_CLS_METRIC = 'log_loss'  # ['log_loss', 'accuracy', 'auc']
DEFAULT_REG_METRIC = 'mse'  # ['mse', 'mae', 'rmse', 'r2']
SHUFFLE_CLASSES = False  # [True, False]
SHUFFLE_FEATURES = False  # [True, False]
USE_RANDOM_TRANSFORMS = False  # [True, False]
RANDOM_MIRROR_REGRESSION = True  # [True, False]
RANDOM_MIRROR_X = True  # [True, False]
LR = 0.0001  # [0.00001, 0.000025, 0.00005, 0.000075, 0.0001, 0.00025, 0.0005, 0.00075, 0.001]
PATIENCE = 40  # [30, 35, 40, 45, 50]
WARMUP_STEPS = 1000  # [500, 750, 1000, 1250, 1500]
DEFAULT_GENERAL_MODEL = 'autogluon/mitra-classifier'
DEFAULT_CLS_MODEL = 'autogluon/mitra-classifier'
DEFAULT_REG_MODEL = 'autogluon/mitra-regressor'

# Constants
SEED = 0
DEFAULT_MODEL_TYPE = "Tab2D"
DEFAULT_DEVICE = "cuda"
DEFAULT_ENSEMBLE = 1
DEFAULT_DIM = 512
DEFAULT_LAYERS = 12
DEFAULT_HEADS = 4
DEFAULT_CLASSES = 10
DEFAULT_VALIDATION_SPLIT = 0.2
USE_HF = True  # Use Hugging Face pretrained models if available


class MitraBase(BaseEstimator):
    """Base class for Mitra models with common functionality."""

    def __init__(self,
                 model_type=DEFAULT_MODEL_TYPE,
                 n_estimators=DEFAULT_ENSEMBLE,
                 device=DEFAULT_DEVICE,
                 fine_tune=DEFAULT_FINE_TUNE,
                 fine_tune_steps=DEFAULT_FINE_TUNE_STEPS,
                 metric=DEFAULT_CLS_METRIC,
                 state_dict=None,
                 hf_general_model=DEFAULT_GENERAL_MODEL,
                 hf_cls_model=DEFAULT_CLS_MODEL,
                 hf_reg_model=DEFAULT_REG_MODEL,
                 patience=PATIENCE,
                 lr=LR,
                 warmup_steps=WARMUP_STEPS,
                 shuffle_classes=SHUFFLE_CLASSES,
                 shuffle_features=SHUFFLE_FEATURES,
                 use_random_transforms=USE_RANDOM_TRANSFORMS,
                 random_mirror_regression=RANDOM_MIRROR_REGRESSION,
                 random_mirror_x=RANDOM_MIRROR_X,
                 seed=SEED,
                 ):
        """
        Initialize the base Mitra model.

        Parameters
        ----------
        model_type : str, default="Tab2D"
            The type of model to use. Options: "Tab2D", "Tab2D_COL_ROW"
        n_estimators : int, default=1
            Number of models in the ensemble
        device : str, default="cuda"
            Device to run the model on
        fine_tune_steps : int, default=50
            Number of fine-tuning epochs to train for
        state_dict : str, optional
            Path to the pretrained weights
        """
        self.model_type = model_type
        self.n_estimators = n_estimators
        self.device = device
        self.fine_tune = fine_tune
        self.fine_tune_steps = fine_tune_steps
        self.metric = metric
        self.state_dict = state_dict
        self.hf_general_model = hf_general_model
        self.hf_cls_model = hf_cls_model
        self.hf_reg_model = hf_reg_model
        self.patience = patience
        self.lr = lr
        self.warmup_steps = warmup_steps
        self.shuffle_classes = shuffle_classes
        self.shuffle_features = shuffle_features
        self.use_random_transforms = use_random_transforms
        self.random_mirror_regression = random_mirror_regression
        self.random_mirror_x = random_mirror_x
        self.trainers = []
        self.train_time = 0
        self.seed = seed

    def _create_config(self, task, dim_output, time_limit=None):
        cfg = ConfigRun(
            device=self.device,
            model_name=ModelName.TAB2D,
            seed=self.seed,
            hyperparams={
                'dim_embedding': None,
                'early_stopping_data_split': 'VALID',
                'early_stopping_max_samples': 2048,
                'early_stopping_patience': self.patience,
                'grad_scaler_enabled': False,
                'grad_scaler_growth_interval': 1000,
                'grad_scaler_scale_init': 65536.0,
                'grad_scaler_scale_min': 65536.0,
                'label_smoothing': 0.0,
                'lr_scheduler': False,
                'lr_scheduler_patience': 25,
                'max_epochs': self.fine_tune_steps if self.fine_tune else 0,
                'max_samples_query': 1024,
                'max_samples_support': 8192,
                'optimizer': 'adamw',
                'lr': self.lr,
                'weight_decay': 0.1,
                'warmup_steps': self.warmup_steps,
                'path_to_weights': self.state_dict,
                'precision': 'bfloat16',
                'random_mirror_regression': self.random_mirror_regression,
                'random_mirror_x': self.random_mirror_x,
                'shuffle_classes': self.shuffle_classes,
                'shuffle_features': self.shuffle_features,
                'use_random_transforms': self.use_random_transforms,
                'use_feature_count_scaling': False,
                'use_pretrained_weights': False,
                'use_quantile_transformer': False,
                'budget': time_limit,
                'metric': self.metric,
            },
        )

        cfg.task = task
        cfg.hyperparams.update({
            'n_ensembles': self.n_estimators,
            'dim': DEFAULT_DIM,
            'dim_output': dim_output,
            'n_layers': DEFAULT_LAYERS,
            'n_heads': DEFAULT_HEADS,
            'regression_loss': 'mse',
        })

        return cfg, Tab2D

    def _split_data(self, X, y):
        """Split data into training and validation sets."""
        if hasattr(self, 'task') and self.task == 'classification':
            return make_stratified_dataset_split(X, y, seed=self.seed)
        else:
            # For regression, use a random split
            val_indices = np.random.choice(range(len(X)), int(DEFAULT_VALIDATION_SPLIT * len(X)), replace=False).tolist()
            train_indices = [i for i in range(len(X)) if i not in val_indices]
            return X[train_indices], X[val_indices], y[train_indices], y[val_indices]

    def _train_ensemble(self, X_train, y_train, X_valid, y_valid, task, dim_output, n_classes=0, time_limit=None):
        """Train the ensemble of models."""

        cfg, Tab2D = self._create_config(task, dim_output, time_limit)

        success = False
        while not (success and cfg.hyperparams["max_samples_support"] > 0 and cfg.hyperparams["max_samples_query"] > 0):
            try:
                self.trainers.clear()

                self.train_time = 0
                for _ in range(self.n_estimators):
                    if USE_HF:
                        if task == 'classification':
                            if self.hf_cls_model is not None:
                                model = Tab2D.from_pretrained(self.hf_cls_model, device=self.device)
                            elif self.hf_general_model is not None:
                                model = Tab2D.from_pretrained(self.hf_general_model, device=self.device)
                            else:
                                model = Tab2D.from_pretrained("autogluon/mitra-classifier", device=self.device)
                        elif task == 'regression':
                            if self.hf_reg_model is not None:
                                model = Tab2D.from_pretrained(self.hf_reg_model, device=self.device)
                            elif self.hf_general_model is not None:
                                model = Tab2D.from_pretrained(self.hf_general_model, device=self.device)
                            else:
                                model = Tab2D.from_pretrained("autogluon/mitra-regressor", device=self.device)
                    else:
                        model = Tab2D(
                            dim=cfg.hyperparams['dim'],
                            dim_output=dim_output,
                            n_layers=cfg.hyperparams['n_layers'],
                            n_heads=cfg.hyperparams['n_heads'],
                            task=task.upper(),
                            use_pretrained_weights=True,
                            path_to_weights=Path(self.state_dict),
                            device=self.device,
                        )
                    trainer = TrainerFinetune(cfg, model, n_classes=n_classes, device=self.device)

                    start_time = time.time()
                    trainer.train(X_train, y_train, X_valid, y_valid)
                    end_time = time.time()

                    self.trainers.append(trainer)
                    self.train_time += end_time - start_time

                success = True

            except torch.cuda.OutOfMemoryError:
                if cfg.hyperparams["max_samples_support"] >= 2048:
                    cfg.hyperparams["max_samples_support"] = int(
                        cfg.hyperparams["max_samples_support"] // 2
                    )
                    print(f"Reducing max_samples_support from {cfg.hyperparams['max_samples_support'] * 2} "
                          f"to {cfg.hyperparams['max_samples_support']} due to OOM error.")
                else:
                    cfg.hyperparams["max_samples_support"] = int(
                        cfg.hyperparams["max_samples_support"] // 2
                    )
                    print(f"Reducing max_samples_support from {cfg.hyperparams['max_samples_support'] * 2} "
                          f"to {cfg.hyperparams['max_samples_support']} due to OOM error.")
                    cfg.hyperparams["max_samples_query"] = int(
                        cfg.hyperparams["max_samples_query"] // 2
                    )
                    print(f"Reducing max_samples_query from {cfg.hyperparams['max_samples_query'] * 2} "
                          f"to {cfg.hyperparams['max_samples_query']} due to OOM error.")

        if not success:
            raise RuntimeError(
                "Failed to train Mitra model after multiple attempts due to out of memory error."
            )

        return self


class MitraClassifier(MitraBase, ClassifierMixin):
    """Classifier implementation of Mitra model."""

    def __init__(self,
                 model_type=DEFAULT_MODEL_TYPE,
                 n_estimators=DEFAULT_ENSEMBLE,
                 device=DEFAULT_DEVICE,
                 fine_tune=DEFAULT_FINE_TUNE,
                 fine_tune_steps=DEFAULT_FINE_TUNE_STEPS,
                 metric=DEFAULT_CLS_METRIC,
                 state_dict=None,
                 patience=PATIENCE,
                 lr=LR,
                 warmup_steps=WARMUP_STEPS,
                 shuffle_classes=SHUFFLE_CLASSES,
                 shuffle_features=SHUFFLE_FEATURES,
                 use_random_transforms=USE_RANDOM_TRANSFORMS,
                 random_mirror_regression=RANDOM_MIRROR_REGRESSION,
                 random_mirror_x=RANDOM_MIRROR_X,
                 seed=SEED,
                 ):
        """Initialize the classifier."""
        super().__init__(
            model_type,
            n_estimators,
            device,
            fine_tune,
            fine_tune_steps,
            metric,
            state_dict,
            patience=patience,
            lr=lr,
            warmup_steps=warmup_steps,
            shuffle_classes=shuffle_classes,
            shuffle_features=shuffle_features,
            use_random_transforms=use_random_transforms,
            random_mirror_regression=random_mirror_regression,
            random_mirror_x=random_mirror_x,
            seed=seed,
        )
        self.task = 'classification'

    def fit(self, X, y, X_val=None, y_val=None, time_limit=None):
        """
        Fit the ensemble of models.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data
        y : array-like of shape (n_samples,)
            Target values

        Returns
        -------
        self : object
            Returns self
        """

        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.Series):
            y = y.values

        self.X, self.y = X, y

        if X_val is not None and y_val is not None:
            if isinstance(X_val, pd.DataFrame):
                X_val = X_val.values
            if isinstance(y_val, pd.Series):
                y_val = y_val.values
            X_train, X_valid, y_train, y_valid = X, X_val, y, y_val
        else:
            X_train, X_valid, y_train, y_valid = self._split_data(X, y)

        return self._train_ensemble(X_train, y_train, X_valid, y_valid, self.task, DEFAULT_CLASSES, n_classes=DEFAULT_CLASSES, time_limit=time_limit)

    def predict(self, X):
        """
        Predict class labels for samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted classes
        """

        if isinstance(X, pd.DataFrame):
            X = X.values

        return self.predict_proba(X).argmax(axis=1)

    def predict_proba(self, X):
        """
        Predict class probabilities for samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes)
            The class probabilities of the input samples
        """
        if isinstance(X, pd.DataFrame):
            X = X.values

        preds = []
        for trainer in self.trainers:
            logits = trainer.predict(self.X, self.y, X)[..., :len(np.unique(self.y))]  # Remove extra classes
            preds.append(np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True))  # Softmax
        preds = sum(preds) / len(preds)  # Average ensemble predictions
        return preds


class MitraRegressor(MitraBase, RegressorMixin):
    """Regressor implementation of Mitra model."""

    def __init__(self,
                 model_type=DEFAULT_MODEL_TYPE,
                 n_estimators=DEFAULT_ENSEMBLE,
                 device=DEFAULT_DEVICE,
                 fine_tune=DEFAULT_FINE_TUNE,
                 fine_tune_steps=DEFAULT_FINE_TUNE_STEPS,
                 metric=DEFAULT_REG_METRIC,
                 state_dict=None,
                 patience=PATIENCE,
                 lr=LR,
                 warmup_steps=WARMUP_STEPS,
                 shuffle_classes=SHUFFLE_CLASSES,
                 shuffle_features=SHUFFLE_FEATURES,
                 use_random_transforms=USE_RANDOM_TRANSFORMS,
                 random_mirror_regression=RANDOM_MIRROR_REGRESSION,
                 random_mirror_x=RANDOM_MIRROR_X,
                 seed=SEED,
                 ):
        """Initialize the regressor."""
        super().__init__(
            model_type,
            n_estimators,
            device,
            fine_tune,
            fine_tune_steps,
            metric,
            state_dict,
            patience=patience,
            lr=lr,
            warmup_steps=warmup_steps,
            shuffle_classes=shuffle_classes,
            shuffle_features=shuffle_features,
            use_random_transforms=use_random_transforms,
            random_mirror_regression=random_mirror_regression,
            random_mirror_x=random_mirror_x,
            seed=seed,
        )
        self.task = 'regression'

    def fit(self, X, y, X_val=None, y_val=None, time_limit=None):
        """
        Fit the ensemble of models.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data
        y : array-like of shape (n_samples,)
            Target values

        Returns
        -------
        self : object
            Returns self
        """

        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.Series):
            y = y.values

        self.X, self.y = X, y

        if X_val is not None and y_val is not None:
            if isinstance(X_val, pd.DataFrame):
                X_val = X_val.values
            if isinstance(y_val, pd.Series):
                y_val = y_val.values
            X_train, X_valid, y_train, y_valid = X, X_val, y, y_val
        else:
            X_train, X_valid, y_train, y_valid = self._split_data(X, y)

        return self._train_ensemble(X_train, y_train, X_valid, y_valid, self.task, 1, time_limit=time_limit)

    def predict(self, X):
        """
        Predict regression target for samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted values
        """
        if isinstance(X, pd.DataFrame):
            X = X.values

        preds = [trainer.predict(self.X, self.y, X) for trainer in self.trainers]
        return sum(preds) / len(preds)  # Average ensemble predictions
autogluon/tabular/registry/_ag_model_registry.py
CHANGED
@@ -25,6 +25,7 @@ from ..models import (
     TabICLModel,
     TabMModel,
     TabPFNMixModel,
+    MitraModel,
     TabPFNV2Model,
     TabularNeuralNetTorchModel,
     TextPredictorModel,
@@ -53,6 +54,7 @@ REGISTERED_MODEL_CLS_LST = [
     TabMModel,
     TabPFNMixModel,
     TabPFNV2Model,
+    MitraModel,
     FastTextModel,
     GreedyWeightedEnsembleModel,
     SimpleWeightedEnsembleModel,
autogluon/tabular/version.py
CHANGED
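
For context, with MitraModel registered under its ag_key "MITRA" (see the registry diff above), the model should be selectable through the standard TabularPredictor hyperparameters mechanism. A minimal sketch; the dataset paths and hyperparameter values are illustrative, not part of this diff:

from autogluon.tabular import TabularDataset, TabularPredictor

train_data = TabularDataset("train.csv")  # illustrative path
test_data = TabularDataset("test.csv")    # illustrative path

# Select only the Mitra model via its ag_key and pass model hyperparameters.
predictor = TabularPredictor(label="target").fit(
    train_data,
    hyperparameters={"MITRA": {"fine_tune": True, "fine_tune_steps": 50}},
)
predictions = predictor.predict(test_data)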