autogluon.tabular 1.4.0__py3-none-any.whl → 1.4.1b20251128__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/tabular/configs/pipeline_presets.py +130 -0
- autogluon/tabular/configs/presets_configs.py +0 -3
- autogluon/tabular/models/__init__.py +1 -0
- autogluon/tabular/models/catboost/catboost_model.py +4 -1
- autogluon/tabular/models/ebm/__init__.py +0 -0
- autogluon/tabular/models/ebm/ebm_model.py +259 -0
- autogluon/tabular/models/ebm/hyperparameters/__init__.py +0 -0
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +39 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +72 -0
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +4 -2
- autogluon/tabular/models/knn/knn_model.py +7 -3
- autogluon/tabular/models/lgb/lgb_model.py +56 -18
- autogluon/tabular/models/lr/lr_model.py +6 -1
- autogluon/tabular/models/lr/lr_preprocessing_utils.py +6 -7
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +10 -10
- autogluon/tabular/models/mitra/mitra_model.py +43 -3
- autogluon/tabular/models/mitra/sklearn_interface.py +8 -21
- autogluon/tabular/models/realmlp/realmlp_model.py +1 -3
- autogluon/tabular/models/rf/rf_model.py +5 -1
- autogluon/tabular/models/tabicl/tabicl_model.py +1 -7
- autogluon/tabular/models/tabm/tabm_model.py +76 -6
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +6 -4
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +1 -7
- autogluon/tabular/models/tabular_nn/hyperparameters/parameters.py +1 -3
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +2 -1
- autogluon/tabular/models/xgboost/xgboost_model.py +8 -1
- autogluon/tabular/predictor/predictor.py +63 -55
- autogluon/tabular/registry/_ag_model_registry.py +2 -0
- autogluon/tabular/testing/fit_helper.py +28 -0
- autogluon/tabular/version.py +1 -1
- autogluon.tabular-1.4.1b20251128-py3.11-nspkg.pth +1 -0
- {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/METADATA +87 -71
- {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/RECORD +39 -33
- {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/WHEEL +1 -1
- autogluon.tabular-1.4.0-py3.9-nspkg.pth +0 -1
- {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info/licenses}/LICENSE +0 -0
- {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info/licenses}/NOTICE +0 -0
- {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/namespace_packages.txt +0 -0
- {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/top_level.txt +0 -0
- {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/zip-safe +0 -0
autogluon/tabular/models/lgb/lgb_model.py

@@ -46,6 +46,8 @@ class LGBModel(AbstractModel):
     ag_priority_by_problem_type = MappingProxyType({
         SOFTCLASS: 100
     })
+    seed_name = "seed"
+    seed_name_alt = ["seed_value", "random_seed", "random_state"]

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
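The new class-level `seed_name` / `seed_name_alt` attributes suggest that the base model can now map a single user-supplied seed onto whichever keyword the underlying library expects. A hedged sketch of that idea; the helper below is illustrative only and is not AutoGluon's actual implementation:

    def normalize_seed(params: dict, seed_name: str, seed_name_alt: list, default_seed: int = 0) -> dict:
        # Collapse any alternative seed keys onto the canonical `seed_name` key.
        params = dict(params)
        seed = params.pop(seed_name, None)
        for alt in seed_name_alt:
            alt_value = params.pop(alt, None)
            if seed is None:
                seed = alt_value
        params[seed_name] = default_seed if seed is None else seed
        return params

    # normalize_seed({"seed_value": 7}, "seed", ["seed_value", "random_seed", "random_state"]) -> {"seed": 7}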
@@ -161,7 +163,7 @@ class LGBModel(AbstractModel):
             # Before enabling GPU, we should add code to detect that GPU-enabled version is installed and that a valid GPU exists.
             # GPU training heavily alters accuracy, often in a negative manner. We will have to be careful about when to use GPU.
             params["device"] = "gpu"
-            logger.log(20, f"\
+            logger.log(20, f"\tWarning: Training LightGBM with GPU. This may negatively impact model quality compared to CPU training.")
         logger.log(15, f"\tFitting {num_boost_round} rounds... Hyperparameters: {params}")

         if "num_threads" not in params:
@@ -225,7 +227,6 @@ class LGBModel(AbstractModel):
         if log_period is not None:
             callbacks.append(log_evaluation(period=log_period))

-        seed_val = params.pop("seed_value", 0)
         train_params = {
             "params": params,
             "train_set": dataset_train,
@@ -285,8 +286,6 @@ class LGBModel(AbstractModel):
             train_params["params"]["num_classes"] = self.num_classes
         elif self.problem_type == QUANTILE:
             train_params["params"]["quantile_levels"] = self.quantile_levels
-        if seed_val is not None:
-            train_params["params"]["seed"] = seed_val

         # Train LightGBM model:
         # Note that self.model contains a <class 'lightgbm.basic.Booster'> not a LightBGMClassifier or LightGBMRegressor object
@@ -299,16 +298,28 @@ class LGBModel(AbstractModel):
         try:
             self.model = train_lgb_model(early_stopping_callback_kwargs=early_stopping_callback_kwargs, **train_params)
         except LightGBMError:
-            if train_params["params"].get("device", "cpu")
+            if train_params["params"].get("device", "cpu") not in ["gpu", "cuda"]:
                 raise
             else:
-
-
-
-
-
-
-
+                if train_params["params"]["device"] == "gpu":
+                    logger.warning(
+                        "Warning: GPU mode might not be installed for LightGBM, "
+                        "GPU training raised an exception. Falling back to CPU training..."
+                        "Refer to LightGBM GPU documentation: "
+                        "https://github.com/Microsoft/LightGBM/tree/master/python-package#build-gpu-version"
+                        "One possible method is:"
+                        "\tpip uninstall lightgbm -y"
+                        "\tpip install lightgbm --install-option=--gpu"
+                    )
+                elif train_params["params"]["device"] == "cuda":
+                    # Current blocker for using CUDA over GPU: https://github.com/microsoft/LightGBM/issues/6828
+                    # Note that device="cuda" works if AutoGluon (and therefore LightGBM) is installed via conda.
+                    logger.warning(
+                        "Warning: CUDA mode might not be installed for LightGBM, "
+                        "CUDA training raised an exception. Falling back to CPU training..."
+                        "Refer to LightGBM CUDA documentation: "
+                        "https://github.com/Microsoft/LightGBM/tree/master/python-package#build-cuda-version"
+                    )
                 train_params["params"]["device"] = "cpu"
                 self.model = train_lgb_model(early_stopping_callback_kwargs=early_stopping_callback_kwargs, **train_params)
                 retrain = False
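For reference, the GPU-to-CPU fallback above follows a standard LightGBM pattern; a minimal standalone sketch on synthetic data (this is not AutoGluon's training path, just the bare pattern):

    import numpy as np
    import lightgbm
    from lightgbm.basic import LightGBMError

    rng = np.random.RandomState(0)
    train_set = lightgbm.Dataset(rng.rand(100, 4), label=rng.randint(2, size=100))
    params = {"objective": "binary", "device": "gpu", "verbose": -1}

    try:
        booster = lightgbm.train(params, train_set=train_set, num_boost_round=10)
    except LightGBMError:
        # GPU (or CUDA) build not available: retry the same training run on CPU.
        params["device"] = "cpu"
        booster = lightgbm.train(params, train_set=train_set, num_boost_round=10)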
@@ -509,17 +520,44 @@ class LGBModel(AbstractModel):
         default_auxiliary_params.update(extra_auxiliary_params)
         return default_auxiliary_params

-
+    @staticmethod
+    def _is_gpu_lgbm_installed():
         # Taken from https://github.com/microsoft/LightGBM/issues/3939
         try_import_lightgbm()
         import lightgbm

+        rng = np.random.RandomState(42)
+        data = rng.rand(25, 2)
+        label = rng.randint(2, size=25)
+
+        try:
+            train_data = lightgbm.Dataset(data, label=label)
+            params = {
+                "device": "gpu",
+                "verbose": -1,
+            }
+            gbm = lightgbm.train(params, num_boost_round=10, train_set=train_data)
+            return True
+        except Exception as e:
+            return False
+
+    @staticmethod
+    def _is_cuda_lgbm_installed():
+        # Taken from https://github.com/microsoft/LightGBM/issues/3939
+        try_import_lightgbm()
+        import lightgbm
+
+        rng = np.random.RandomState(42)
+        data = rng.rand(25, 2)
+        label = rng.randint(2, size=25)
+
         try:
-            data = np.random.rand(50, 2)
-            label = np.random.randint(2, size=50)
             train_data = lightgbm.Dataset(data, label=label)
-            params = {
-
+            params = {
+                "device": "cuda",
+                "verbose": -1,
+            }
+            gbm = lightgbm.train(params, num_boost_round=10, train_set=train_data)
             return True
         except Exception as e:
             return False
@@ -528,7 +566,7 @@ class LGBModel(AbstractModel):
         minimum_resources = {
             "num_cpus": 1,
         }
-        if is_gpu_available
+        if is_gpu_available:
             minimum_resources["num_gpus"] = 0.5
         return minimum_resources

autogluon/tabular/models/lr/lr_model.py

@@ -43,6 +43,7 @@ class LinearModel(AbstractModel):
     ag_key = "LR"
     ag_name = "LinearModel"
     ag_priority = 30
+    seed_name = "random_state"

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -155,7 +156,7 @@ class LinearModel(AbstractModel):
         return self._pipeline.fit_transform(X)

     def _set_default_params(self):
-        default_params = {"
+        default_params = {"fit_intercept": True}
         if self.problem_type != REGRESSION:
             default_params.update({"solver": _get_solver(self.problem_type)})
         default_params.update(get_param_baseline())
@@ -319,6 +320,10 @@
     ) -> int:
         return 4 * get_approximate_df_mem_usage(X).sum()

+    def _get_maximum_resources(self) -> dict[str, int | float]:
+        # no GPU support
+        return {"num_gpus": 0}
+
     @classmethod
     def supported_problem_types(cls) -> list[str] | None:
         return ["binary", "multiclass", "regression"]
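`_get_maximum_resources` appears to cap what the resource allocator may hand to a model; returning {"num_gpus": 0} marks LinearModel (and, below, RandomForest) as CPU-only. A hedged illustration of how such a cap could be applied; the helper is illustrative, not AutoGluon internals:

    def clamp_resources(requested: dict, maximum: dict) -> dict:
        # Element-wise minimum of requested resources and the model's declared maximum.
        return {key: min(value, maximum.get(key, value)) for key, value in requested.items()}

    print(clamp_resources({"num_cpus": 8, "num_gpus": 1}, {"num_gpus": 0}))
    # {'num_cpus': 8, 'num_gpus': 0} -> a CPU-only model never receives a GPU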
autogluon/tabular/models/lr/lr_preprocessing_utils.py

@@ -5,20 +5,19 @@ from autogluon.features.generators import OneHotEncoderFeatureGenerator

 class OheFeaturesGenerator(BaseEstimator, TransformerMixin):
     def __init__(self):
-
-        self._encoder = None
+        pass

     def fit(self, X, y=None):
-        self.
-        self.
-        self.
+        self.encoder_ = OneHotEncoderFeatureGenerator(max_levels=10000, verbosity=0)
+        self.encoder_.fit(X)
+        self.feature_names_ = self.encoder_.features_out
         return self

     def transform(self, X, y=None):
-        return self.
+        return self.encoder_.transform_ohe(X)

     def get_feature_names(self):
-        return self.
+        return self.feature_names_


 class NlpDataPreprocessor(BaseEstimator, TransformerMixin):
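The rewrite moves all fitted state out of `__init__` into trailing-underscore attributes set in `fit`, matching the scikit-learn estimator convention. A hedged usage sketch, assuming the class above is in scope and `autogluon.features` is installed:

    import pandas as pd

    X = pd.DataFrame({"color": ["red", "blue", "red"], "size": ["S", "M", "L"]})
    ohe = OheFeaturesGenerator().fit(X)
    X_ohe = ohe.transform(X)         # one-hot encoded output of the fitted encoder
    names = ohe.get_feature_names()  # names of the generated one-hot columns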
autogluon/tabular/models/mitra/_internal/models/tab2d.py

@@ -278,18 +278,18 @@ class Padder(torch.nn.Module):
         self.cpu_mode = False

         # Original flash attention initialization logic
-        x_o, self.indices_o, self.cu_seqlens_o, self.max_seqlen_in_batch_o = unpad_input(x, ~self.padding_mask)
+        x_o, self.indices_o, self.cu_seqlens_o, self.max_seqlen_in_batch_o, *_ = unpad_input(x, ~self.padding_mask)

         self.feature_mask_big = einops.repeat(self.feature_mask, 'b f -> b s f', s=n_obs)
-        self.feature_mask_big, _, _, _ = unpad_input(self.feature_mask_big, ~self.padding_mask)
-        x_of, self.indices_of, self.cu_seqlens_of, self.max_seqlen_in_batch_of = unpad_input(x_o, ~self.feature_mask_big)
+        self.feature_mask_big, _, _, _, *_ = unpad_input(self.feature_mask_big, ~self.padding_mask)
+        x_of, self.indices_of, self.cu_seqlens_of, self.max_seqlen_in_batch_of, *_ = unpad_input(x_o, ~self.feature_mask_big)

         x_rearranged = einx.rearrange('b s f d -> b f s d', x)
-        x_f, self.indices_f, self.cu_seqlens_f, self.max_seqlen_in_batch_f = unpad_input(x_rearranged, ~self.feature_mask)
+        x_f, self.indices_f, self.cu_seqlens_f, self.max_seqlen_in_batch_f, *_ = unpad_input(x_rearranged, ~self.feature_mask)

         self.padding_mask_big = einops.repeat(self.padding_mask, 'b s -> b f s', f=n_feat)
-        self.padding_mask_big, _, _, _ = unpad_input(self.padding_mask_big, ~self.feature_mask)
-        x_fo, self.indices_fo, self.cu_seqlens_fo, self.max_seqlen_in_batch_fo = unpad_input(x_f, ~self.padding_mask_big)
+        self.padding_mask_big, _, _, _, *_ = unpad_input(self.padding_mask_big, ~self.feature_mask)
+        x_fo, self.indices_fo, self.cu_seqlens_fo, self.max_seqlen_in_batch_fo, *_ = unpad_input(x_f, ~self.padding_mask_big)

         self.batch_size_f = x_f.shape[0]
         self.batch_size_o = x_o.shape[0]
@@ -307,8 +307,8 @@ class Padder(torch.nn.Module):

         # GPU path with flash attention
         x = einx.rearrange('b s f d -> b f s d', x)
-        x, _, _, _ = unpad_input(x, ~self.feature_mask)
-        x, _, _, _ = unpad_input(x, ~self.padding_mask_big)
+        x, _, _, _, *_ = unpad_input(x, ~self.feature_mask)
+        x, _, _, _, *_ = unpad_input(x, ~self.padding_mask_big)
         return x

     def base_to_feat(self, x: torch.Tensor) -> torch.Tensor:
@@ -319,8 +319,8 @@ class Padder(torch.nn.Module):
             return x.view(b * f, s * d)

         # GPU path with flash attention
-        x, _, _, _ = unpad_input(x, ~self.padding_mask)
-        x, _, _, _ = unpad_input(x, ~self.feature_mask_big)
+        x, _, _, _, *_ = unpad_input(x, ~self.padding_mask)
+        x, _, _, _, *_ = unpad_input(x, ~self.feature_mask_big)
         return x

     def obs_to_base(self, x: torch.Tensor) -> torch.Tensor:
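The added `*_` targets make each unpacking tolerant of `flash_attn` builds where `unpad_input` returns an extra trailing value (newer releases appear to return five items rather than four). A minimal, library-independent illustration of the pattern:

    def unpad_v1():
        # Older-style return: four values.
        return "x", "indices", "cu_seqlens", "max_seqlen"

    def unpad_v2():
        # Newer-style return: an extra trailing value.
        return "x", "indices", "cu_seqlens", "max_seqlen", "used_seqlens"

    # The same unpacking works for both signatures; any extras land in `_`.
    for fn in (unpad_v1, unpad_v2):
        x, indices, cu_seqlens, max_seqlen, *_ = fn()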
autogluon/tabular/models/mitra/mitra_model.py

@@ -32,6 +32,7 @@ class MitraModel(AbstractModel):
     ag_name = "Mitra"
     weights_file_name = "model.pt"
     ag_priority = 55
+    seed_name = "seed"

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -113,6 +114,22 @@ class MitraModel(AbstractModel):

         hyp = self._get_model_params()

+        hf_cls_model = hyp.pop("hf_cls_model", None)
+        hf_reg_model = hyp.pop("hf_reg_model", None)
+        if self.problem_type in ["binary", "multiclass"]:
+            hf_model = hf_cls_model
+        elif self.problem_type == "regression":
+            hf_model = hf_reg_model
+        else:
+            raise AssertionError(f"Unsupported problem_type: {self.problem_type}")
+        if hf_model is None:
+            hf_model = hyp.pop("hf_general_model", None)
+        if hf_model is None:
+            hf_model = hyp.pop("hf_model", None)
+        if hf_model is not None:
+            logger.log(30, f"\tCustom hf_model specified: {hf_model}")
+            hyp["hf_model"] = hf_model
+
         if hyp.get("device", None) is None:
             if num_gpus == 0:
                 hyp["device"] = "cpu"
@@ -138,9 +155,7 @@
         if "verbose" not in hyp:
             hyp["verbose"] = verbosity >= 3

-        self.model = model_cls(
-            **hyp,
-        )
+        self.model = model_cls(**hyp)

         X = self.preprocess(X, is_train=True)
         if X_val is not None:
@@ -214,6 +229,31 @@
         model._weights_saved = False
         return model

+    @classmethod
+    def download_weights(cls, repo_id: str):
+        """
+        Download weights for Mitra from HuggingFace from `repo_id`.
+        Requires an internet connection.
+        """
+        from huggingface_hub import hf_hub_download
+        hf_hub_download(repo_id=repo_id, filename="config.json")
+        hf_hub_download(repo_id=repo_id, filename="model.safetensors")
+
+    @classmethod
+    def download_default_weights(cls):
+        """
+        Download default weights for Mitra from HuggingFace.
+        Includes both classifier and regressor weights.
+
+        This is useful to call when building a docker image to avoid having to download Mitra weights for each instance.
+        This is also useful for benchmarking as a first sanity check
+        to avoid HuggingFace potentially blocking the download.
+
+        Requires an internet connection.
+        """
+        cls.download_weights(repo_id="autogluon/mitra-classifier")
+        cls.download_weights(repo_id="autogluon/mitra-regressor")
+
     @classmethod
     def supported_problem_types(cls) -> Optional[List[str]]:
         return ["binary", "multiclass", "regression"]
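As the docstring above notes, the new classmethods let you pre-fetch Mitra weights, for example in a Dockerfile RUN step, so instances do not hit HuggingFace at fit time. A usage sketch, assuming this 1.4.1b build is installed:

    from autogluon.tabular.models.mitra.mitra_model import MitraModel

    # Download both the classifier and regressor checkpoints into the local HF cache.
    MitraModel.download_default_weights()

    # Or fetch a single repository explicitly.
    MitraModel.download_weights(repo_id="autogluon/mitra-classifier")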
autogluon/tabular/models/mitra/sklearn_interface.py

@@ -30,7 +30,6 @@ RANDOM_MIRROR_X = True # [True, False]
 LR = 0.0001 # [0.00001, 0.000025, 0.00005, 0.000075, 0.0001, 0.00025, 0.0005, 0.00075, 0.001]
 PATIENCE = 40 # [30, 35, 40, 45, 50]
 WARMUP_STEPS = 1000 # [500, 750, 1000, 1250, 1500]
-DEFAULT_GENERAL_MODEL = 'autogluon/mitra-classifier'
 DEFAULT_CLS_MODEL = 'autogluon/mitra-classifier'
 DEFAULT_REG_MODEL = 'autogluon/mitra-regressor'

@@ -67,9 +66,7 @@ class MitraBase(BaseEstimator):
                 fine_tune_steps=DEFAULT_FINE_TUNE_STEPS,
                 metric=DEFAULT_CLS_METRIC,
                 state_dict=None,
-
-                hf_cls_model=DEFAULT_CLS_MODEL,
-                hf_reg_model=DEFAULT_REG_MODEL,
+                hf_model=None,
                 patience=PATIENCE,
                 lr=LR,
                 warmup_steps=WARMUP_STEPS,
@@ -104,9 +101,7 @@ class MitraBase(BaseEstimator):
         self.fine_tune_steps = fine_tune_steps
         self.metric = metric
         self.state_dict = state_dict
-        self.
-        self.hf_cls_model = hf_cls_model
-        self.hf_reg_model = hf_reg_model
+        self.hf_model = hf_model
         self.patience = patience
         self.lr = lr
         self.warmup_steps = warmup_steps
@@ -200,20 +195,8 @@ class MitraBase(BaseEstimator):
         self.train_time = 0
         for _ in range(self.n_estimators):
             if USE_HF:
-                if task == 'classification':
-                    if self.hf_cls_model is not None:
-                        model = Tab2D.from_pretrained(self.hf_cls_model, device=self.device)
-                    elif self.hf_general_model is not None:
-                        model = Tab2D.from_pretrained(self.hf_general_model, device=self.device)
-                    else:
-                        model = Tab2D.from_pretrained("autogluon/mitra-classifier", device=self.device)
-                elif task == 'regression':
-                    if self.hf_reg_model is not None:
-                        model = Tab2D.from_pretrained(self.hf_reg_model, device=self.device)
-                    elif self.hf_general_model is not None:
-                        model = Tab2D.from_pretrained(self.hf_general_model, device=self.device)
-                    else:
-                        model = Tab2D.from_pretrained("autogluon/mitra-regressor", device=self.device)
+                assert self.hf_model is not None, f"hf_model must not be None."
+                model = Tab2D.from_pretrained(self.hf_model, device=self.device)
             else:
                 model = Tab2D(
                     dim=cfg.hyperparams['dim'],
@@ -274,6 +257,7 @@ class MitraClassifier(MitraBase, ClassifierMixin):
                 fine_tune_steps=DEFAULT_FINE_TUNE_STEPS,
                 metric=DEFAULT_CLS_METRIC,
                 state_dict=None,
+                hf_model=DEFAULT_CLS_MODEL,
                 patience=PATIENCE,
                 lr=LR,
                 warmup_steps=WARMUP_STEPS,
@@ -294,6 +278,7 @@ class MitraClassifier(MitraBase, ClassifierMixin):
             fine_tune_steps,
             metric,
             state_dict,
+            hf_model=hf_model,
             patience=patience,
             lr=lr,
             warmup_steps=warmup_steps,
@@ -404,6 +389,7 @@ class MitraRegressor(MitraBase, RegressorMixin):
                 fine_tune_steps=DEFAULT_FINE_TUNE_STEPS,
                 metric=DEFAULT_REG_METRIC,
                 state_dict=None,
+                hf_model=DEFAULT_REG_MODEL,
                 patience=PATIENCE,
                 lr=LR,
                 warmup_steps=WARMUP_STEPS,
@@ -424,6 +410,7 @@ class MitraRegressor(MitraBase, RegressorMixin):
             fine_tune_steps,
             metric,
             state_dict,
+            hf_model=hf_model,
             patience=patience,
             lr=lr,
             warmup_steps=warmup_steps,
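With these changes, the scikit-learn style wrappers take a single `hf_model` argument instead of separate classifier/regressor/general ones. A hedged usage sketch (the keyword name comes from the diff; the fit/predict calls are the usual scikit-learn pattern and are assumed here):

    from autogluon.tabular.models.mitra.sklearn_interface import MitraClassifier

    # DEFAULT_CLS_MODEL ('autogluon/mitra-classifier') is already the default;
    # pass hf_model explicitly only to point at a different checkpoint.
    clf = MitraClassifier(hf_model="autogluon/mitra-classifier")
    # clf.fit(X_train, y_train)
    # y_pred = clf.predict(X_test)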
autogluon/tabular/models/realmlp/realmlp_model.py

@@ -51,6 +51,7 @@ class RealMLPModel(AbstractModel):
     ag_key = "REALMLP"
     ag_name = "RealMLP"
     ag_priority = 75
+    seed_name = "random_state"

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -243,8 +244,6 @@ class RealMLPModel(AbstractModel):

     def _set_default_params(self):
         default_params = dict(
-            random_state=0,
-
             # Don't use early stopping by default, seems to work well without
             use_early_stopping=False,
             early_stopping_additive_patience=40,
@@ -359,4 +358,3 @@ class RealMLPModel(AbstractModel):
         # How to mirror RealMLP learning rate scheduler while forcing stopping at a specific epoch?
         tags = {"can_refit_full": False}
         return tags
-        return tags
autogluon/tabular/models/rf/rf_model.py

@@ -30,6 +30,7 @@ class RFModel(AbstractModel):
     ag_key = "RF"
     ag_name = "RandomForest"
     ag_priority = 80
+    seed_name = "random_state"

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -97,7 +98,6 @@ class RFModel(AbstractModel):
             # This size scales linearly with number of rows.
             "max_leaf_nodes": 15000,
             "n_jobs": -1,
-            "random_state": 0,
             "bootstrap": True, # Required for OOB estimates, setting to False will raise exception if bagging.
             # TODO: min_samples_leaf=5 is too large on most problems, however on some datasets it helps a lot (airlines likes >40 min_samples_leaf, adult likes 2 much better than 1)
             # This value would need to be tuned per dataset, likely very worthwhile.
@@ -368,6 +368,10 @@

         return self._convert_proba_to_unified_form(y_oof_pred_proba)

+    def _get_maximum_resources(self) -> dict[str, int | float]:
+        # no GPU support
+        return {"num_gpus": 0}
+
     def _get_default_auxiliary_params(self) -> dict:
         default_auxiliary_params = super()._get_default_auxiliary_params()
         extra_auxiliary_params = dict(
autogluon/tabular/models/tabicl/tabicl_model.py

@@ -35,6 +35,7 @@ class TabICLModel(AbstractModel):
     ag_key = "TABICL"
     ag_name = "TabICL"
     ag_priority = 65
+    seed_name = "random_state"

     def get_model_cls(self):
         from tabicl import TabICLClassifier
@@ -96,13 +97,6 @@ class TabICLModel(AbstractModel):
             y=y,
         )

-    def _set_default_params(self):
-        default_params = {
-            "random_state": 42,
-        }
-        for param, val in default_params.items():
-            self._set_default_param_value(param, val)
-
     def _get_default_auxiliary_params(self) -> dict:
         default_auxiliary_params = super()._get_default_auxiliary_params()
         default_auxiliary_params.update(
autogluon/tabular/models/tabm/tabm_model.py

@@ -39,6 +39,7 @@ class TabMModel(AbstractModel):
     ag_key = "TABM"
     ag_name = "TabM"
     ag_priority = 85
+    seed_name = "random_state"

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -48,6 +49,7 @@ class TabMModel(AbstractModel):
         self._indicator_columns = None
         self._features_bool = None
         self._bool_to_cat = None
+        self.device = None

     def _fit(
         self,
@@ -141,12 +143,80 @@ class TabMModel(AbstractModel):

         return X

-    def
-
-
-
-
-
+    def save(self, path: str = None, verbose=True) -> str:
+        """
+        Need to set device to CPU to be able to load on a non-GPU environment
+        """
+        import torch
+
+        # Save on CPU to ensure the model can be loaded without GPU
+        if self.model is not None:
+            self.device = self.model.device_
+            device_cpu = torch.device("cpu")
+            self.model.model_ = self.model.model_.to(device_cpu)
+            self.model.device_ = device_cpu
+        path = super().save(path=path, verbose=verbose)
+        # Put the model back to the device after the save
+        if self.model is not None:
+            self.model.model_.to(self.device)
+            self.model.device_ = self.device
+
+        return path
+
+    @classmethod
+    def load(cls, path: str, reset_paths=True, verbose=True):
+        """
+        Loads the model from disk to memory.
+        The loaded model will be on the same device it was trained on (cuda/mps);
+        if the device is not available (trained on GPU, deployed on CPU), then `cpu` will be used.
+
+        Parameters
+        ----------
+        path : str
+            Path to the saved model, minus the file name.
+            This should generally be a directory path ending with a '/' character (or appropriate path separator value depending on OS).
+            The model file is typically located in os.path.join(path, cls.model_file_name).
+        reset_paths : bool, default True
+            Whether to reset the self.path value of the loaded model to be equal to path.
+            It is highly recommended to keep this value as True unless accessing the original self.path value is important.
+            If False, the actual valid path and self.path may differ, leading to strange behaviour and potential exceptions if the model needs to load any other files at a later time.
+        verbose : bool, default True
+            Whether to log the location of the loaded file.
+
+        Returns
+        -------
+        model : cls
+            Loaded model object.
+        """
+        import torch
+
+        model: TabMModel = super().load(path=path, reset_paths=reset_paths, verbose=verbose)
+
+        # Put the model on the same device it was trained on (GPU/MPS) if it is available; otherwise use CPU
+        if model.model is not None:
+            original_device_type = model.device.type
+            if "cuda" in original_device_type:
+                # cuda: nvidia GPU
+                device = torch.device(original_device_type if torch.cuda.is_available() else "cpu")
+            elif "mps" in original_device_type:
+                # mps: Apple Silicon
+                device = torch.device(original_device_type if torch.backends.mps.is_available() else "cpu")
+            else:
+                device = torch.device(original_device_type)
+
+            if verbose and (original_device_type != device.type):
+                logger.log(15, f"Model is trained on {original_device_type}, but the device is not available - loading on {device.type}")
+
+            model.set_device(device=device)
+
+        return model
+
+    def set_device(self, device):
+        self.device = device
+        if self.model is not None:
+            self.model.device_ = device
+            if self.model.model_ is not None:
+                self.model.model_ = self.model.model_.to(device)

     @classmethod
     def supported_problem_types(cls) -> list[str] | None:
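The `save`/`load` pair above temporarily moves the fitted torch module to CPU so the pickle can be opened on GPU-less hosts, then restores the original device. A generic sketch of that pattern outside AutoGluon, assuming a plain `torch.nn.Module` with at least one parameter:

    import pickle
    import torch

    def save_on_cpu(module: torch.nn.Module, path: str) -> None:
        original_device = next(module.parameters()).device
        module.to("cpu")                      # serialize CPU tensors only
        with open(path, "wb") as f:
            pickle.dump(module, f)
        module.to(original_device)            # restore the in-memory object afterwards

    def load_with_fallback(path: str) -> torch.nn.Module:
        with open(path, "rb") as f:
            module = pickle.load(f)
        # Fall back to CPU when the training device is unavailable on this host.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        return module.to(device)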
autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py

@@ -42,6 +42,7 @@ class TabPFNMixModel(AbstractModel):
     ag_key = "TABPFNMIX"
     ag_name = "TabPFNMix"
     ag_priority = 45
+    seed_name = "random_state"

     weights_file_name = "model.pt"

@@ -123,6 +124,7 @@
             raise AssertionError(f"Max allowed classes for the model is {max_classes}, " f"but found {self.num_classes} classes.")

         params = self._get_model_params()
+        random_state = params.pop(self.seed_name, self.default_random_seed)
         sample_rows = ag_params.get("sample_rows", None)
         sample_rows_val = ag_params.get("sample_rows_val", None)
         max_rows = ag_params.get("max_rows", None)
@@ -133,11 +135,11 @@

         # TODO: Make sample_rows generic
         if sample_rows is not None and isinstance(sample_rows, int) and len(X) > sample_rows:
-            X, y = self._subsample_data(X=X, y=y, num_rows=sample_rows)
+            X, y = self._subsample_data(X=X, y=y, num_rows=sample_rows, random_state=random_state)

         # TODO: Make sample_rows generic
         if X_val is not None and y_val is not None and sample_rows_val is not None and isinstance(sample_rows_val, int) and len(X_val) > sample_rows_val:
-            X_val, y_val = self._subsample_data(X=X_val, y=y_val, num_rows=sample_rows_val)
+            X_val, y_val = self._subsample_data(X=X_val, y=y_val, num_rows=sample_rows_val, random_state=random_state)

         from ._internal.core.enums import Task
         if self.problem_type in [REGRESSION, QUANTILE]:
@@ -178,7 +180,7 @@
         elif weights_path is not None:
             logger.log(15, f'\tLoading pre-trained weights from file... (weights_path="{weights_path}")')

-        cfg = ConfigRun(hyperparams=params, task=task, device=device)
+        cfg = ConfigRun(hyperparams=params, task=task, device=device, seed=random_state)

         if cfg.hyperparams["max_epochs"] == 0 and cfg.hyperparams["n_ensembles"] != 1:
             logger.log(
@@ -242,7 +244,7 @@
         return self

     # TODO: Make this generic by creating a generic `preprocess_train` and putting this logic prior to `_preprocess`.
-    def _subsample_data(self, X: pd.DataFrame, y: pd.Series, num_rows: int, random_state=0) -> (pd.DataFrame, pd.Series):
+    def _subsample_data(self, X: pd.DataFrame, y: pd.Series, num_rows: int, random_state: int | None = 0) -> (pd.DataFrame, pd.Series):
         num_rows_to_drop = len(X) - num_rows
         X, _, y, _ = generate_train_test_split(
             X=X,
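Threading the popped `random_state` into `_subsample_data` and `ConfigRun` makes the row subsampling and the training run reproducible for a given seed. A hedged sketch of the same subsampling idea using scikit-learn directly (not AutoGluon's `generate_train_test_split`):

    import pandas as pd
    from sklearn.model_selection import train_test_split

    def subsample(X: pd.DataFrame, y: pd.Series, num_rows: int, random_state: int = 0):
        # Keep `num_rows` rows, reproducibly for a fixed random_state.
        if len(X) <= num_rows:
            return X, y
        X_keep, _, y_keep, _ = train_test_split(X, y, train_size=num_rows, random_state=random_state)
        return X_keep, y_keep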
autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py

@@ -122,6 +122,7 @@ class TabPFNV2Model(AbstractModel):
     ag_key = "TABPFNV2"
     ag_name = "TabPFNv2"
     ag_priority = 105
+    seed_name = "random_state"

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -198,12 +199,6 @@
         # logs "Built with PriorLabs-TabPFN"
         self._log_license(device=device)

-        if num_gpus == 0:
-            logger.log(
-                30,
-                f"\tWARNING: Running TabPFNv2 on CPU. This can be very slow. We recommend using a GPU instead."
-            )
-
         X = self.preprocess(X, is_train=True)

         hps = self._get_model_params()
@@ -306,7 +301,6 @@

     def _set_default_params(self):
         default_params = {
-            "random_state": 42,
             "ignore_pretraining_limits": True, # to ignore warnings and size limits
         }
         for param, val in default_params.items():
autogluon/tabular/models/tabular_nn/hyperparameters/parameters.py

@@ -7,9 +7,7 @@ from autogluon.core.constants import BINARY, MULTICLASS, QUANTILE, REGRESSION

 def get_fixed_params(framework):
     """Parameters that currently cannot be searched during HPO"""
-    fixed_params = {
-        # 'seed_value': 0, # random seed for reproducibility (set = None to ignore)
-    }
+    fixed_params = {}
     # TODO: v1.2 Change default epochs_wo_improve to "auto", so that None can mean no early stopping.
     pytorch_fixed_params = {
         "num_epochs": 1000, # maximum number of epochs (passes over full dataset) for training NN