autogluon.tabular 1.3.2b20250723__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/tabular/configs/config_helper.py +1 -1
- autogluon/tabular/configs/hyperparameter_configs.py +2 -265
- autogluon/tabular/configs/presets_configs.py +51 -23
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +0 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +310 -0
- autogluon/tabular/models/automm/automm_model.py +2 -0
- autogluon/tabular/models/automm/ft_transformer.py +4 -1
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +18 -6
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +8 -4
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +5 -1
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +3 -0
- autogluon/tabular/models/mitra/mitra_model.py +74 -21
- autogluon/tabular/models/mitra/sklearn_interface.py +15 -13
- autogluon/tabular/models/realmlp/realmlp_model.py +13 -6
- autogluon/tabular/models/tabicl/tabicl_model.py +17 -8
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +3 -0
- autogluon/tabular/models/tabm/tabm_model.py +14 -6
- autogluon/tabular/models/tabm/tabm_reference.py +2 -0
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +4 -0
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +29 -12
- autogluon/tabular/predictor/predictor.py +79 -26
- autogluon/tabular/trainer/abstract_trainer.py +2 -0
- autogluon/tabular/version.py +1 -1
- {autogluon.tabular-1.3.2b20250723.dist-info → autogluon.tabular-1.4.0.dist-info}/METADATA +40 -18
- {autogluon.tabular-1.3.2b20250723.dist-info → autogluon.tabular-1.4.0.dist-info}/RECORD +32 -31
- /autogluon.tabular-1.3.2b20250723-py3.9-nspkg.pth → /autogluon.tabular-1.4.0-py3.9-nspkg.pth +0 -0
- {autogluon.tabular-1.3.2b20250723.dist-info → autogluon.tabular-1.4.0.dist-info}/LICENSE +0 -0
- {autogluon.tabular-1.3.2b20250723.dist-info → autogluon.tabular-1.4.0.dist-info}/NOTICE +0 -0
- {autogluon.tabular-1.3.2b20250723.dist-info → autogluon.tabular-1.4.0.dist-info}/WHEEL +0 -0
- {autogluon.tabular-1.3.2b20250723.dist-info → autogluon.tabular-1.4.0.dist-info}/namespace_packages.txt +0 -0
- {autogluon.tabular-1.3.2b20250723.dist-info → autogluon.tabular-1.4.0.dist-info}/top_level.txt +0 -0
- {autogluon.tabular-1.3.2b20250723.dist-info → autogluon.tabular-1.4.0.dist-info}/zip-safe +0 -0
autogluon/tabular/models/mitra/sklearn_interface.py

```diff
@@ -1,3 +1,6 @@
+from __future__ import annotations
+
+import os
 import time
 from pathlib import Path
 import contextlib

@@ -76,6 +79,7 @@ class MitraBase(BaseEstimator):
         random_mirror_regression=RANDOM_MIRROR_REGRESSION,
         random_mirror_x=RANDOM_MIRROR_X,
         seed=SEED,
+        verbose=True,
     ):
         """
         Initialize the base Mitra model.

@@ -114,8 +118,11 @@ class MitraBase(BaseEstimator):
         self.trainers = []
         self.train_time = 0
         self.seed = seed
+        self.verbose = verbose

-        set_seed(self.seed)
+        # FIXME: set_seed was removed in v1.4 as quality and speed reduction was observed when setting seed.
+        # This should be investigated and fixed for v1.5
+        # set_seed(self.seed)

     def _create_config(self, task, dim_output, time_limit=None):
         cfg = ConfigRun(

@@ -183,6 +190,7 @@ class MitraBase(BaseEstimator):
         """Train the ensemble of models."""

         cfg, Tab2D = self._create_config(task, dim_output, time_limit)
+        rng = np.random.RandomState(cfg.seed)

         success = False
         while not (success and cfg.hyperparams["max_samples_support"] > 0 and cfg.hyperparams["max_samples_query"] > 0):

@@ -217,7 +225,7 @@ class MitraBase(BaseEstimator):
                 path_to_weights=Path(self.state_dict),
                 device=self.device,
             )
-            trainer = TrainerFinetune(cfg, model, n_classes=n_classes, device=self.device)
+            trainer = TrainerFinetune(cfg, model, n_classes=n_classes, device=self.device, rng=rng, verbose=self.verbose)

             start_time = time.time()
             trainer.train(X_train, y_train, X_valid, y_valid)

@@ -275,6 +283,7 @@ class MitraClassifier(MitraBase, ClassifierMixin):
         random_mirror_regression=RANDOM_MIRROR_REGRESSION,
         random_mirror_x=RANDOM_MIRROR_X,
         seed=SEED,
+        verbose=True,
     ):
         """Initialize the classifier."""
         super().__init__(

@@ -294,6 +303,7 @@ class MitraClassifier(MitraBase, ClassifierMixin):
             random_mirror_regression=random_mirror_regression,
             random_mirror_x=random_mirror_x,
             seed=seed,
+            verbose=verbose,
         )
         self.task = 'classification'

@@ -403,6 +413,7 @@ class MitraRegressor(MitraBase, RegressorMixin):
         random_mirror_regression=RANDOM_MIRROR_REGRESSION,
         random_mirror_x=RANDOM_MIRROR_X,
         seed=SEED,
+        verbose=True,
     ):
         """Initialize the regressor."""
         super().__init__(

@@ -422,6 +433,7 @@ class MitraRegressor(MitraBase, RegressorMixin):
             random_mirror_regression=random_mirror_regression,
             random_mirror_x=random_mirror_x,
             seed=seed,
+            verbose=verbose,
         )
         self.task = 'regression'

@@ -492,14 +504,4 @@ class MitraRegressor(MitraBase, RegressorMixin):
 @contextlib.contextmanager
 def mitra_deterministic_context():
     """Context manager to set deterministic settings only for Mitra operations."""
-
-    original_deterministic_algorithms_set = False
-
-    try:
-        torch.use_deterministic_algorithms(True)
-        original_deterministic_algorithms_set = True
-        yield
-
-    finally:
-        if original_deterministic_algorithms_set:
-            torch.use_deterministic_algorithms(False)
+    yield
```
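Taken together, these hunks thread a `verbose` flag from the public wrappers down to `TrainerFinetune`, replace the global `set_seed` call with a local `np.random.RandomState`, and reduce `mitra_deterministic_context` to a no-op. A minimal usage sketch, assuming the wrappers keep the scikit-learn `fit`/`predict` surface implied by `BaseEstimator`/`ClassifierMixin` (the data variables are placeholders):

```python
# Hedged sketch, not an official example; constructor arguments are those visible in this diff.
from autogluon.tabular.models.mitra.sklearn_interface import MitraClassifier

clf = MitraClassifier(
    seed=0,         # stored on the estimator, but set_seed() is currently disabled (see FIXME above)
    verbose=False,  # new in 1.4: forwarded to TrainerFinetune to silence fine-tuning logs
)
clf.fit(X_train, y_train)     # X_train, y_train: placeholder training data
y_pred = clf.predict(X_test)  # X_test: placeholder test data
```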
autogluon/tabular/models/realmlp/realmlp_model.py

```diff
@@ -1,11 +1,5 @@
 """
 Code Adapted from TabArena: https://github.com/autogluon/tabrepo/blob/main/tabrepo/benchmark/models/ag/realmlp/realmlp_model.py
-
-Model: RealMLP
-Paper: Better by Default: Strong Pre-Tuned MLPs and Boosted Trees on Tabular Data
-Authors: David Holzmüller, Léo Grinsztajn, Ingo Steinwart
-Codebase: https://github.com/dholzmueller/pytabkit
-License: Apache-2.0
 """

 from __future__ import annotations

@@ -41,6 +35,19 @@ def set_logger_level(logger_name: str, level: int):

 # pip install pytabkit
 class RealMLPModel(AbstractModel):
+    """
+    RealMLP is an improved multilayer perceptron (MLP) model
+    through a bag of tricks and better default hyperparameters.
+
+    RealMLP is the top performing method overall on TabArena-v0.1: https://tabarena.ai
+
+    Paper: Better by Default: Strong Pre-Tuned MLPs and Boosted Trees on Tabular Data
+    Authors: David Holzmüller, Léo Grinsztajn, Ingo Steinwart
+    Codebase: https://github.com/dholzmueller/pytabkit
+    License: Apache-2.0
+
+    .. versionadded:: 1.4.0
+    """
     ag_key = "REALMLP"
     ag_name = "RealMLP"
     ag_priority = 75
```
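With the class registered under `ag_key = "REALMLP"`, the model is selectable by key in the `hyperparameters` dict documented in the predictor.py hunks later in this diff. A hedged sketch:

```python
# Sketch: training only RealMLP through the standard TabularPredictor API.
# Requires the pytabkit dependency noted above (`pip install pytabkit`).
from autogluon.tabular import TabularPredictor

predictor = TabularPredictor(label="class").fit(
    train_data,                      # placeholder: a pandas DataFrame with a "class" column
    hyperparameters={"REALMLP": {}},  # {} -> use RealMLP's tuned default hyperparameters
)
```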
autogluon/tabular/models/tabicl/tabicl_model.py

```diff
@@ -1,10 +1,5 @@
 """
 Code Adapted from TabArena: https://github.com/autogluon/tabrepo/blob/main/tabrepo/benchmark/models/ag/tabicl/tabicl_model.py
-Model: TabICL
-Paper: TabICL: A Tabular Foundation Model for In-Context Learning on Large Data
-Authors: Jingang Qu, David Holzmüller, Gaël Varoquaux, Marine Le Morvan
-Codebase: https://github.com/soda-inria/tabicl
-License: BSD-3-Clause
 """

 from __future__ import annotations

@@ -23,6 +18,20 @@ logger = logging.getLogger(__name__)

 # TODO: Verify if crashes when weights are not yet downloaded and fit in parallel
 class TabICLModel(AbstractModel):
+    """
+    TabICL is a foundation model for tabular data using in-context learning
+    that is scalable to larger datasets than TabPFNv2. It is pretrained purely on synthetic data.
+    TabICL currently only supports classification tasks.
+
+    TabICL is one of the top performing methods overall on TabArena-v0.1: https://tabarena.ai
+
+    Paper: TabICL: A Tabular Foundation Model for In-Context Learning on Large Data
+    Authors: Jingang Qu, David Holzmüller, Gaël Varoquaux, Marine Le Morvan
+    Codebase: https://github.com/soda-inria/tabicl
+    License: BSD-3-Clause
+
+    .. versionadded:: 1.4.0
+    """
     ag_key = "TABICL"
     ag_name = "TabICL"
     ag_priority = 65

@@ -98,8 +107,8 @@ class TabICLModel(AbstractModel):
         default_auxiliary_params = super()._get_default_auxiliary_params()
         default_auxiliary_params.update(
             {
-                "max_rows":
-                "max_features":
+                "max_rows": 30000,
+                "max_features": 2000,
             }
         )
         return default_auxiliary_params

@@ -147,7 +156,7 @@ class TabICLModel(AbstractModel):
         model_mem_estimate *= 1.3  # add 30% buffer

         # TODO: Observed memory spikes above expected values on large datasets, increasing mem estimate to compensate
-        model_mem_estimate *= 1.5
+        model_mem_estimate *= 2.0  # Note: 1.5 is not large enough, still gets OOM

         mem_estimate = model_mem_estimate + dataset_size_mem_est + baseline_overhead_mem_est
```
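The tightened defaults (`max_rows` 30000, `max_features` 2000) tie into the new constraint keys documented under `ag_args_fit` in the predictor.py hunks below: exceeding a constraint raises an AssertionError at the start of fit unless `ignore_constraints=True`. A hedged sketch of relaxing the row cap for TabICL (placing the keys under `ag_args_fit` is an assumption based on that documentation):

```python
# Sketch: relaxing TabICL's default guardrails; values here are illustrative.
from autogluon.tabular import TabularPredictor

predictor = TabularPredictor(label="class").fit(
    train_data,  # placeholder DataFrame
    hyperparameters={
        "TABICL": {"ag_args_fit": {"max_rows": 60000}},  # default is 30000 in 1.4
    },
)
```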
autogluon/tabular/models/tabm/rtdl_num_embeddings.py

```diff
@@ -1,6 +1,8 @@
 # taken from https://github.com/yandex-research/rtdl-num-embeddings/blob/main/package/rtdl_num_embeddings.py
 """On Embeddings for Numerical Features in Tabular Deep Learning."""

+from __future__ import annotations
+
 __version__ = '0.0.12'

 __all__ = [

@@ -12,6 +14,7 @@ __all__ = [
     'compute_bins',
 ]

+
 import math
 import warnings
 from typing import Any, Literal, Optional, Union
```
autogluon/tabular/models/tabm/tabm_model.py

```diff
@@ -4,12 +4,6 @@ Note: This is a custom implementation of TabM based on TabArena. Because the Aut
 the same time as TabM became available on PyPi, we chose to use TabArena's implementation
 for the AutoGluon 1.4 release as it has already been benchmarked.

-Model: TabM
-Paper: TabM: Advancing Tabular Deep Learning with Parameter-Efficient Ensembling
-Authors: Yury Gorishniy, Akim Kotelnikov, Artem Babenko
-Codebase: https://github.com/yandex-research/tabm
-License: Apache-2.0
-
 Partially adapted from pytabkit's TabM implementation.
 """

@@ -28,6 +22,20 @@ logger = logging.getLogger(__name__)


 class TabMModel(AbstractModel):
+    """
+    TabM is an efficient ensemble of MLPs that is trained simultaneously with mostly shared parameters.
+
+    TabM is one of the top performing methods overall on TabArena-v0.1: https://tabarena.ai
+
+    Paper: TabM: Advancing Tabular Deep Learning with Parameter-Efficient Ensembling
+    Authors: Yury Gorishniy, Akim Kotelnikov, Artem Babenko
+    Codebase: https://github.com/yandex-research/tabm
+    License: Apache-2.0
+
+    Partially adapted from pytabkit's TabM implementation.
+
+    .. versionadded:: 1.4.0
+    """
     ag_key = "TABM"
     ag_name = "TabM"
     ag_priority = 85
```
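The docstring's "mostly shared parameters" is the key idea: all k ensemble members reuse one weight matrix and differ only through cheap per-member modulations, roughly in the spirit of BatchEnsemble. A schematic sketch of that technique (an illustration only, not TabM's actual code; all names are invented):

```python
# Schematic: k MLP "members" share one weight matrix; each member only owns
# small per-member vectors (r, s, bias), so the ensemble costs ~1x the memory.
import torch
import torch.nn as nn

class SharedLinearEnsemble(nn.Module):
    def __init__(self, d_in: int, d_out: int, k: int):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(d_in, d_out) * d_in**-0.5)  # shared by all members
        self.r = nn.Parameter(torch.ones(k, d_in))    # per-member input scaling
        self.s = nn.Parameter(torch.ones(k, d_out))   # per-member output scaling
        self.bias = nn.Parameter(torch.zeros(k, d_out))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, k, d_in) -> (batch, k, d_out); one matmul serves all k members
        return ((x * self.r) @ self.weight) * self.s + self.bias

layer = SharedLinearEnsemble(d_in=8, d_out=16, k=4)
out = layer(torch.randn(32, 4, 8))  # all 4 members evaluated in a single pass
```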
autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py

```diff
@@ -26,6 +26,8 @@ class TabPFNMixModel(AbstractModel):

     TabPFNMix is based off of the TabPFN and TabForestPFN models.

+    We recommend using Mitra instead, as it is an improved version of TabPFNMix.
+
     It is a tabular transformer model pre-trained on purely synthetic data.

     It currently has several limitations:

@@ -34,6 +36,8 @@ class TabPFNMixModel(AbstractModel):
     3. Does not support GPU

     For more information, refer to the `./_internals/README.md` file.
+
+    .. versionadded:: 1.2.0
     """
     ag_key = "TABPFNMIX"
     ag_name = "TabPFNMix"
```
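Given this recommendation, the drop-in route is Mitra's `'MITRA'` key, which the predictor.py hunks below add to the documented model options. A hedged sketch:

```python
# Sketch: requesting Mitra where TabPFNMix would previously have been used.
from autogluon.tabular import TabularPredictor

predictor = TabularPredictor(label="class").fit(
    train_data,  # placeholder DataFrame
    hyperparameters={"MITRA": {}},  # Mitra is the recommended successor to TabPFNMix
)
```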
autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py

```diff
@@ -1,11 +1,5 @@
 """
 Code Adapted from TabArena: https://github.com/autogluon/tabrepo/blob/main/tabrepo/benchmark/models/ag/tabpfnv2/tabpfnv2_model.py
-
-Model: TabPFNv2
-Paper: Accurate predictions on small data with a tabular foundation model
-Authors: Noah Hollmann, Samuel Müller, Lennart Purucker, Arjun Krishnakumar, Max Körfer, Shi Bin Hoo, Robin Tibor Schirrmeister & Frank Hutter
-Codebase: https://github.com/PriorLabs/TabPFN
-License: https://github.com/PriorLabs/TabPFN/blob/main/LICENSE
 """

 from __future__ import annotations

@@ -111,6 +105,20 @@ class FixedSafePowerTransformer(PowerTransformer):


 class TabPFNV2Model(AbstractModel):
+    """
+    TabPFNv2 is a tabular foundation model pre-trained purely on synthetic data that achieves
+    state-of-the-art results with in-context learning on small datasets with <=10000 samples and <=500 features.
+    TabPFNv2 is developed and maintained by PriorLabs: https://priorlabs.ai/
+
+    TabPFNv2 is the top performing method for small datasets on TabArena-v0.1: https://tabarena.ai
+
+    Paper: Accurate predictions on small data with a tabular foundation model
+    Authors: Noah Hollmann, Samuel Müller, Lennart Purucker, Arjun Krishnakumar, Max Körfer, Shi Bin Hoo, Robin Tibor Schirrmeister & Frank Hutter
+    Codebase: https://github.com/PriorLabs/TabPFN
+    License: https://github.com/PriorLabs/TabPFN/blob/main/LICENSE
+
+    .. versionadded:: 1.4.0
+    """
     ag_key = "TABPFNV2"
     ag_name = "TabPFNv2"
     ag_priority = 105

@@ -119,12 +127,14 @@ class TabPFNV2Model(AbstractModel):
         super().__init__(**kwargs)
         self._feature_generator = None
         self._cat_features = None
+        self._cat_indices = None

     def _preprocess(self, X: pd.DataFrame, is_train=False, **kwargs) -> pd.DataFrame:
         X = super()._preprocess(X, **kwargs)
-        self._cat_indices = []

         if is_train:
+            self._cat_indices = []
+
             # X will be the training data.
             self._feature_generator = LabelEncoderFeatureGenerator(verbosity=0)
             self._feature_generator.fit(X=X)

@@ -136,10 +146,11 @@ class TabPFNV2Model(AbstractModel):
                 X=X
             )

-
-
-            self._cat_features
-
+        if is_train:
+            # Detect/set cat features and indices
+            if self._cat_features is None:
+                self._cat_features = self._feature_generator.features_in[:]
+            self._cat_indices = [X.columns.get_loc(col) for col in self._cat_features]

         return X

@@ -187,6 +198,12 @@ class TabPFNV2Model(AbstractModel):
         # logs "Built with PriorLabs-TabPFN"
         self._log_license(device=device)

+        if num_gpus == 0:
+            logger.log(
+                30,
+                f"\tWARNING: Running TabPFNv2 on CPU. This can be very slow. We recommend using a GPU instead."
+            )
+
         X = self.preprocess(X, is_train=True)

         hps = self._get_model_params()

@@ -366,7 +383,7 @@ class TabPFNV2Model(AbstractModel):

         # Add some buffer to each term + 1 GB overhead to be safe
         return int(
-            model_mem + 4 * X_mem +
+            model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est
         )

 @classmethod
```
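The new warning fires when `num_gpus == 0` at fit time. Per-model resources can be steered through the `ag_args_fit` options documented in the predictor.py hunks below; a hedged sketch that requests one GPU for TabPFNv2 and thereby avoids the slow CPU path:

```python
# Sketch: giving TabPFNv2 a GPU via ag_args_fit; requires a CUDA-capable device.
from autogluon.tabular import TabularPredictor

predictor = TabularPredictor(label="class").fit(
    train_data,  # placeholder; TabPFNv2 targets <=10000 samples and <=500 features
    hyperparameters={"TABPFNV2": {"ag_args_fit": {"num_gpus": 1}}},
)
```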
autogluon/tabular/predictor/predictor.py

```diff
@@ -437,18 +437,24 @@ class TabularPredictor:
         presets : list or str or dict, default = ['medium_quality']
             List of preset configurations for various arguments in `fit()`. Can significantly impact predictive accuracy, memory-footprint, and inference latency of trained models, and various other properties of the returned `predictor`.
             It is recommended to specify presets and avoid specifying most other `fit()` arguments or model hyperparameters prior to becoming familiar with AutoGluon.
-            As an example, to get the most accurate overall predictor (regardless of its efficiency), set `presets='best_quality'`.
+            As an example, to get the most accurate overall predictor (regardless of its efficiency), set `presets='best_quality'` (or `extreme_quality` if a GPU is available).
             To get good quality with minimal disk usage, set `presets=['good_quality', 'optimize_for_deployment']`
             Any user-specified arguments in `fit()` will override the values used by presets.
             If specifying a list of presets, later presets will override earlier presets if they alter the same argument.
             For precise definitions of the provided presets, see file: `autogluon/tabular/configs/presets_configs.py`.
             Users can specify custom presets by passing in a dictionary of argument values as an element to the list.

-            Available Presets: ['best_quality', 'high_quality', 'good_quality', 'medium_quality', 'experimental_quality', 'optimize_for_deployment', 'interpretable', 'ignore_text']
+            Available Presets: ['extreme_quality', 'best_quality', 'high_quality', 'good_quality', 'medium_quality', 'experimental_quality', 'optimize_for_deployment', 'interpretable', 'ignore_text']

             It is recommended to only use one `quality` based preset in a given call to `fit()` as they alter many of the same arguments and are not compatible with each-other.

             In-depth Preset Info:
+                extreme_quality={"auto_stack": True, "dynamic_stacking": "auto", "_experimental_dynamic_hyperparameters": True, "hyperparameters": None}
+                    Significantly more accurate than `best_quality` on datasets <= 30000 samples. Requires a GPU for best results.
+                    For datasets <= 30000 samples, will use recent tabular foundation models TabPFNv2, TabICL, and Mitra to maximize performance.
+                    For datasets > 30000 samples, will behave identically to `best_quality`.
+                    Recommended for applications that benefit from the best possible model accuracy.
+
                 best_quality={'auto_stack': True, 'dynamic_stacking': 'auto', 'hyperparameters': 'zeroshot'}
                     Best predictive accuracy with little consideration to inference time or disk usage. Achieve even better results by specifying a large time_limit value.
                     Recommended for applications that benefit from the best possible model accuracy.
```
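The new `extreme_quality` preset is invoked like any other preset; a hedged sketch matching the description above (a GPU is strongly recommended, and datasets above 30000 samples fall back to `best_quality` behavior):

```python
# Sketch: opting into the new extreme_quality preset (added in 1.4).
from autogluon.tabular import TabularPredictor

predictor = TabularPredictor(label="class").fit(
    train_data,                 # placeholder DataFrame
    presets="extreme_quality",  # uses TabPFNv2/TabICL/Mitra on <=30000-sample data
    time_limit=3600,            # seconds; optional, larger budgets improve results
)
```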
```diff
@@ -477,7 +483,7 @@ class TabularPredictor:
                     Because unused models will be deleted under this preset, methods like `predictor.leaderboard()` and `predictor.fit_summary()` will no longer show the full set of models that were trained during `fit()`.
                     Recommended for applications where the inner details of AutoGluon's training is not important and there is no intention of manually choosing between the final models.
                     This preset pairs well with the other presets such as `good_quality` to make a very compact final model.
-                    Identical to calling `predictor.delete_models(models_to_keep='best'
+                    Identical to calling `predictor.delete_models(models_to_keep='best')` and `predictor.save_space()` directly after `fit()`.

                 interpretable={'auto_stack': False, 'hyperparameters': 'interpretable'}
                     Fits only interpretable rule-based models from the imodels package.

@@ -491,9 +497,10 @@ class TabularPredictor:
         hyperparameters : str or dict, default = 'default'
             Determines the hyperparameters used by the models.
             If `str` is passed, will use a preset hyperparameter configuration.
-            Valid `str` options: ['default', 'zeroshot', 'light', 'very_light', 'toy', 'multimodal']
+            Valid `str` options: ['default', 'zeroshot', 'zeroshot_2025_tabfm', 'light', 'very_light', 'toy', 'multimodal']
                 'default': Default AutoGluon hyperparameters intended to get strong accuracy with reasonable disk usage and inference time. Used in the 'medium_quality' preset.
                 'zeroshot': A powerful model portfolio learned from TabRepo's ensemble simulation on 200 datasets. Contains ~100 models and is used in 'best_quality' and 'high_quality' presets.
+                'zeroshot_2025_tabfm': Absolute cutting edge portfolio learned from TabArena's ensemble simulation that leverages tabular foundation models. Contains 22 models and is used in the `extreme_quality` preset.
                 'light': Results in smaller models. Generally will make inference speed much faster and disk usage much lower, but with worse accuracy. Used in the 'good_quality' preset.
                 'very_light': Results in much smaller models. Behaves similarly to 'light', but in many cases with over 10x less disk usage and a further reduction in accuracy.
                 'toy': Results in extremely small models. Only use this when prototyping, as the model quality will be severely reduced.

@@ -505,6 +512,11 @@ class TabularPredictor:
                 'GBM' (LightGBM)
                 'CAT' (CatBoost)
                 'XGB' (XGBoost)
+                'REALMLP' (RealMLP)
+                'TABM' (TabM)
+                'MITRA' (Mitra)
+                'TABICL' (TabICL)
+                'TABPFNV2' (TabPFNv2)
                 'RF' (random forest)
                 'XT' (extremely randomized trees)
                 'KNN' (k-nearest neighbors)

@@ -513,9 +525,8 @@ class TabularPredictor:
                 'FASTAI' (neural network with FastAI backend)
                 'AG_AUTOMM' (`MultimodalPredictor` from `autogluon.multimodal`. Supports Tabular, Text, and Image modalities. GPU is required.)
             Experimental model options include:
-                'FT_TRANSFORMER' (Tabular Transformer, GPU is recommended. Does not scale well to >100 features.)
+                'FT_TRANSFORMER' (Tabular Transformer, GPU is recommended. Does not scale well to >100 features. Recommended to use TabM instead.)
                 'FASTTEXT' (FastText. Note: Has not been tested for a long time.)
-                'TABPFN' (TabPFN. Does not scale well to >100 features or >1000 rows, and does not support regression. Extremely slow inference speed.)
                 'AG_TEXT_NN' (Multimodal Text+Tabular model, GPU is required. Recommended to instead use its successor, 'AG_AUTOMM'.)
                 'AG_IMAGE_NN' (Image model, GPU is required. Recommended to instead use its successor, 'AG_AUTOMM'.)
             If a certain key is missing from hyperparameters, then `fit()` will not train any models of that type. Omitting a model key from hyperparameters is equivalent to including this model key in `excluded_model_types`.
```
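The equivalence stated in the last line above is easiest to see side by side; a hedged sketch of two fits that both skip KNN:

```python
# Sketch: two equivalent ways to ensure no KNN models are trained.
from autogluon.tabular import TabularPredictor

# 1) Omit the 'KNN' key by passing an explicit hyperparameters dict:
TabularPredictor(label="y").fit(train_data, hyperparameters={"GBM": {}, "CAT": {}})

# 2) Keep the default hyperparameters but exclude the model type:
TabularPredictor(label="y").fit(train_data, excluded_model_types=["KNN"])
```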
```diff
@@ -623,6 +634,16 @@ class TabularPredictor:
                     How many GPUs to use during model fit.
                     If 'auto', model will decide. Some models can use GPUs but don't by default due to differences in model quality.
                     Set to 0 to disable usage of GPUs.
+                max_rows : (int, default=None)
+                    If train_data has more rows than `max_rows`, the model will raise an AssertionError at the start of fit.
+                max_features : (int, default=None)
+                    If train_data has more features than `max_features`, the model will raise an AssertionError at the start of fit.
+                max_classes : (int, default=None)
+                    If train_data has more classes than `max_classes`, the model will raise an AssertionError at the start of fit.
+                problem_types : (list[str], default=None)
+                    If the task is not a problem_type in `problem_types`, the model will raise an AssertionError at the start of fit.
+                ignore_constraints : (bool, default=False)
+                    If True, will ignore the values of `max_rows`, `max_features`, `max_classes`, and `problem_types`, treating them as None.
             ag_args_ensemble: Dictionary of hyperparameters shared by all models that control how they are ensembled, if bag mode is enabled.
                 Valid keys:
                     use_orig_features: [True, False, "never"], default True

@@ -723,6 +744,7 @@ class TabularPredictor:
             If "sequential", models will be fit sequentially. This is the most stable option with the most readable logging.
             If "parallel", models will be fit in parallel with ray, splitting available compute between them.
             Note: "parallel" is experimental and may run into issues. It was first added in version 1.2.0.
+            Note: "parallel" does not yet support running with GPUs.
             For machines with 16 or more CPU cores, it is likely that "parallel" will be faster than "sequential".

             .. versionadded:: 1.2.0

@@ -933,14 +955,14 @@ class TabularPredictor:
                 This is because by default, refit_full will fall back to cloning the first fold of the bagged model in case it lacks memory to refit.
                 However, if `save_bag_folds=False`, this fallback isn't possible, as there is no fold model to clone because it wasn't saved.
                 In this scenario, refit will raise an exception for `save_bag_folds=False`, but will succeed if `save_bag_folds=True`.
-                Final disk usage of predictor will be identical regardless of the setting after `predictor.delete_models(models_to_keep="best"
+                Final disk usage of predictor will be identical regardless of the setting after `predictor.delete_models(models_to_keep="best")` is called post-fit.
             set_best_to_refit_full : bool, default = False
                 If True, will change the default model that Predictor uses for prediction when model is not specified to the refit_full version of the model that exhibited the highest validation score.
                 Only valid if `refit_full` is set.
             keep_only_best : bool, default = False
                 If True, only the best model and its ancestor models are saved in the outputted `predictor`. All other models are deleted.
                 If you only care about deploying the most accurate predictor with the smallest file-size and no longer need any of the other trained models or functionality beyond prediction on new data, then set: `keep_only_best=True`, `save_space=True`.
-                This is equivalent to calling `predictor.delete_models(models_to_keep='best'
+                This is equivalent to calling `predictor.delete_models(models_to_keep='best')` directly after `fit()`.
                 If used with `refit_full` and `set_best_to_refit_full`, the best model will be the refit_full model, and the original bagged best model will be deleted.
                 `refit_full` will be automatically set to 'best' in this case to avoid training models which will be later deleted.
             save_space : bool, default = False

@@ -1068,11 +1090,11 @@ class TabularPredictor:
                 20,
                 "No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...\n"
                 "\tRecommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):\n"
-                "\tpresets='
-                "\tpresets='best'
-                "\tpresets='high'
-                "\tpresets='good'
-                "\tpresets='medium'
+                "\tpresets='extreme' : New in v1.4: Massively better than 'best' on datasets <30000 samples by using new models meta-learned on https://tabarena.ai: TabPFNv2, TabICL, Mitra, and TabM. Absolute best accuracy. Requires a GPU. Recommended 64 GB CPU memory and 32+ GB GPU memory.\n"
+                "\tpresets='best' : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.\n"
+                "\tpresets='high' : Strong accuracy with fast inference speed.\n"
+                "\tpresets='good' : Good accuracy with very fast inference speed.\n"
+                "\tpresets='medium' : Fast training time, ideal for initial prototyping.",
             )

         kwargs_orig = kwargs.copy()

@@ -1127,10 +1149,48 @@ class TabularPredictor:
         )
         infer_limit, infer_limit_batch_size = self._validate_infer_limit(infer_limit=infer_limit, infer_limit_batch_size=infer_limit_batch_size)

+        # TODO: Temporary for v1.4. Make this more extensible for v1.5 by letting users make their own dynamic hyperparameters.
+        dynamic_hyperparameters = kwargs["_experimental_dynamic_hyperparameters"]
+        if dynamic_hyperparameters:
+            logger.log(20, f"`extreme` preset uses a dynamic portfolio based on dataset size...")
+            assert hyperparameters is None, f"hyperparameters must be unspecified when `_experimental_dynamic_hyperparameters=True`."
+            n_samples = len(train_data)
+            if n_samples > 30000:
+                data_size = "large"
+            else:
+                data_size = "small"
+            assert data_size in ["large", "small"]
+            if data_size == "large":
+                logger.log(20, f"\tDetected data size: large (>30000 samples), using `zeroshot` portfolio (identical to 'best_quality' preset).")
+                hyperparameters = "zeroshot"
+            else:
+                if "num_stack_levels" not in kwargs_orig:
+                    # disable stacking for tabfm portfolio
+                    num_stack_levels = 0
+                    kwargs["num_stack_levels"] = 0
+                logger.log(
+                    20,
+                    f"\tDetected data size: small (<=30000 samples), using `zeroshot_2025_tabfm` portfolio."
+                    f"\n\t\tNote: `zeroshot_2025_tabfm` portfolio requires a CUDA compatible GPU for best performance."
+                    f"\n\t\tMake sure you have all the relevant dependencies installed: "
+                    f"`pip install autogluon.tabular[tabarena]`."
+                    f"\n\t\tIt is strongly recommended to use a machine with 64+ GB memory "
+                    f"and a CUDA compatible GPU with 32+ GB vRAM when using this preset. "
+                    f"\n\t\tThis portfolio will download foundation model weights from HuggingFace during training. "
+                    f"Ensure you have an internet connection or have pre-downloaded the weights to use these models."
+                    f"\n\t\tThis portfolio was meta-learned with TabArena: https://tabarena.ai"
+                )
+                hyperparameters = "zeroshot_2025_tabfm"

         if hyperparameters is None:
             hyperparameters = "default"
         if isinstance(hyperparameters, str):
+            hyperparameters_str = hyperparameters
             hyperparameters = get_hyperparameter_config(hyperparameters)
+            logger.log(
+                20,
+                f"Using hyperparameters preset: hyperparameters='{hyperparameters_str}'",
+            )
         self._validate_hyperparameters(hyperparameters=hyperparameters)
         self.fit_hyperparameters_ = hyperparameters

@@ -4341,7 +4401,7 @@ class TabularPredictor:
         models_to_delete: str | list[str] | None = None,
         allow_delete_cascade: bool = False,
         delete_from_disk: bool = True,
-        dry_run: bool
+        dry_run: bool = False,
     ):
         """
         Deletes models from `predictor`.

@@ -4372,20 +4432,11 @@ class TabularPredictor:
             If `True`, deletes the models from disk if they were persisted.
             WARNING: This deletes the entire directory for the deleted models, and ALL FILES located there.
            It is highly recommended to first run with `dry_run=True` to understand which directories will be deleted.
-        dry_run : bool, default =
-            WARNING: Starting in v1.4.0 dry_run will default to False.
+        dry_run : bool, default = False
             If `True`, then deletions don't occur, and logging statements are printed describing what would have occurred.
             Set `dry_run=False` to perform the deletions.

         """
-        if dry_run is None:
-            warnings.warn(
-                f"dry_run was not specified for `TabularPredictor.delete_models`. dry_run prior to version 1.4.0 defaults to True. "
-                f"Starting in version 1.4, AutoGluon will default dry_run to False. "
-                f"If you want to maintain the current logic in future versions, explicitly specify `dry_run=True`.",
-                category=FutureWarning,
-            )
-            dry_run = True
         self._assert_is_fit("delete_models")
         if models_to_keep == "best":
             models_to_keep = self.model_best
```
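Because `dry_run` now defaults to `False`, a preview pass must be requested explicitly; a short sketch of the new calling pattern:

```python
# Sketch: preview first, then delete (dry_run defaults to False as of v1.4).
predictor.delete_models(models_to_keep="best", dry_run=True)  # logs planned deletions only
predictor.delete_models(models_to_keep="best")                # actually deletes
```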
```diff
@@ -5042,6 +5093,8 @@ class TabularPredictor:
             learning_curves=False,
             test_data=None,
             raise_on_model_failure=False,
+            # experimental
+            _experimental_dynamic_hyperparameters=False,
         )
         kwargs, ds_valid_keys = self._sanitize_dynamic_stacking_kwargs(kwargs)
         kwargs = self._validate_fit_extra_kwargs(kwargs, extra_valid_keys=list(fit_kwargs_default.keys()) + ds_valid_keys)

@@ -5552,7 +5605,7 @@ class TabularPredictor:
         Identical to performing the following operations in order:

         predictor_clone = predictor.clone(path=path, return_clone=True, dirs_exist_ok=dirs_exist_ok)
-        predictor_clone.delete_models(models_to_keep=model
+        predictor_clone.delete_models(models_to_keep=model)
         predictor_clone.set_model_best(model=model, save_trainer=True)
         predictor_clone.save_space()

@@ -5564,7 +5617,7 @@ class TabularPredictor:
             The model to use in the optimized predictor clone.
             All other unrelated models will be deleted to save disk space.
             Refer to the `models_to_keep` argument of `predictor.delete_models` for available options.
-            Internally calls `predictor_clone.delete_models(models_to_keep=model
+            Internally calls `predictor_clone.delete_models(models_to_keep=model)`
         return_clone : bool, default = False
             If True, returns the loaded cloned TabularPredictor object.
             If False, returns the local path to the cloned TabularPredictor object.
```
autogluon/tabular/trainer/abstract_trainer.py

```diff
@@ -2131,6 +2131,8 @@ class AbstractTabularTrainer(AbstractTrainer[AbstractModel]):
         if isinstance(model, BaggedEnsembleModel) and not compute_score:
             # Do not perform OOF predictions when we don't compute a score.
             model_fit_kwargs["_skip_oof"] = True
+        if not isinstance(model, BaggedEnsembleModel):
+            model_fit_kwargs.setdefault("log_resources", True)

         model_fit_kwargs = dict(
             model=model,
```
autogluon/tabular/version.py (changed: version bump from 1.3.2b20250723 to 1.4.0)