autogluon.tabular 1.5.1b20260105__py3-none-any.whl → 1.5.1b20260117__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.tabular might be problematic. Click here for more details.
- autogluon/tabular/__init__.py +1 -0
- autogluon/tabular/configs/config_helper.py +18 -6
- autogluon/tabular/configs/feature_generator_presets.py +3 -1
- autogluon/tabular/configs/hyperparameter_configs.py +42 -9
- autogluon/tabular/configs/presets_configs.py +38 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
- autogluon/tabular/experimental/_scikit_mixin.py +6 -2
- autogluon/tabular/experimental/_tabular_classifier.py +3 -1
- autogluon/tabular/experimental/_tabular_regressor.py +3 -1
- autogluon/tabular/experimental/plot_leaderboard.py +73 -19
- autogluon/tabular/learner/abstract_learner.py +160 -42
- autogluon/tabular/learner/default_learner.py +78 -22
- autogluon/tabular/models/__init__.py +2 -2
- autogluon/tabular/models/_utils/rapids_utils.py +3 -1
- autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
- autogluon/tabular/models/automm/automm_model.py +12 -3
- autogluon/tabular/models/automm/ft_transformer.py +5 -1
- autogluon/tabular/models/catboost/callbacks.py +2 -2
- autogluon/tabular/models/catboost/catboost_model.py +93 -29
- autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
- autogluon/tabular/models/catboost/catboost_utils.py +3 -1
- autogluon/tabular/models/ebm/ebm_model.py +8 -13
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
- autogluon/tabular/models/fastainn/callbacks.py +20 -3
- autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
- autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
- autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
- autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
- autogluon/tabular/models/knn/knn_model.py +41 -8
- autogluon/tabular/models/lgb/callbacks.py +32 -9
- autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
- autogluon/tabular/models/lgb/lgb_model.py +150 -34
- autogluon/tabular/models/lgb/lgb_utils.py +12 -4
- autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
- autogluon/tabular/models/lr/lr_model.py +40 -10
- autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
- autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
- autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
- autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
- autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
- autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
- autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
- autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
- autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
- autogluon/tabular/models/mitra/mitra_model.py +16 -11
- autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
- autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
- autogluon/tabular/models/rf/compilers/onnx.py +1 -1
- autogluon/tabular/models/rf/rf_model.py +45 -12
- autogluon/tabular/models/rf/rf_quantile.py +4 -2
- autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
- autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
- autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
- autogluon/tabular/models/tabm/tabm_model.py +8 -4
- autogluon/tabular/models/tabm/tabm_reference.py +53 -85
- autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
- autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
- autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
- autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
- autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
- autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
- autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
- autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
- autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
- autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
- autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
- autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
- autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
- autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
- autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
- autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
- autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
- autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
- autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
- autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
- autogluon/tabular/models/xgboost/callbacks.py +9 -3
- autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
- autogluon/tabular/models/xt/xt_model.py +1 -0
- autogluon/tabular/predictor/interpretable_predictor.py +3 -1
- autogluon/tabular/predictor/predictor.py +409 -128
- autogluon/tabular/registry/__init__.py +1 -1
- autogluon/tabular/registry/_ag_model_registry.py +4 -5
- autogluon/tabular/registry/_model_registry.py +1 -0
- autogluon/tabular/testing/fit_helper.py +55 -15
- autogluon/tabular/testing/generate_datasets.py +1 -1
- autogluon/tabular/testing/model_fit_helper.py +10 -4
- autogluon/tabular/trainer/abstract_trainer.py +644 -230
- autogluon/tabular/trainer/auto_trainer.py +19 -8
- autogluon/tabular/trainer/model_presets/presets.py +33 -9
- autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
- autogluon/tabular/version.py +1 -1
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/METADATA +27 -27
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/RECORD +127 -135
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
- /autogluon.tabular-1.5.1b20260105-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260117-py3.11-nspkg.pth +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/WHEEL +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/licenses/LICENSE +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/licenses/NOTICE +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/namespace_packages.txt +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/top_level.txt +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/zip-safe +0 -0
|
@@ -2,8 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
-
from pathlib import Path
|
|
6
5
|
import time
|
|
6
|
+
from pathlib import Path
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
import pandas as pd
|
|
@@ -11,7 +11,7 @@ import pandas as pd
|
|
|
11
11
|
from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
|
|
12
12
|
from autogluon.common.utils.resource_utils import ResourceManager
|
|
13
13
|
from autogluon.common.utils.try_import import try_import_torch
|
|
14
|
-
from autogluon.core.constants import BINARY, MULTICLASS,
|
|
14
|
+
from autogluon.core.constants import BINARY, MULTICLASS, QUANTILE, REGRESSION
|
|
15
15
|
from autogluon.core.models import AbstractModel
|
|
16
16
|
from autogluon.core.utils import generate_train_test_split
|
|
17
17
|
from autogluon.core.utils.exceptions import TimeLimitExceeded
|
|
@@ -39,6 +39,7 @@ class TabPFNMixModel(AbstractModel):
|
|
|
39
39
|
|
|
40
40
|
.. versionadded:: 1.2.0
|
|
41
41
|
"""
|
|
42
|
+
|
|
42
43
|
ag_key = "TABPFNMIX"
|
|
43
44
|
ag_name = "TabPFNMix"
|
|
44
45
|
ag_priority = 45
|
|
@@ -54,9 +55,10 @@ class TabPFNMixModel(AbstractModel):
|
|
|
54
55
|
def _get_model_type(self):
|
|
55
56
|
from ._internal.tabpfnmix_classifier import TabPFNMixClassifier
|
|
56
57
|
from ._internal.tabpfnmix_regressor import TabPFNMixRegressor
|
|
57
|
-
|
|
58
|
+
|
|
59
|
+
if self.problem_type in ["binary", "multiclass"]:
|
|
58
60
|
model_cls = TabPFNMixClassifier
|
|
59
|
-
elif self.problem_type in [
|
|
61
|
+
elif self.problem_type in ["regression"]:
|
|
60
62
|
model_cls = TabPFNMixRegressor
|
|
61
63
|
else:
|
|
62
64
|
raise AssertionError(f"TabPFN does not support problem_type='{self.problem_type}'")
|
|
@@ -74,12 +76,10 @@ class TabPFNMixModel(AbstractModel):
|
|
|
74
76
|
# weights_path_regressor, # if specified, overrides weights_path for regression problems
|
|
75
77
|
"n_ensembles": 1, # FIXME: RENAME: n_estimators
|
|
76
78
|
"max_epochs": 0, # fine-tuning epochs. Will do pure in-context learning if 0.
|
|
77
|
-
|
|
78
79
|
# next most important hyperparameters
|
|
79
80
|
"lr": 1.0e-05,
|
|
80
81
|
"max_samples_query": 1024, # larger = slower but better quality on datasets with at least this many validation samples
|
|
81
82
|
"max_samples_support": 8196, # larger = slower but better quality on datasets with at least this many training samples
|
|
82
|
-
|
|
83
83
|
# other hyperparameters
|
|
84
84
|
"early_stopping_patience": 40, # TODO: Figure out optimal value
|
|
85
85
|
"linear_attention": True,
|
|
@@ -89,7 +89,6 @@ class TabPFNMixModel(AbstractModel):
|
|
|
89
89
|
"use_feature_count_scaling": True,
|
|
90
90
|
"use_quantile_transformer": True,
|
|
91
91
|
"weight_decay": 0,
|
|
92
|
-
|
|
93
92
|
# architecture hyperparameters, recommended to keep as default unless using a custom pre-trained backbone
|
|
94
93
|
"n_classes": 10,
|
|
95
94
|
"n_features": 100,
|
|
@@ -98,7 +97,6 @@ class TabPFNMixModel(AbstractModel):
|
|
|
98
97
|
"attn_dropout": 0.0,
|
|
99
98
|
"dim": 512,
|
|
100
99
|
"y_as_float_embedding": True,
|
|
101
|
-
|
|
102
100
|
# utility parameters, recommended to keep as default
|
|
103
101
|
"split_val": False,
|
|
104
102
|
"use_best_epoch": True,
|
|
@@ -111,17 +109,30 @@ class TabPFNMixModel(AbstractModel):
|
|
|
111
109
|
# FIXME: Handle model weights download
|
|
112
110
|
# FIXME: GPU support?
|
|
113
111
|
# FIXME: Save model weights to file instead of pickling?
|
|
114
|
-
def _fit(
|
|
112
|
+
def _fit(
|
|
113
|
+
self,
|
|
114
|
+
X: pd.DataFrame,
|
|
115
|
+
y: pd.Series,
|
|
116
|
+
X_val: pd.DataFrame = None,
|
|
117
|
+
y_val: pd.Series = None,
|
|
118
|
+
time_limit: float = None,
|
|
119
|
+
num_cpus: int = 1,
|
|
120
|
+
num_gpus: float = 0,
|
|
121
|
+
**kwargs,
|
|
122
|
+
):
|
|
115
123
|
time_start = time.time()
|
|
116
124
|
try_import_torch()
|
|
117
125
|
import torch
|
|
126
|
+
|
|
118
127
|
from ._internal.config.config_run import ConfigRun
|
|
119
128
|
|
|
120
129
|
ag_params = self._get_ag_params()
|
|
121
130
|
max_classes = ag_params.get("max_classes")
|
|
122
131
|
if max_classes is not None and self.num_classes is not None and self.num_classes > max_classes:
|
|
123
132
|
# TODO: Move to earlier stage when problem_type is checked
|
|
124
|
-
raise AssertionError(
|
|
133
|
+
raise AssertionError(
|
|
134
|
+
f"Max allowed classes for the model is {max_classes}, but found {self.num_classes} classes."
|
|
135
|
+
)
|
|
125
136
|
|
|
126
137
|
params = self._get_model_params()
|
|
127
138
|
random_state = params.pop(self.seed_name, self.default_random_seed)
|
|
@@ -131,17 +142,26 @@ class TabPFNMixModel(AbstractModel):
|
|
|
131
142
|
|
|
132
143
|
# TODO: Make max_rows generic
|
|
133
144
|
if max_rows is not None and isinstance(max_rows, (int, float)) and len(X) > max_rows:
|
|
134
|
-
raise AssertionError(
|
|
145
|
+
raise AssertionError(
|
|
146
|
+
f"Skipping model due to X having more rows than `ag.max_rows={max_rows}` (len(X)={len(X)})"
|
|
147
|
+
)
|
|
135
148
|
|
|
136
149
|
# TODO: Make sample_rows generic
|
|
137
150
|
if sample_rows is not None and isinstance(sample_rows, int) and len(X) > sample_rows:
|
|
138
151
|
X, y = self._subsample_data(X=X, y=y, num_rows=sample_rows, random_state=random_state)
|
|
139
152
|
|
|
140
153
|
# TODO: Make sample_rows generic
|
|
141
|
-
if
|
|
154
|
+
if (
|
|
155
|
+
X_val is not None
|
|
156
|
+
and y_val is not None
|
|
157
|
+
and sample_rows_val is not None
|
|
158
|
+
and isinstance(sample_rows_val, int)
|
|
159
|
+
and len(X_val) > sample_rows_val
|
|
160
|
+
):
|
|
142
161
|
X_val, y_val = self._subsample_data(X=X_val, y=y_val, num_rows=sample_rows_val, random_state=random_state)
|
|
143
162
|
|
|
144
163
|
from ._internal.core.enums import Task
|
|
164
|
+
|
|
145
165
|
if self.problem_type in [REGRESSION, QUANTILE]:
|
|
146
166
|
task = Task.REGRESSION
|
|
147
167
|
n_classes = 0
|
|
@@ -186,7 +206,7 @@ class TabPFNMixModel(AbstractModel):
|
|
|
186
206
|
logger.log(
|
|
187
207
|
30,
|
|
188
208
|
f"WARNING: max_epochs should be > 0 if n_ensembles > 1, otherwise there will be zero quality benefit with slower inference. "
|
|
189
|
-
f"(max_epochs={cfg.hyperparams['max_epochs']}, n_ensembles={cfg.hyperparams['n_ensembles']})"
|
|
209
|
+
f"(max_epochs={cfg.hyperparams['max_epochs']}, n_ensembles={cfg.hyperparams['n_ensembles']})",
|
|
190
210
|
)
|
|
191
211
|
|
|
192
212
|
X = self.preprocess(X)
|
|
@@ -211,7 +231,9 @@ class TabPFNMixModel(AbstractModel):
|
|
|
211
231
|
time_cur = time.time()
|
|
212
232
|
time_left = time_limit - (time_cur - time_start)
|
|
213
233
|
if time_left <= 0:
|
|
214
|
-
raise TimeLimitExceeded(
|
|
234
|
+
raise TimeLimitExceeded(
|
|
235
|
+
f"No time remaining to fit model (time_limit={time_limit:.2f}s, time_left={time_left:.2f}s)"
|
|
236
|
+
)
|
|
215
237
|
time_limit = time_left
|
|
216
238
|
|
|
217
239
|
self.model = model_cls(
|
|
@@ -244,7 +266,9 @@ class TabPFNMixModel(AbstractModel):
|
|
|
244
266
|
return self
|
|
245
267
|
|
|
246
268
|
# TODO: Make this generic by creating a generic `preprocess_train` and putting this logic prior to `_preprocess`.
|
|
247
|
-
def _subsample_data(
|
|
269
|
+
def _subsample_data(
|
|
270
|
+
self, X: pd.DataFrame, y: pd.Series, num_rows: int, random_state: int | None = 0
|
|
271
|
+
) -> (pd.DataFrame, pd.Series):
|
|
248
272
|
num_rows_to_drop = len(X) - num_rows
|
|
249
273
|
X, _, y, _ = generate_train_test_split(
|
|
250
274
|
X=X,
|
|
@@ -282,6 +306,7 @@ class TabPFNMixModel(AbstractModel):
|
|
|
282
306
|
path = super().save(path=path, verbose=verbose)
|
|
283
307
|
if _model_weights is not None:
|
|
284
308
|
import torch
|
|
309
|
+
|
|
285
310
|
os.makedirs(self.path, exist_ok=True)
|
|
286
311
|
torch.save(_model_weights, self.weights_path)
|
|
287
312
|
self.model.trainer.model = _model_weights
|
|
@@ -294,6 +319,7 @@ class TabPFNMixModel(AbstractModel):
|
|
|
294
319
|
|
|
295
320
|
if model._weights_saved:
|
|
296
321
|
import torch
|
|
322
|
+
|
|
297
323
|
model.model.trainer.model = torch.load(model.weights_path, weights_only=False) # nosec B614
|
|
298
324
|
model._weights_saved = False
|
|
299
325
|
return model
|
|
@@ -327,7 +353,13 @@ class TabPFNMixModel(AbstractModel):
|
|
|
327
353
|
|
|
328
354
|
def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
|
|
329
355
|
hyperparameters = self._get_model_params()
|
|
330
|
-
return self.estimate_memory_usage_static(
|
|
356
|
+
return self.estimate_memory_usage_static(
|
|
357
|
+
X=X,
|
|
358
|
+
problem_type=self.problem_type,
|
|
359
|
+
num_classes=self.num_classes,
|
|
360
|
+
hyperparameters=hyperparameters,
|
|
361
|
+
**kwargs,
|
|
362
|
+
)
|
|
331
363
|
|
|
332
364
|
def get_minimum_ideal_resources(self) -> dict[str, int | float]:
|
|
333
365
|
return {"num_cpus": 4}
|
|
@@ -344,8 +376,12 @@ class TabPFNMixModel(AbstractModel):
|
|
|
344
376
|
# TODO: Fitting 4 in parallel still causes many OOM errors with 32 GB of memory on relatively small datasets, so each model is using over 8 GB of memory
|
|
345
377
|
# The below logic returns a minimum of 8.8 GB, to avoid OOM errors
|
|
346
378
|
data_mem_usage = 5 * get_approximate_df_mem_usage(X).sum() # rough estimate
|
|
347
|
-
model_size =
|
|
348
|
-
|
|
379
|
+
model_size = (
|
|
380
|
+
160 * 1e6
|
|
381
|
+
) # model weights are ~160 MB # TODO: Avoid hardcoding, we can derive from the model itself?
|
|
382
|
+
model_mem_usage = (
|
|
383
|
+
model_size * 5
|
|
384
|
+
) # Account for 1x copy being fit, 1x copy checkpointed, 2x for optimizer, and 1x for overhead
|
|
349
385
|
model_fit_usage = model_size * 50 # TODO: This is a placeholder large value to try to avoid OOM errors
|
|
350
386
|
mem_usage_estimate = data_mem_usage + model_mem_usage + model_fit_usage
|
|
351
387
|
return mem_usage_estimate
|
|
@@ -361,4 +397,4 @@ class TabPFNMixModel(AbstractModel):
|
|
|
361
397
|
|
|
362
398
|
def _more_tags(self) -> dict:
|
|
363
399
|
tags = {"can_refit_full": True}
|
|
364
|
-
return tags
|
|
400
|
+
return tags
|
|
@@ -61,17 +61,13 @@ class TabPFNModel(AbstractTorchModel):
|
|
|
61
61
|
# This converts categorical features to numeric via stateful label encoding.
|
|
62
62
|
if self._feature_generator.features_in:
|
|
63
63
|
X = X.copy()
|
|
64
|
-
X[self._feature_generator.features_in] = self._feature_generator.transform(
|
|
65
|
-
X=X
|
|
66
|
-
)
|
|
64
|
+
X[self._feature_generator.features_in] = self._feature_generator.transform(X=X)
|
|
67
65
|
|
|
68
66
|
if is_train:
|
|
69
67
|
# Detect/set cat features and indices
|
|
70
68
|
if self._cat_features is None:
|
|
71
69
|
self._cat_features = self._feature_generator.features_in[:]
|
|
72
|
-
self._cat_indices = [
|
|
73
|
-
X.columns.get_loc(col) for col in self._cat_features
|
|
74
|
-
]
|
|
70
|
+
self._cat_indices = [X.columns.get_loc(col) for col in self._cat_features]
|
|
75
71
|
|
|
76
72
|
return X
|
|
77
73
|
|
|
@@ -121,9 +117,7 @@ class TabPFNModel(AbstractTorchModel):
|
|
|
121
117
|
{
|
|
122
118
|
"name": scaler,
|
|
123
119
|
"global_transformer_name": hps.pop("preprocessing/global", None),
|
|
124
|
-
"categorical_name": hps.pop(
|
|
125
|
-
"preprocessing/categoricals", "numeric"
|
|
126
|
-
),
|
|
120
|
+
"categorical_name": hps.pop("preprocessing/categoricals", "numeric"),
|
|
127
121
|
"append_original": hps.pop("preprocessing/append_original", True),
|
|
128
122
|
}
|
|
129
123
|
for scaler in hps["preprocessing/scaling"]
|
|
@@ -161,9 +155,7 @@ class TabPFNModel(AbstractTorchModel):
|
|
|
161
155
|
|
|
162
156
|
# Resolve inference_config
|
|
163
157
|
inference_config = {
|
|
164
|
-
_k: v
|
|
165
|
-
for k, v in hps.items()
|
|
166
|
-
if k.startswith("inference_config/") and (_k := k.split("/")[-1])
|
|
158
|
+
_k: v for k, v in hps.items() if k.startswith("inference_config/") and (_k := k.split("/")[-1])
|
|
167
159
|
}
|
|
168
160
|
if inference_config:
|
|
169
161
|
hps["inference_config"] = inference_config
|
|
@@ -191,9 +183,7 @@ class TabPFNModel(AbstractTorchModel):
|
|
|
191
183
|
|
|
192
184
|
return num_cpus, num_gpus
|
|
193
185
|
|
|
194
|
-
def get_minimum_resources(
|
|
195
|
-
self, is_gpu_available: bool = False
|
|
196
|
-
) -> dict[str, int | float]:
|
|
186
|
+
def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
|
|
197
187
|
return {
|
|
198
188
|
"num_cpus": 1,
|
|
199
189
|
"num_gpus": 1 if is_gpu_available else 0,
|
|
@@ -278,21 +268,15 @@ class TabPFNModel(AbstractTorchModel):
|
|
|
278
268
|
model_mem = 14489108 # Based on TabPFNv2 default
|
|
279
269
|
|
|
280
270
|
n_samples, n_features = X.shape[0], min(X.shape[1], 2000)
|
|
281
|
-
n_feature_groups = (
|
|
282
|
-
n_features
|
|
283
|
-
) / features_per_group + 1 # TODO: Unsure how to calculate this
|
|
271
|
+
n_feature_groups = (n_features) / features_per_group + 1 # TODO: Unsure how to calculate this
|
|
284
272
|
|
|
285
273
|
X_mem = n_samples * n_feature_groups * dtype_byte_size
|
|
286
|
-
activation_mem =
|
|
287
|
-
n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size
|
|
288
|
-
)
|
|
274
|
+
activation_mem = n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size
|
|
289
275
|
|
|
290
276
|
baseline_overhead_mem_est = 1e9 # 1 GB generic overhead
|
|
291
277
|
|
|
292
278
|
# Add some buffer to each term + 1 GB overhead to be safe
|
|
293
|
-
return int(
|
|
294
|
-
model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est
|
|
295
|
-
)
|
|
279
|
+
return int(model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est)
|
|
296
280
|
|
|
297
281
|
@classmethod
|
|
298
282
|
def _class_tags(cls):
|
|
@@ -313,12 +297,11 @@ class TabPFNModel(AbstractTorchModel):
|
|
|
313
297
|
if not _HAS_LOGGED_TABPFN_CPU_WARNING:
|
|
314
298
|
if device == "cpu":
|
|
315
299
|
logger.log(
|
|
316
|
-
20,
|
|
317
|
-
"\tRunning TabPFN on CPU. This can be very slow. "
|
|
318
|
-
"It is recommended to run TabPFN on a GPU."
|
|
300
|
+
20, "\tRunning TabPFN on CPU. This can be very slow. It is recommended to run TabPFN on a GPU."
|
|
319
301
|
)
|
|
320
302
|
_HAS_LOGGED_TABPFN_CPU_WARNING = True
|
|
321
303
|
|
|
304
|
+
|
|
322
305
|
class RealTabPFNv25Model(TabPFNModel):
|
|
323
306
|
"""RealTabPFN-v2.5 version: https://priorlabs.ai/technical-reports/tabpfn-2-5-model-report.
|
|
324
307
|
|
|
@@ -332,9 +315,7 @@ class RealTabPFNv25Model(TabPFNModel):
|
|
|
332
315
|
ag_key = "REALTABPFN-V2.5"
|
|
333
316
|
ag_name = "RealTabPFN-v2.5"
|
|
334
317
|
|
|
335
|
-
default_classification_model: str | None =
|
|
336
|
-
"tabpfn-v2.5-classifier-v2.5_default.ckpt"
|
|
337
|
-
)
|
|
318
|
+
default_classification_model: str | None = "tabpfn-v2.5-classifier-v2.5_default.ckpt"
|
|
338
319
|
default_regression_model: str | None = "tabpfn-v2.5-regressor-v2.5_default.ckpt"
|
|
339
320
|
|
|
340
321
|
@staticmethod
|
|
@@ -369,7 +350,7 @@ class RealTabPFNv25Model(TabPFNModel):
|
|
|
369
350
|
"\tWarning: TabPFN-2.5 is a NONCOMMERCIAL model. "
|
|
370
351
|
"Usage of this artifact (including through AutoGluon) is not permitted "
|
|
371
352
|
"for commercial tasks unless granted explicit permission "
|
|
372
|
-
"by the model authors (PriorLabs)."
|
|
353
|
+
"by the model authors (PriorLabs).",
|
|
373
354
|
) # Aligning with TabPFNv25 license
|
|
374
355
|
_HAS_LOGGED_TABPFN_NONCOMMERICAL = True # Avoid repeated logging
|
|
375
356
|
|
|
@@ -388,9 +369,7 @@ class RealTabPFNv2Model(TabPFNModel):
|
|
|
388
369
|
ag_name = "RealTabPFN-v2"
|
|
389
370
|
|
|
390
371
|
# TODO: Verify if this is the same as the "default" ckpt
|
|
391
|
-
default_classification_model: str | None =
|
|
392
|
-
"tabpfn-v2-classifier-finetuned-zk73skhh.ckpt"
|
|
393
|
-
)
|
|
372
|
+
default_classification_model: str | None = "tabpfn-v2-classifier-finetuned-zk73skhh.ckpt"
|
|
394
373
|
default_regression_model: str | None = "tabpfn-v2-regressor-v2_default.ckpt"
|
|
395
374
|
|
|
396
375
|
def _get_default_auxiliary_params(self) -> dict:
|
|
@@ -414,11 +393,11 @@ class RealTabPFNv2Model(TabPFNModel):
|
|
|
414
393
|
# FIXME: Avoid code dupe. This one has 500 features max, 2.5 has 2000.
|
|
415
394
|
@classmethod
|
|
416
395
|
def _estimate_memory_usage_static(
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
396
|
+
cls,
|
|
397
|
+
*,
|
|
398
|
+
X: pd.DataFrame,
|
|
399
|
+
hyperparameters: dict | None = None,
|
|
400
|
+
**kwargs,
|
|
422
401
|
) -> int:
|
|
423
402
|
"""Heuristic memory estimate based on TabPFN's memory estimate logic in:
|
|
424
403
|
https://github.com/PriorLabs/TabPFN/blob/57a2efd3ebdb3886245e4d097cefa73a5261a969/src/tabpfn/model/memory.py#L147.
|
|
@@ -434,18 +413,12 @@ class RealTabPFNv2Model(TabPFNModel):
|
|
|
434
413
|
model_mem = 14489108 # Based on TabPFNv2 default
|
|
435
414
|
|
|
436
415
|
n_samples, n_features = X.shape[0], min(X.shape[1], 500)
|
|
437
|
-
n_feature_groups = (
|
|
438
|
-
n_features
|
|
439
|
-
) / features_per_group + 1 # TODO: Unsure how to calculate this
|
|
416
|
+
n_feature_groups = (n_features) / features_per_group + 1 # TODO: Unsure how to calculate this
|
|
440
417
|
|
|
441
418
|
X_mem = n_samples * n_feature_groups * dtype_byte_size
|
|
442
|
-
activation_mem =
|
|
443
|
-
n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size
|
|
444
|
-
)
|
|
419
|
+
activation_mem = n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size
|
|
445
420
|
|
|
446
421
|
baseline_overhead_mem_est = 1e9 # 1 GB generic overhead
|
|
447
422
|
|
|
448
423
|
# Add some buffer to each term + 1 GB overhead to be safe
|
|
449
|
-
return int(
|
|
450
|
-
model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est
|
|
451
|
-
)
|
|
424
|
+
return int(model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est)
|
|
@@ -6,10 +6,12 @@ from typing import Type
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
8
|
|
|
9
|
-
from autogluon.features import
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
from autogluon.features import (
|
|
10
|
+
ArithmeticFeatureGenerator,
|
|
11
|
+
BulkFeatureGenerator,
|
|
12
|
+
CategoricalInteractionFeatureGenerator,
|
|
13
|
+
OOFTargetEncodingFeatureGenerator,
|
|
14
|
+
)
|
|
13
15
|
from autogluon.features.generators.abstract import AbstractFeatureGenerator
|
|
14
16
|
|
|
15
17
|
logger = logging.getLogger(__name__)
|
|
@@ -66,21 +68,23 @@ class ModelAgnosticPrepMixin:
|
|
|
66
68
|
X_nunique = X.nunique().values
|
|
67
69
|
n_categorical = X.select_dtypes(exclude=[np.number]).shape[1]
|
|
68
70
|
n_numeric = X.loc[:, X_nunique > 2].select_dtypes(include=[np.number]).shape[1]
|
|
69
|
-
n_binary =
|
|
70
|
-
|
|
71
|
+
n_binary = (
|
|
72
|
+
X.loc[:, X_nunique <= 2].select_dtypes(include=[np.number]).shape[1]
|
|
73
|
+
) # NOTE: It can happen that features have less than two unique values if cleaning is applied before the bagging, i.e. Bioresponse
|
|
71
74
|
|
|
72
75
|
assert n_numeric + n_categorical + n_binary == X.shape[1] # NOTE: FOr debugging, to be removed later
|
|
73
76
|
for preprocessor_cls_name, init_params in prep_params:
|
|
74
|
-
if preprocessor_cls_name ==
|
|
77
|
+
if preprocessor_cls_name == "ArithmeticFeatureGenerator":
|
|
75
78
|
prep_cls = ArithmeticFeatureGenerator(target_type=self.problem_type, **init_params)
|
|
76
|
-
elif preprocessor_cls_name ==
|
|
79
|
+
elif preprocessor_cls_name == "CategoricalInteractionFeatureGenerator":
|
|
77
80
|
prep_cls = CategoricalInteractionFeatureGenerator(target_type=self.problem_type, **init_params)
|
|
78
|
-
elif preprocessor_cls_name ==
|
|
81
|
+
elif preprocessor_cls_name == "OOFTargetEncodingFeatureGenerator":
|
|
79
82
|
prep_cls = OOFTargetEncodingFeatureGenerator(target_type=self.problem_type, **init_params)
|
|
80
83
|
else:
|
|
81
84
|
raise ValueError(f"Unknown preprocessor class name: {preprocessor_cls_name}")
|
|
82
|
-
n_numeric, n_categorical, n_binary = prep_cls.estimate_new_dtypes(
|
|
83
|
-
|
|
85
|
+
n_numeric, n_categorical, n_binary = prep_cls.estimate_new_dtypes(
|
|
86
|
+
n_numeric, n_categorical, n_binary, num_classes=self.num_classes
|
|
87
|
+
)
|
|
84
88
|
|
|
85
89
|
return n_numeric, n_categorical, n_binary
|
|
86
90
|
|
|
@@ -108,7 +112,7 @@ class ModelAgnosticPrepMixin:
|
|
|
108
112
|
df_lst.append(X_estimate_numeric)
|
|
109
113
|
if n_categorical > 0:
|
|
110
114
|
cardinality = int(X.select_dtypes(exclude=[np.number]).nunique().mean())
|
|
111
|
-
X_estimate = np.random.randint(0, cardinality, [shape, n_categorical]).astype(
|
|
115
|
+
X_estimate = np.random.randint(0, cardinality, [shape, n_categorical]).astype("str")
|
|
112
116
|
X_estimate_cat = pd.DataFrame(X_estimate)
|
|
113
117
|
df_lst.append(X_estimate_cat)
|
|
114
118
|
if n_binary > 0:
|
|
@@ -126,9 +130,9 @@ class ModelAgnosticPrepMixin:
|
|
|
126
130
|
)
|
|
127
131
|
|
|
128
132
|
def _init_preprocessor(
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
133
|
+
self,
|
|
134
|
+
preprocessor_cls: Type[AbstractFeatureGenerator] | str,
|
|
135
|
+
init_params: dict | None,
|
|
132
136
|
) -> AbstractFeatureGenerator:
|
|
133
137
|
if isinstance(preprocessor_cls, str):
|
|
134
138
|
preprocessor_cls = _feature_generator_class_map[preprocessor_cls]
|
|
@@ -188,23 +192,27 @@ class ModelAgnosticPrepMixin:
|
|
|
188
192
|
if len(preprocessors) == 1 and isinstance(preprocessors[0], AbstractFeatureGenerator):
|
|
189
193
|
return preprocessors
|
|
190
194
|
else:
|
|
191
|
-
preprocessors = [
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
195
|
+
preprocessors = [
|
|
196
|
+
BulkFeatureGenerator(
|
|
197
|
+
generators=preprocessors,
|
|
198
|
+
# TODO: "false_recursive" technically can slow down inference, but need to optimize `True` first
|
|
199
|
+
# Refer to `Bioresponse` dataset where setting to `True` -> 200s fit time vs `false_recursive` -> 1s fit time
|
|
200
|
+
remove_unused_features="false_recursive",
|
|
201
|
+
post_drop_duplicates=True,
|
|
202
|
+
passthrough=True,
|
|
203
|
+
passthrough_types=passthrough_types,
|
|
204
|
+
verbosity=0,
|
|
205
|
+
)
|
|
206
|
+
]
|
|
201
207
|
return preprocessors
|
|
202
208
|
|
|
203
209
|
def _preprocess(self, X: pd.DataFrame, y=None, is_train: bool = False, **kwargs):
|
|
204
210
|
if is_train:
|
|
205
211
|
self.preprocessors = self.get_preprocessors()
|
|
206
212
|
if self.preprocessors:
|
|
207
|
-
assert y is not None,
|
|
213
|
+
assert y is not None, (
|
|
214
|
+
f"y must be specified to fit preprocessors... Likely the inheriting class isn't passing `y` in its `preprocess` call."
|
|
215
|
+
)
|
|
208
216
|
# FIXME: add `post_drop_useless`, example: anneal has many useless features
|
|
209
217
|
feature_metadata_in = self._feature_metadata
|
|
210
218
|
for prep in self.preprocessors:
|
|
@@ -47,12 +47,24 @@ def quantile_transformer_converter(scope, operator, container):
|
|
|
47
47
|
if opv < 18:
|
|
48
48
|
C_col = OnnxSplit(C, axis=1, output_names=[f"C_col{x}" for x in range(op.n_features_in_)], op_version=opv)
|
|
49
49
|
else:
|
|
50
|
-
C_col = OnnxSplit(
|
|
50
|
+
C_col = OnnxSplit(
|
|
51
|
+
C,
|
|
52
|
+
axis=1,
|
|
53
|
+
num_outputs=C.shape[1],
|
|
54
|
+
output_names=[f"C_col{x}" for x in range(op.n_features_in_)],
|
|
55
|
+
op_version=opv,
|
|
56
|
+
)
|
|
51
57
|
C_col.add_to(scope, container)
|
|
52
58
|
if opv < 18:
|
|
53
59
|
X_col = OnnxSplit(X, axis=1, output_names=[f"X_col{x}" for x in range(op.n_features_in_)], op_version=opv)
|
|
54
60
|
else:
|
|
55
|
-
X_col = OnnxSplit(
|
|
61
|
+
X_col = OnnxSplit(
|
|
62
|
+
X,
|
|
63
|
+
axis=1,
|
|
64
|
+
num_outputs=X.type.shape[1],
|
|
65
|
+
output_names=[f"X_col{x}" for x in range(op.n_features_in_)],
|
|
66
|
+
op_version=opv,
|
|
67
|
+
)
|
|
56
68
|
X_col.add_to(scope, container)
|
|
57
69
|
Y_col = []
|
|
58
70
|
for feature_idx in range(op.n_features_in_):
|
|
@@ -83,8 +95,12 @@ def quantile_transformer_converter(scope, operator, container):
|
|
|
83
95
|
)
|
|
84
96
|
references = np.clip(norm.ppf(op.references_), -5.2, 5.2).astype(dtype)
|
|
85
97
|
cst = np.broadcast_to(references, (batch_size, n_quantiles))
|
|
86
|
-
argmin_reshaped = OnnxReshape(
|
|
87
|
-
|
|
98
|
+
argmin_reshaped = OnnxReshape(
|
|
99
|
+
argmin, np.array([batch_size, 1], dtype=np.int64), output_names=[f"reshape_col{feature_idx}"]
|
|
100
|
+
)
|
|
101
|
+
ref = OnnxGatherElements(
|
|
102
|
+
cst, argmin_reshaped, axis=1, op_version=opv, output_names=[f"gathernd_col{feature_idx}"]
|
|
103
|
+
)
|
|
88
104
|
ref_reshape = OnnxReshape(ref, np.array([batch_size, 1], dtype=np.int64), output_names=[f"Y_col{feature_idx}"])
|
|
89
105
|
ref_cast = OnnxCast(ref_reshape, to=1, op_version=opv, output_names=[f"ref_cast{feature_idx}"])
|
|
90
106
|
Y_col.append(ref_cast)
|
|
@@ -147,9 +163,17 @@ def _encoder_handle_unknown_transformer_converter(scope, operator, container, na
|
|
|
147
163
|
|
|
148
164
|
C_col = op.categories_
|
|
149
165
|
if opv < 18:
|
|
150
|
-
X_col = OnnxSplit(
|
|
166
|
+
X_col = OnnxSplit(
|
|
167
|
+
X, axis=1, output_names=[f"{name_prefix}X_col{x}" for x in range(num_categories)], op_version=opv
|
|
168
|
+
)
|
|
151
169
|
else:
|
|
152
|
-
X_col = OnnxSplit(
|
|
170
|
+
X_col = OnnxSplit(
|
|
171
|
+
X,
|
|
172
|
+
axis=1,
|
|
173
|
+
num_outputs=X.type.shape[1],
|
|
174
|
+
output_names=[f"{name_prefix}X_col{x}" for x in range(num_categories)],
|
|
175
|
+
op_version=opv,
|
|
176
|
+
)
|
|
153
177
|
X_col.add_to(scope, container)
|
|
154
178
|
Y_col = []
|
|
155
179
|
for feature_idx in range(num_categories):
|
|
@@ -194,7 +218,9 @@ def _encoder_handle_unknown_transformer_converter(scope, operator, container, na
|
|
|
194
218
|
output_names=[f"{name_prefix}Y_col{feature_idx}"],
|
|
195
219
|
op_version=opv,
|
|
196
220
|
)
|
|
197
|
-
onehot_cast = OnnxCast(
|
|
221
|
+
onehot_cast = OnnxCast(
|
|
222
|
+
onehot_reshaped, to=1, op_version=opv, output_names=[f"{name_prefix}onehot_cast{feature_idx}"]
|
|
223
|
+
)
|
|
198
224
|
Y_col.append(onehot_cast)
|
|
199
225
|
else:
|
|
200
226
|
argmin_reshaped = OnnxReshape(
|
|
@@ -203,7 +229,9 @@ def _encoder_handle_unknown_transformer_converter(scope, operator, container, na
|
|
|
203
229
|
output_names=[f"{name_prefix}Y_col{feature_idx}"],
|
|
204
230
|
op_version=opv,
|
|
205
231
|
)
|
|
206
|
-
argmin_cast = OnnxCast(
|
|
232
|
+
argmin_cast = OnnxCast(
|
|
233
|
+
argmin_reshaped, to=1, op_version=opv, output_names=[f"{name_prefix}argmin_cast{feature_idx}"]
|
|
234
|
+
)
|
|
207
235
|
Y_col.append(argmin_cast)
|
|
208
236
|
Y = OnnxConcat(*Y_col, axis=1, op_version=opv, output_names=out[:1])
|
|
209
237
|
Y.add_to(scope, container)
|