autogluon.tabular 1.5.1b20260105__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of autogluon.tabular might be problematic.
- autogluon/tabular/__init__.py +1 -0
- autogluon/tabular/configs/config_helper.py +18 -6
- autogluon/tabular/configs/feature_generator_presets.py +3 -1
- autogluon/tabular/configs/hyperparameter_configs.py +42 -9
- autogluon/tabular/configs/presets_configs.py +38 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
- autogluon/tabular/experimental/_scikit_mixin.py +6 -2
- autogluon/tabular/experimental/_tabular_classifier.py +3 -1
- autogluon/tabular/experimental/_tabular_regressor.py +3 -1
- autogluon/tabular/experimental/plot_leaderboard.py +73 -19
- autogluon/tabular/learner/abstract_learner.py +160 -42
- autogluon/tabular/learner/default_learner.py +78 -22
- autogluon/tabular/models/__init__.py +2 -2
- autogluon/tabular/models/_utils/rapids_utils.py +3 -1
- autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
- autogluon/tabular/models/automm/automm_model.py +12 -3
- autogluon/tabular/models/automm/ft_transformer.py +5 -1
- autogluon/tabular/models/catboost/callbacks.py +2 -2
- autogluon/tabular/models/catboost/catboost_model.py +93 -29
- autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
- autogluon/tabular/models/catboost/catboost_utils.py +3 -1
- autogluon/tabular/models/ebm/ebm_model.py +8 -13
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
- autogluon/tabular/models/fastainn/callbacks.py +20 -3
- autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
- autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
- autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
- autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
- autogluon/tabular/models/knn/knn_model.py +41 -8
- autogluon/tabular/models/lgb/callbacks.py +32 -9
- autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
- autogluon/tabular/models/lgb/lgb_model.py +150 -34
- autogluon/tabular/models/lgb/lgb_utils.py +12 -4
- autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
- autogluon/tabular/models/lr/lr_model.py +40 -10
- autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
- autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
- autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
- autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
- autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
- autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
- autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
- autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
- autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
- autogluon/tabular/models/mitra/mitra_model.py +16 -11
- autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
- autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
- autogluon/tabular/models/rf/compilers/onnx.py +1 -1
- autogluon/tabular/models/rf/rf_model.py +45 -12
- autogluon/tabular/models/rf/rf_quantile.py +4 -2
- autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
- autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
- autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
- autogluon/tabular/models/tabm/tabm_model.py +8 -4
- autogluon/tabular/models/tabm/tabm_reference.py +53 -85
- autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
- autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
- autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
- autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
- autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
- autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
- autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
- autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
- autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
- autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
- autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
- autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
- autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
- autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
- autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
- autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
- autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
- autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
- autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
- autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
- autogluon/tabular/models/xgboost/callbacks.py +9 -3
- autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
- autogluon/tabular/models/xt/xt_model.py +1 -0
- autogluon/tabular/predictor/interpretable_predictor.py +3 -1
- autogluon/tabular/predictor/predictor.py +409 -128
- autogluon/tabular/registry/__init__.py +1 -1
- autogluon/tabular/registry/_ag_model_registry.py +4 -5
- autogluon/tabular/registry/_model_registry.py +1 -0
- autogluon/tabular/testing/fit_helper.py +55 -15
- autogluon/tabular/testing/generate_datasets.py +1 -1
- autogluon/tabular/testing/model_fit_helper.py +10 -4
- autogluon/tabular/trainer/abstract_trainer.py +644 -230
- autogluon/tabular/trainer/auto_trainer.py +19 -8
- autogluon/tabular/trainer/model_presets/presets.py +33 -9
- autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
- autogluon/tabular/version.py +1 -1
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
- /autogluon.tabular-1.5.1b20260105-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
autogluon/tabular/__init__.py CHANGED
@@ -1,5 +1,6 @@
 # noinspection PyUnresolvedReferences
 from autogluon.common.dataset import TabularDataset
+
 # noinspection PyUnresolvedReferences
 from autogluon.common.features.feature_metadata import FeatureMetadata
 from autogluon.common.utils.log_utils import _add_stream_handler
autogluon/tabular/configs/config_helper.py CHANGED
@@ -125,7 +125,9 @@ class ConfigBuilder:
 
         if isinstance(presets, list):
             unknown_keys = [k for k in presets if k not in valid_keys]
-            assert len(unknown_keys) == 0, f"The following presets are not recognized: {unknown_keys} - use one of the valid presets: {valid_keys}"
+            assert len(unknown_keys) == 0, (
+                f"The following presets are not recognized: {unknown_keys} - use one of the valid presets: {valid_keys}"
+            )
 
         self.config["presets"] = presets
         return self
@@ -144,12 +146,18 @@ class ConfigBuilder:
         valid_keys = self._valid_keys()
         valid_str_values = list(hyperparameter_config_dict.keys())
         if isinstance(hyperparameters, str):
-            assert hyperparameters in hyperparameter_config_dict, f"{hyperparameters} is not one of the valid presets {valid_str_values}"
+            assert hyperparameters in hyperparameter_config_dict, (
+                f"{hyperparameters} is not one of the valid presets {valid_str_values}"
+            )
         elif isinstance(hyperparameters, dict):
             unknown_keys = [k for k in hyperparameters.keys() if isinstance(k, str) and (k not in valid_keys)]
-            assert len(unknown_keys) == 0, f"The following model types are not recognized: {unknown_keys} - use one of the valid models: {valid_keys}"
+            assert len(unknown_keys) == 0, (
+                f"The following model types are not recognized: {unknown_keys} - use one of the valid models: {valid_keys}"
+            )
         else:
-            raise ValueError(f"hyperparameters must be either str: {valid_str_values} or dict with keys of {valid_keys}")
+            raise ValueError(
+                f"hyperparameters must be either str: {valid_str_values} or dict with keys of {valid_keys}"
+            )
         self.config["hyperparameters"] = hyperparameters
         return self
 
@@ -230,7 +238,9 @@ class ConfigBuilder:
         """
         valid_str_values = scheduler_factory._scheduler_presets.keys()
         if isinstance(hyperparameter_tune_kwargs, str):
-            assert hyperparameter_tune_kwargs in valid_str_values, f"{hyperparameter_tune_kwargs} string must be one of {valid_str_values}"
+            assert hyperparameter_tune_kwargs in valid_str_values, (
+                f"{hyperparameter_tune_kwargs} string must be one of {valid_str_values}"
+            )
         elif not isinstance(hyperparameter_tune_kwargs, dict):
             raise ValueError(f"hyperparameter_tune_kwargs must be either str: {valid_str_values} or dict")
         self.config["hyperparameter_tune_kwargs"] = hyperparameter_tune_kwargs
@@ -294,7 +304,9 @@ class ConfigBuilder:
             models = [models]
 
         unknown_keys = [k for k in models if isinstance(k, str) and (k not in valid_keys)]
-        assert len(unknown_keys) == 0, f"The following model types are not recognized: {unknown_keys} - use one of the valid models: {valid_keys}"
+        assert len(unknown_keys) == 0, (
+            f"The following model types are not recognized: {unknown_keys} - use one of the valid models: {valid_keys}"
+        )
 
         models = [m for m in valid_keys if m not in models]
         self.config["excluded_model_types"] = models
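For context on the hunks above: each `ConfigBuilder` setter validates its argument and then returns `self`, so calls chain fluently. A minimal usage sketch, assuming the import path `autogluon.tabular.configs.config_helper` and a `build()` finalizer returning the accumulated config dict (neither is visible in this diff):

from autogluon.tabular.configs.config_helper import ConfigBuilder  # assumed import path

# Each setter validates its input (see the assert messages above) and returns self.
config = (
    ConfigBuilder()
    .presets("best_quality")      # an unknown preset would trip the assertion above
    .hyperparameters("light")     # str preset name or dict keyed by model type
    .build()                      # assumed finalizer returning self.config
)
print(config)  # e.g. {'presets': 'best_quality', 'hyperparameters': 'light'}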
autogluon/tabular/configs/feature_generator_presets.py CHANGED
@@ -18,7 +18,9 @@ def get_default_feature_generator(feature_generator, feature_metadata=None, init
     elif feature_generator == "interpretable":
         feature_generator = AutoMLInterpretablePipelineFeatureGenerator(**init_kwargs)
     else:
-        raise ValueError(f"Unknown feature_generator preset: '{feature_generator}', valid presets: {['auto', 'interpretable']}")
+        raise ValueError(
+            f"Unknown feature_generator preset: '{feature_generator}', valid presets: {['auto', 'interpretable']}"
+        )
     if feature_metadata is not None:
         if feature_generator.feature_metadata_in is None and not feature_generator.is_fit():
             feature_generator.feature_metadata_in = copy.deepcopy(feature_metadata)
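The rewrapped ValueError above documents the only two accepted preset strings. A small sketch of the failure path, assuming the helper can be called directly (it is normally invoked internally while a predictor is being fit):

from autogluon.tabular.configs.feature_generator_presets import get_default_feature_generator

try:
    # Only "auto" and "interpretable" are accepted preset strings.
    get_default_feature_generator(feature_generator="typo")
except ValueError as e:
    print(e)
    # Unknown feature_generator preset: 'typo', valid presets: ['auto', 'interpretable']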
autogluon/tabular/configs/hyperparameter_configs.py CHANGED
@@ -27,12 +27,18 @@ hyperparameter_config_dict = dict(
         "RF": [
             {"criterion": "gini", "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
             {"criterion": "entropy", "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "squared_error", "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
+            {
+                "criterion": "squared_error",
+                "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]},
+            },
         ],
         "XT": [
             {"criterion": "gini", "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
             {"criterion": "entropy", "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "squared_error", "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
+            {
+                "criterion": "squared_error",
+                "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]},
+            },
         ],
     },
     # Results in smaller models. Generally will make inference speed much faster and disk usage much lower, but with worse accuracy.
@@ -53,14 +59,38 @@ hyperparameter_config_dict = dict(
         "XGB": {},
         "FASTAI": {},
         "RF": [
-            {"criterion": "gini", "max_depth": 15, "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "entropy", "max_depth": 15, "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "squared_error", "max_depth": 15, "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
+            {
+                "criterion": "gini",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]},
+            },
+            {
+                "criterion": "entropy",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]},
+            },
+            {
+                "criterion": "squared_error",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]},
+            },
         ],
         "XT": [
-            {"criterion": "gini", "max_depth": 15, "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "entropy", "max_depth": 15, "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "squared_error", "max_depth": 15, "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
+            {
+                "criterion": "gini",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]},
+            },
+            {
+                "criterion": "entropy",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]},
+            },
+            {
+                "criterion": "squared_error",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]},
+            },
         ],
     },
     # Results in much smaller models. Behaves similarly to 'light', but in many cases with over 10x less disk usage and a further reduction in accuracy.
@@ -137,6 +167,7 @@ hyperparameter_config_dict["experimental_2024"] = {"TABPFNMIX": tabpfnmix_defaul
 hyperparameter_config_dict["experimental_2024"].update(hyperparameter_config_dict["zeroshot_2023"])
 hyperparameter_config_dict["experimental"] = hyperparameter_config_dict["experimental_2024"]
 
+
 def get_hyperparameter_config_options():
     return list(hyperparameter_config_dict.keys())
 
@@ -144,5 +175,7 @@ def get_hyperparameter_config_options():
 def get_hyperparameter_config(config_name):
     config_options = get_hyperparameter_config_options()
     if config_name not in config_options:
-        raise ValueError(f"Valid hyperparameter config names are: {config_options}, but '{config_name}' was given instead.")
+        raise ValueError(
+            f"Valid hyperparameter config names are: {config_options}, but '{config_name}' was given instead."
+        )
     return copy.deepcopy(hyperparameter_config_dict[config_name])
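The two module-level helpers in the last hunk are the public lookup path for every named config above. A minimal sketch, using only names visible in this diff:

from autogluon.tabular.configs.hyperparameter_configs import (
    get_hyperparameter_config,
    get_hyperparameter_config_options,
)

print(get_hyperparameter_config_options())  # includes e.g. "light", "zeroshot_2023", "experimental_2024"

# get_hyperparameter_config returns a deep copy, so edits never leak back
# into the module-level hyperparameter_config_dict.
light = get_hyperparameter_config("light")
light["RF"][0]["max_depth"] = 20  # the shipped "light" config uses max_depth=15

# Unknown names raise:
# ValueError: Valid hyperparameter config names are: [...], but 'nope' was given instead.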
autogluon/tabular/configs/presets_configs.py CHANGED
@@ -9,14 +9,18 @@ tabular_presets_dict = dict(
         "hyperparameters": "zeroshot",
         "time_limit": 3600,
     },
-
     best_quality_v150={
         "auto_stack": True,
         "dynamic_stacking": "auto",
         "num_stack_levels": 0,
         "hyperparameters": "zeroshot_2025_12_18_cpu",
         "time_limit": 3600,
-        "callbacks": [["EarlyStoppingCountCallback", {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]}]],
+        "callbacks": [
+            [
+                "EarlyStoppingCountCallback",
+                {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]},
+            ]
+        ],
     },
     # High predictive accuracy with fast inference. ~8x faster inference and ~8x lower disk usage than `best_quality`.
     # Recommended for applications that require fast inference speed and/or small model size.
@@ -30,19 +34,22 @@ tabular_presets_dict = dict(
         "set_best_to_refit_full": True,
         "save_bag_folds": False,
     },
-
     high_quality_v150={
         "auto_stack": True,
         "dynamic_stacking": "auto",
         "num_stack_levels": 0,
         "hyperparameters": "zeroshot_2025_12_18_cpu",
         "time_limit": 3600,
-        "callbacks": [["EarlyStoppingCountCallback", {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]}]],
+        "callbacks": [
+            [
+                "EarlyStoppingCountCallback",
+                {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]},
+            ]
+        ],
         "refit_full": True,
         "set_best_to_refit_full": True,
         "save_bag_folds": False,
     },
-
     # Good predictive accuracy with very fast inference. ~4x faster training, ~8x faster inference and ~8x lower disk usage than `high_quality`.
     # Recommended for applications that require very fast inference speed.
     # Aliases: good
@@ -68,7 +75,13 @@ tabular_presets_dict = dict(
     optimize_for_deployment={"keep_only_best": True, "save_space": True},
     # Disables automated feature generation when text features are detected.
     # This is useful to determine how beneficial text features are to the end result, as well as to ensure features are not mistaken for text when they are not.
-    ignore_text={"_feature_generator_kwargs": {"enable_text_ngram_features": False, "enable_text_special_features": False, "enable_raw_text_features": False}},
+    ignore_text={
+        "_feature_generator_kwargs": {
+            "enable_text_ngram_features": False,
+            "enable_text_special_features": False,
+            "enable_raw_text_features": False,
+        }
+    },
     ignore_text_ngrams={"_feature_generator_kwargs": {"enable_text_ngram_features": False}},
     # Fit only interpretable models.
     interpretable={
@@ -86,14 +99,24 @@ tabular_presets_dict = dict(
     best_quality_v082={"auto_stack": True},
     # High predictive accuracy with fast inference. ~10x-200x faster inference and ~10x-200x lower disk usage than `best_quality`.
     # Recommended for applications that require reasonable inference speed and/or model size.
-    high_quality_v082={"auto_stack": True, "refit_full": True, "set_best_to_refit_full": True, "save_bag_folds": False},
+    high_quality_v082={
+        "auto_stack": True,
+        "refit_full": True,
+        "set_best_to_refit_full": True,
+        "save_bag_folds": False,
+    },
     # Good predictive accuracy with very fast inference. ~4x faster inference and ~4x lower disk usage than `high_quality`.
     # Recommended for applications that require fast inference speed.
-    good_quality_v082={"auto_stack": True, "refit_full": True, "set_best_to_refit_full": True, "save_bag_folds": False, "hyperparameters": "light"},
+    good_quality_v082={
+        "auto_stack": True,
+        "refit_full": True,
+        "set_best_to_refit_full": True,
+        "save_bag_folds": False,
+        "hyperparameters": "light",
+    },
     # ------------------------------------------
     # Experimental presets. Only use these presets if you are ok with unstable and potentially poor performing presets.
     # Experimental presets can be removed or changed without warning.
-
     # [EXPERIMENTAL PRESET] The `extreme` preset may be changed or removed without warning.
     # This preset acts as a testing ground for cutting edge features and models which could later be added to the `best_quality` preset in future releases.
     # Using this preset can lead to unexpected crashes, as it hasn't been as thoroughly tested as other presets.
@@ -110,9 +133,13 @@ tabular_presets_dict = dict(
         "num_stack_levels": 0,
         "hyperparameters": "zeroshot_2025_12_18_gpu",
         "time_limit": 3600,
-        "callbacks": [["EarlyStoppingCountCallback", {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]}]],
+        "callbacks": [
+            [
+                "EarlyStoppingCountCallback",
+                {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]},
+            ]
+        ],
     },
-
     extreme_quality_v140={
         "auto_stack": True,
         "dynamic_stacking": "auto",
@@ -121,7 +148,6 @@ tabular_presets_dict = dict(
         "hyperparameters": None,
         "time_limit": 3600,
     },
-
     # Preset with a portfolio learned from TabArena v0.1: https://tabarena.ai/
     # Uses tabular foundation models: TabPFNv2, TabICL, Mitra
     # Uses deep learning model: TabM
@@ -136,7 +162,6 @@ tabular_presets_dict = dict(
         "hyperparameters": "zeroshot_2025_tabfm",
         "time_limit": 3600,
     },
-
     # DOES NOT SUPPORT GPU.
     experimental_quality_v120={
         "auto_stack": True,
@@ -147,7 +172,6 @@ tabular_presets_dict = dict(
         "num_gpus": 0,
         "time_limit": 3600,
     },
-
     # ------------------------------------------
     # ------------------------------------------
     # ------------------------------------------
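These entries back the `presets` argument of `TabularPredictor.fit`; a name such as `best_quality` resolves to a dict like the ones above (auto_stack, a zeroshot hyperparameter portfolio, callbacks, time_limit). A minimal usage sketch; the CSV path and label column are placeholders:

from autogluon.tabular import TabularDataset, TabularPredictor

train_data = TabularDataset("train.csv")  # placeholder path

predictor = TabularPredictor(label="target").fit(  # "target" is a placeholder label
    train_data,
    presets="best_quality",
    time_limit=3600,  # explicitly passed arguments generally take precedence over preset defaults
)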
autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py CHANGED
@@ -773,24 +773,94 @@ hyperparameter_portfolio_zeroshot_2023 = {
         {"criterion": "gini", "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
         {"criterion": "entropy", "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
         {"criterion": "squared_error", "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
-        {"max_features": 0.75, "max_leaf_nodes": 37308, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r195", "priority": -13}},
-        {"max_features": 0.75, "max_leaf_nodes": 28310, "min_samples_leaf": 2, "ag_args": {"name_suffix": "_r39", "priority": -32}},
-        {"max_features": 1.0, "max_leaf_nodes": 38572, "min_samples_leaf": 5, "ag_args": {"name_suffix": "_r127", "priority": -45}},
-        {"max_features": 0.75, "max_leaf_nodes": 18242, "min_samples_leaf": 40, "ag_args": {"name_suffix": "_r34", "priority": -47}},
-        {"max_features": "log2", "max_leaf_nodes": 42644, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r166", "priority": -63}},
-        {"max_features": 0.75, "max_leaf_nodes": 36230, "min_samples_leaf": 3, "ag_args": {"name_suffix": "_r15", "priority": -68}},
-        {"max_features": 1.0, "max_leaf_nodes": 48136, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r16", "priority": -81}},
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 37308,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r195", "priority": -13},
+        },
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 28310,
+            "min_samples_leaf": 2,
+            "ag_args": {"name_suffix": "_r39", "priority": -32},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 38572,
+            "min_samples_leaf": 5,
+            "ag_args": {"name_suffix": "_r127", "priority": -45},
+        },
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 18242,
+            "min_samples_leaf": 40,
+            "ag_args": {"name_suffix": "_r34", "priority": -47},
+        },
+        {
+            "max_features": "log2",
+            "max_leaf_nodes": 42644,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r166", "priority": -63},
+        },
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 36230,
+            "min_samples_leaf": 3,
+            "ag_args": {"name_suffix": "_r15", "priority": -68},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 48136,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r16", "priority": -81},
+        },
     ],
     "XT": [
         {"criterion": "gini", "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
         {"criterion": "entropy", "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
         {"criterion": "squared_error", "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
-        {"max_features": 0.75, "max_leaf_nodes": 18392, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r42", "priority": -9}},
-        {"max_features": 1.0, "max_leaf_nodes": 12845, "min_samples_leaf": 4, "ag_args": {"name_suffix": "_r172", "priority": -23}},
-        {"max_features": "sqrt", "max_leaf_nodes": 28532, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r49", "priority": -43}},
-        {"max_features": 1.0, "max_leaf_nodes": 19935, "min_samples_leaf": 20, "ag_args": {"name_suffix": "_r4", "priority": -53}},
-        {"max_features": 0.75, "max_leaf_nodes": 29813, "min_samples_leaf": 4, "ag_args": {"name_suffix": "_r178", "priority": -62}},
-        {"max_features": 1.0, "max_leaf_nodes": 40459, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r197", "priority": -78}},
-        {"max_features": "sqrt", "max_leaf_nodes": 29702, "min_samples_leaf": 2, "ag_args": {"name_suffix": "_r126", "priority": -86}},
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 18392,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r42", "priority": -9},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 12845,
+            "min_samples_leaf": 4,
+            "ag_args": {"name_suffix": "_r172", "priority": -23},
+        },
+        {
+            "max_features": "sqrt",
+            "max_leaf_nodes": 28532,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r49", "priority": -43},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 19935,
+            "min_samples_leaf": 20,
+            "ag_args": {"name_suffix": "_r4", "priority": -53},
+        },
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 29813,
+            "min_samples_leaf": 4,
+            "ag_args": {"name_suffix": "_r178", "priority": -62},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 40459,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r197", "priority": -78},
+        },
+        {
+            "max_features": "sqrt",
+            "max_leaf_nodes": 29702,
+            "min_samples_leaf": 2,
+            "ag_args": {"name_suffix": "_r126", "priority": -86},
+        },
     ],
 }
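The portfolio above follows AutoGluon's standard hyperparameters layout: a model key ("RF", "XT") mapped to a list of configs, where `ag_args.name_suffix` disambiguates the resulting model names and `ag_args.priority` orders training (higher trains earlier, so these negative priorities run late). A sketch of passing a hand-built portfolio of the same shape; values are copied from this diff, and the data path and label are placeholders:

from autogluon.tabular import TabularDataset, TabularPredictor

custom_portfolio = {
    "RF": [
        # name_suffix yields model names like RandomForest_r195
        {"max_features": 0.75, "max_leaf_nodes": 37308, "min_samples_leaf": 1,
         "ag_args": {"name_suffix": "_r195", "priority": -13}},
    ],
}

train_data = TabularDataset("train.csv")  # placeholder path
predictor = TabularPredictor(label="target").fit(train_data, hyperparameters=custom_portfolio)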