autogluon.tabular 1.5.0b20251228__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of autogluon.tabular has been flagged as potentially problematic.

Files changed (135)
  1. autogluon/tabular/__init__.py +1 -0
  2. autogluon/tabular/configs/config_helper.py +18 -6
  3. autogluon/tabular/configs/feature_generator_presets.py +3 -1
  4. autogluon/tabular/configs/hyperparameter_configs.py +42 -9
  5. autogluon/tabular/configs/presets_configs.py +38 -14
  6. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
  7. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
  8. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
  9. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
  10. autogluon/tabular/experimental/_scikit_mixin.py +6 -2
  11. autogluon/tabular/experimental/_tabular_classifier.py +3 -1
  12. autogluon/tabular/experimental/_tabular_regressor.py +3 -1
  13. autogluon/tabular/experimental/plot_leaderboard.py +73 -19
  14. autogluon/tabular/learner/abstract_learner.py +160 -42
  15. autogluon/tabular/learner/default_learner.py +78 -22
  16. autogluon/tabular/models/__init__.py +2 -2
  17. autogluon/tabular/models/_utils/rapids_utils.py +3 -1
  18. autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
  19. autogluon/tabular/models/automm/automm_model.py +12 -3
  20. autogluon/tabular/models/automm/ft_transformer.py +5 -1
  21. autogluon/tabular/models/catboost/callbacks.py +2 -2
  22. autogluon/tabular/models/catboost/catboost_model.py +93 -29
  23. autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
  24. autogluon/tabular/models/catboost/catboost_utils.py +3 -1
  25. autogluon/tabular/models/ebm/ebm_model.py +8 -13
  26. autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
  27. autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
  28. autogluon/tabular/models/fastainn/callbacks.py +20 -3
  29. autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
  30. autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
  31. autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
  32. autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
  33. autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
  34. autogluon/tabular/models/knn/knn_model.py +41 -8
  35. autogluon/tabular/models/lgb/callbacks.py +32 -9
  36. autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
  37. autogluon/tabular/models/lgb/lgb_model.py +150 -34
  38. autogluon/tabular/models/lgb/lgb_utils.py +12 -4
  39. autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
  40. autogluon/tabular/models/lr/lr_model.py +40 -10
  41. autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
  42. autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
  43. autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
  44. autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
  45. autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
  46. autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
  47. autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
  48. autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
  49. autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
  50. autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
  51. autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
  52. autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
  53. autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
  54. autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
  55. autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
  56. autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
  57. autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
  58. autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
  59. autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
  60. autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
  61. autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
  62. autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
  63. autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
  64. autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
  65. autogluon/tabular/models/mitra/mitra_model.py +16 -11
  66. autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
  67. autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
  68. autogluon/tabular/models/rf/compilers/onnx.py +1 -1
  69. autogluon/tabular/models/rf/rf_model.py +45 -12
  70. autogluon/tabular/models/rf/rf_quantile.py +4 -2
  71. autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
  72. autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
  73. autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
  74. autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
  75. autogluon/tabular/models/tabm/tabm_model.py +8 -4
  76. autogluon/tabular/models/tabm/tabm_reference.py +53 -85
  77. autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
  78. autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
  79. autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
  80. autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
  81. autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
  82. autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
  83. autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
  84. autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
  85. autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
  86. autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
  87. autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
  88. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
  89. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
  90. autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
  91. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
  92. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
  93. autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
  94. autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
  95. autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
  96. autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
  97. autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
  98. autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
  99. autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
  100. autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
  101. autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
  102. autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
  103. autogluon/tabular/models/xgboost/callbacks.py +9 -3
  104. autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
  105. autogluon/tabular/models/xt/xt_model.py +1 -0
  106. autogluon/tabular/predictor/interpretable_predictor.py +3 -1
  107. autogluon/tabular/predictor/predictor.py +409 -128
  108. autogluon/tabular/registry/__init__.py +1 -1
  109. autogluon/tabular/registry/_ag_model_registry.py +4 -5
  110. autogluon/tabular/registry/_model_registry.py +1 -0
  111. autogluon/tabular/testing/fit_helper.py +55 -15
  112. autogluon/tabular/testing/generate_datasets.py +1 -1
  113. autogluon/tabular/testing/model_fit_helper.py +10 -4
  114. autogluon/tabular/trainer/abstract_trainer.py +644 -230
  115. autogluon/tabular/trainer/auto_trainer.py +19 -8
  116. autogluon/tabular/trainer/model_presets/presets.py +33 -9
  117. autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
  118. autogluon/tabular/version.py +1 -1
  119. {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
  120. {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
  121. autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
  122. autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
  123. autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
  124. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
  125. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
  126. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
  127. autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
  128. autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
  129. /autogluon.tabular-1.5.0b20251228-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
  130. {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
  131. {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
  132. {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
  133. {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
  134. {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
  135. {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
--- a/autogluon/tabular/__init__.py
+++ b/autogluon/tabular/__init__.py
@@ -1,5 +1,6 @@
 # noinspection PyUnresolvedReferences
 from autogluon.common.dataset import TabularDataset
+
 # noinspection PyUnresolvedReferences
 from autogluon.common.features.feature_metadata import FeatureMetadata
 from autogluon.common.utils.log_utils import _add_stream_handler
--- a/autogluon/tabular/configs/config_helper.py
+++ b/autogluon/tabular/configs/config_helper.py
@@ -125,7 +125,9 @@ class ConfigBuilder:
 
         if isinstance(presets, list):
             unknown_keys = [k for k in presets if k not in valid_keys]
-            assert len(unknown_keys) == 0, f"The following presets are not recognized: {unknown_keys} - use one of the valid presets: {valid_keys}"
+            assert len(unknown_keys) == 0, (
+                f"The following presets are not recognized: {unknown_keys} - use one of the valid presets: {valid_keys}"
+            )
 
         self.config["presets"] = presets
         return self
@@ -144,12 +146,18 @@ class ConfigBuilder:
         valid_keys = self._valid_keys()
         valid_str_values = list(hyperparameter_config_dict.keys())
         if isinstance(hyperparameters, str):
-            assert hyperparameters in hyperparameter_config_dict, f"{hyperparameters} is not one of the valid presets {valid_str_values}"
+            assert hyperparameters in hyperparameter_config_dict, (
+                f"{hyperparameters} is not one of the valid presets {valid_str_values}"
+            )
         elif isinstance(hyperparameters, dict):
             unknown_keys = [k for k in hyperparameters.keys() if isinstance(k, str) and (k not in valid_keys)]
-            assert len(unknown_keys) == 0, f"The following model types are not recognized: {unknown_keys} - use one of the valid models: {valid_keys}"
+            assert len(unknown_keys) == 0, (
+                f"The following model types are not recognized: {unknown_keys} - use one of the valid models: {valid_keys}"
+            )
         else:
-            raise ValueError(f"hyperparameters must be either str: {valid_str_values} or dict with keys of {valid_keys}")
+            raise ValueError(
+                f"hyperparameters must be either str: {valid_str_values} or dict with keys of {valid_keys}"
+            )
         self.config["hyperparameters"] = hyperparameters
         return self
 
@@ -230,7 +238,9 @@ class ConfigBuilder:
         """
         valid_str_values = scheduler_factory._scheduler_presets.keys()
         if isinstance(hyperparameter_tune_kwargs, str):
-            assert hyperparameter_tune_kwargs in valid_str_values, f"{hyperparameter_tune_kwargs} string must be one of {valid_str_values}"
+            assert hyperparameter_tune_kwargs in valid_str_values, (
+                f"{hyperparameter_tune_kwargs} string must be one of {valid_str_values}"
+            )
         elif not isinstance(hyperparameter_tune_kwargs, dict):
             raise ValueError(f"hyperparameter_tune_kwargs must be either str: {valid_str_values} or dict")
         self.config["hyperparameter_tune_kwargs"] = hyperparameter_tune_kwargs
@@ -294,7 +304,9 @@ class ConfigBuilder:
             models = [models]
 
         unknown_keys = [k for k in models if isinstance(k, str) and (k not in valid_keys)]
-        assert len(unknown_keys) == 0, f"The following model types are not recognized: {unknown_keys} - use one of the valid models: {valid_keys}"
+        assert len(unknown_keys) == 0, (
+            f"The following model types are not recognized: {unknown_keys} - use one of the valid models: {valid_keys}"
+        )
 
         models = [m for m in valid_keys if m not in models]
         self.config["excluded_model_types"] = models
--- a/autogluon/tabular/configs/feature_generator_presets.py
+++ b/autogluon/tabular/configs/feature_generator_presets.py
@@ -18,7 +18,9 @@ def get_default_feature_generator(feature_generator, feature_metadata=None, init
     elif feature_generator == "interpretable":
         feature_generator = AutoMLInterpretablePipelineFeatureGenerator(**init_kwargs)
     else:
-        raise ValueError(f"Unknown feature_generator preset: '{feature_generator}', valid presets: {['auto', 'interpretable']}")
+        raise ValueError(
+            f"Unknown feature_generator preset: '{feature_generator}', valid presets: {['auto', 'interpretable']}"
+        )
     if feature_metadata is not None:
         if feature_generator.feature_metadata_in is None and not feature_generator.is_fit():
            feature_generator.feature_metadata_in = copy.deepcopy(feature_metadata)
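A small usage sketch of the dispatch above, assuming init_kwargs defaults to an empty dict (its default is truncated in the hunk header):

from autogluon.tabular.configs.feature_generator_presets import get_default_feature_generator

# "auto" and "interpretable" are the only accepted preset strings.
feature_generator = get_default_feature_generator(feature_generator="interpretable")
# get_default_feature_generator(feature_generator="bogus")  # ValueError: valid presets: ['auto', 'interpretable']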
--- a/autogluon/tabular/configs/hyperparameter_configs.py
+++ b/autogluon/tabular/configs/hyperparameter_configs.py
@@ -27,12 +27,18 @@ hyperparameter_config_dict = dict(
         "RF": [
             {"criterion": "gini", "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
             {"criterion": "entropy", "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "squared_error", "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
+            {
+                "criterion": "squared_error",
+                "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]},
+            },
         ],
         "XT": [
             {"criterion": "gini", "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
             {"criterion": "entropy", "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "squared_error", "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
+            {
+                "criterion": "squared_error",
+                "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]},
+            },
         ],
     },
     # Results in smaller models. Generally will make inference speed much faster and disk usage much lower, but with worse accuracy.
@@ -53,14 +59,38 @@ hyperparameter_config_dict = dict(
         "XGB": {},
         "FASTAI": {},
         "RF": [
-            {"criterion": "gini", "max_depth": 15, "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "entropy", "max_depth": 15, "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "squared_error", "max_depth": 15, "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
+            {
+                "criterion": "gini",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]},
+            },
+            {
+                "criterion": "entropy",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]},
+            },
+            {
+                "criterion": "squared_error",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]},
+            },
         ],
         "XT": [
-            {"criterion": "gini", "max_depth": 15, "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "entropy", "max_depth": 15, "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
-            {"criterion": "squared_error", "max_depth": 15, "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
+            {
+                "criterion": "gini",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]},
+            },
+            {
+                "criterion": "entropy",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]},
+            },
+            {
+                "criterion": "squared_error",
+                "max_depth": 15,
+                "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]},
+            },
         ],
     },
     # Results in much smaller models. Behaves similarly to 'light', but in many cases with over 10x less disk usage and a further reduction in accuracy.
@@ -137,6 +167,7 @@ hyperparameter_config_dict["experimental_2024"] = {"TABPFNMIX": tabpfnmix_defaul
 hyperparameter_config_dict["experimental_2024"].update(hyperparameter_config_dict["zeroshot_2023"])
 hyperparameter_config_dict["experimental"] = hyperparameter_config_dict["experimental_2024"]
 
+
 def get_hyperparameter_config_options():
     return list(hyperparameter_config_dict.keys())
 
@@ -144,5 +175,7 @@ def get_hyperparameter_config_options():
 def get_hyperparameter_config(config_name):
     config_options = get_hyperparameter_config_options()
     if config_name not in config_options:
-        raise ValueError(f"Valid hyperparameter config names are: {config_options}, but '{config_name}' was given instead.")
+        raise ValueError(
+            f"Valid hyperparameter config names are: {config_options}, but '{config_name}' was given instead."
+        )
     return copy.deepcopy(hyperparameter_config_dict[config_name])
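The accessor pair changed above is small enough to sketch end to end; the deep copy on lookup means callers can mutate the result without touching the module-level dict:

from autogluon.tabular.configs.hyperparameter_configs import (
    get_hyperparameter_config,
    get_hyperparameter_config_options,
)

print(get_hyperparameter_config_options())  # includes 'light', 'zeroshot_2023', 'experimental', ...

light = get_hyperparameter_config("light")
light["RF"][0]["max_depth"] = 10  # mutates only this copy, not hyperparameter_config_dict
# get_hyperparameter_config("no_such_config")  # ValueError listing the valid names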
--- a/autogluon/tabular/configs/presets_configs.py
+++ b/autogluon/tabular/configs/presets_configs.py
@@ -9,14 +9,18 @@ tabular_presets_dict = dict(
         "hyperparameters": "zeroshot",
         "time_limit": 3600,
     },
-
     best_quality_v150={
         "auto_stack": True,
         "dynamic_stacking": "auto",
         "num_stack_levels": 0,
         "hyperparameters": "zeroshot_2025_12_18_cpu",
         "time_limit": 3600,
-        "callbacks": [["EarlyStoppingCountCallback", {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]}]],
+        "callbacks": [
+            [
+                "EarlyStoppingCountCallback",
+                {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]},
+            ]
+        ],
     },
     # High predictive accuracy with fast inference. ~8x faster inference and ~8x lower disk usage than `best_quality`.
     # Recommended for applications that require fast inference speed and/or small model size.
@@ -30,19 +34,22 @@ tabular_presets_dict = dict(
         "set_best_to_refit_full": True,
         "save_bag_folds": False,
     },
-
     high_quality_v150={
         "auto_stack": True,
         "dynamic_stacking": "auto",
         "num_stack_levels": 0,
         "hyperparameters": "zeroshot_2025_12_18_cpu",
         "time_limit": 3600,
-        "callbacks": [["EarlyStoppingCountCallback", {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]}]],
+        "callbacks": [
+            [
+                "EarlyStoppingCountCallback",
+                {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]},
+            ]
+        ],
         "refit_full": True,
         "set_best_to_refit_full": True,
         "save_bag_folds": False,
     },
-
     # Good predictive accuracy with very fast inference. ~4x faster training, ~8x faster inference and ~8x lower disk usage than `high_quality`.
     # Recommended for applications that require very fast inference speed.
     # Aliases: good
@@ -68,7 +75,13 @@ tabular_presets_dict = dict(
     optimize_for_deployment={"keep_only_best": True, "save_space": True},
     # Disables automated feature generation when text features are detected.
     # This is useful to determine how beneficial text features are to the end result, as well as to ensure features are not mistaken for text when they are not.
-    ignore_text={"_feature_generator_kwargs": {"enable_text_ngram_features": False, "enable_text_special_features": False, "enable_raw_text_features": False}},
+    ignore_text={
+        "_feature_generator_kwargs": {
+            "enable_text_ngram_features": False,
+            "enable_text_special_features": False,
+            "enable_raw_text_features": False,
+        }
+    },
     ignore_text_ngrams={"_feature_generator_kwargs": {"enable_text_ngram_features": False}},
     # Fit only interpretable models.
     interpretable={
@@ -86,14 +99,24 @@ tabular_presets_dict = dict(
     best_quality_v082={"auto_stack": True},
     # High predictive accuracy with fast inference. ~10x-200x faster inference and ~10x-200x lower disk usage than `best_quality`.
     # Recommended for applications that require reasonable inference speed and/or model size.
-    high_quality_v082={"auto_stack": True, "refit_full": True, "set_best_to_refit_full": True, "save_bag_folds": False},
+    high_quality_v082={
+        "auto_stack": True,
+        "refit_full": True,
+        "set_best_to_refit_full": True,
+        "save_bag_folds": False,
+    },
     # Good predictive accuracy with very fast inference. ~4x faster inference and ~4x lower disk usage than `high_quality`.
     # Recommended for applications that require fast inference speed.
-    good_quality_v082={"auto_stack": True, "refit_full": True, "set_best_to_refit_full": True, "save_bag_folds": False, "hyperparameters": "light"},
+    good_quality_v082={
+        "auto_stack": True,
+        "refit_full": True,
+        "set_best_to_refit_full": True,
+        "save_bag_folds": False,
+        "hyperparameters": "light",
+    },
     # ------------------------------------------
     # Experimental presets. Only use these presets if you are ok with unstable and potentially poor performing presets.
     # Experimental presets can be removed or changed without warning.
-
     # [EXPERIMENTAL PRESET] The `extreme` preset may be changed or removed without warning.
     # This preset acts as a testing ground for cutting edge features and models which could later be added to the `best_quality` preset in future releases.
     # Using this preset can lead to unexpected crashes, as it hasn't been as thoroughly tested as other presets.
@@ -110,9 +133,13 @@ tabular_presets_dict = dict(
         "num_stack_levels": 0,
         "hyperparameters": "zeroshot_2025_12_18_gpu",
         "time_limit": 3600,
-        "callbacks": [["EarlyStoppingCountCallback", {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]}]],
+        "callbacks": [
+            [
+                "EarlyStoppingCountCallback",
+                {"patience": [[100, 4], [500, 8], [2500, 15], [10000, 40], [100000, 100], None]},
+            ]
+        ],
     },
-
     extreme_quality_v140={
         "auto_stack": True,
         "dynamic_stacking": "auto",
@@ -121,7 +148,6 @@ tabular_presets_dict = dict(
         "hyperparameters": None,
         "time_limit": 3600,
     },
-
     # Preset with a portfolio learned from TabArena v0.1: https://tabarena.ai/
     # Uses tabular foundation models: TabPFNv2, TabICL, Mitra
     # Uses deep learning model: TabM
@@ -136,7 +162,6 @@ tabular_presets_dict = dict(
         "hyperparameters": "zeroshot_2025_tabfm",
         "time_limit": 3600,
     },
-
     # DOES NOT SUPPORT GPU.
     experimental_quality_v120={
         "auto_stack": True,
@@ -147,7 +172,6 @@ tabular_presets_dict = dict(
         "num_gpus": 0,
         "time_limit": 3600,
     },
-
     # ------------------------------------------
     # ------------------------------------------
     # ------------------------------------------
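These preset dicts are selected by name at fit time. A hedged sketch of typical usage (whether the plain `best_quality` alias currently resolves to the `_v150` entry above is an assumption from the version bump, not stated in this diff):

from autogluon.tabular import TabularDataset, TabularPredictor

train_data = TabularDataset("train.csv")  # hypothetical dataset path
predictor = TabularPredictor(label="target").fit(
    train_data,
    presets="best_quality",  # resolves to an entry in tabular_presets_dict
    time_limit=3600,
)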
--- a/autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py
+++ b/autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py
@@ -773,24 +773,94 @@ hyperparameter_portfolio_zeroshot_2023 = {
         {"criterion": "gini", "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
         {"criterion": "entropy", "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
         {"criterion": "squared_error", "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
-        {"max_features": 0.75, "max_leaf_nodes": 37308, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r195", "priority": -13}},
-        {"max_features": 0.75, "max_leaf_nodes": 28310, "min_samples_leaf": 2, "ag_args": {"name_suffix": "_r39", "priority": -32}},
-        {"max_features": 1.0, "max_leaf_nodes": 38572, "min_samples_leaf": 5, "ag_args": {"name_suffix": "_r127", "priority": -45}},
-        {"max_features": 0.75, "max_leaf_nodes": 18242, "min_samples_leaf": 40, "ag_args": {"name_suffix": "_r34", "priority": -47}},
-        {"max_features": "log2", "max_leaf_nodes": 42644, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r166", "priority": -63}},
-        {"max_features": 0.75, "max_leaf_nodes": 36230, "min_samples_leaf": 3, "ag_args": {"name_suffix": "_r15", "priority": -68}},
-        {"max_features": 1.0, "max_leaf_nodes": 48136, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r16", "priority": -81}},
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 37308,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r195", "priority": -13},
+        },
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 28310,
+            "min_samples_leaf": 2,
+            "ag_args": {"name_suffix": "_r39", "priority": -32},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 38572,
+            "min_samples_leaf": 5,
+            "ag_args": {"name_suffix": "_r127", "priority": -45},
+        },
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 18242,
+            "min_samples_leaf": 40,
+            "ag_args": {"name_suffix": "_r34", "priority": -47},
+        },
+        {
+            "max_features": "log2",
+            "max_leaf_nodes": 42644,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r166", "priority": -63},
+        },
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 36230,
+            "min_samples_leaf": 3,
+            "ag_args": {"name_suffix": "_r15", "priority": -68},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 48136,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r16", "priority": -81},
+        },
     ],
     "XT": [
         {"criterion": "gini", "ag_args": {"name_suffix": "Gini", "problem_types": ["binary", "multiclass"]}},
         {"criterion": "entropy", "ag_args": {"name_suffix": "Entr", "problem_types": ["binary", "multiclass"]}},
         {"criterion": "squared_error", "ag_args": {"name_suffix": "MSE", "problem_types": ["regression", "quantile"]}},
-        {"max_features": 0.75, "max_leaf_nodes": 18392, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r42", "priority": -9}},
-        {"max_features": 1.0, "max_leaf_nodes": 12845, "min_samples_leaf": 4, "ag_args": {"name_suffix": "_r172", "priority": -23}},
-        {"max_features": "sqrt", "max_leaf_nodes": 28532, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r49", "priority": -43}},
-        {"max_features": 1.0, "max_leaf_nodes": 19935, "min_samples_leaf": 20, "ag_args": {"name_suffix": "_r4", "priority": -53}},
-        {"max_features": 0.75, "max_leaf_nodes": 29813, "min_samples_leaf": 4, "ag_args": {"name_suffix": "_r178", "priority": -62}},
-        {"max_features": 1.0, "max_leaf_nodes": 40459, "min_samples_leaf": 1, "ag_args": {"name_suffix": "_r197", "priority": -78}},
-        {"max_features": "sqrt", "max_leaf_nodes": 29702, "min_samples_leaf": 2, "ag_args": {"name_suffix": "_r126", "priority": -86}},
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 18392,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r42", "priority": -9},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 12845,
+            "min_samples_leaf": 4,
+            "ag_args": {"name_suffix": "_r172", "priority": -23},
+        },
+        {
+            "max_features": "sqrt",
+            "max_leaf_nodes": 28532,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r49", "priority": -43},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 19935,
+            "min_samples_leaf": 20,
+            "ag_args": {"name_suffix": "_r4", "priority": -53},
+        },
+        {
+            "max_features": 0.75,
+            "max_leaf_nodes": 29813,
+            "min_samples_leaf": 4,
+            "ag_args": {"name_suffix": "_r178", "priority": -62},
+        },
+        {
+            "max_features": 1.0,
+            "max_leaf_nodes": 40459,
+            "min_samples_leaf": 1,
+            "ag_args": {"name_suffix": "_r197", "priority": -78},
+        },
+        {
+            "max_features": "sqrt",
+            "max_leaf_nodes": 29702,
+            "min_samples_leaf": 2,
+            "ag_args": {"name_suffix": "_r126", "priority": -86},
+        },
     ],
 }
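Since the portfolio is a plain hyperparameters mapping, it can also be passed to fit() directly; a sketch using the variable name from the hunk header above:

from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.tabular.configs.zeroshot.zeroshot_portfolio_2023 import hyperparameter_portfolio_zeroshot_2023

train_data = TabularDataset("train.csv")  # hypothetical dataset path
predictor = TabularPredictor(label="target").fit(
    train_data,
    hyperparameters=hyperparameter_portfolio_zeroshot_2023,
)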