autogluon.tabular 1.5.1b20260105__py3-none-any.whl → 1.5.1b20260117__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (135)
  1. autogluon/tabular/__init__.py +1 -0
  2. autogluon/tabular/configs/config_helper.py +18 -6
  3. autogluon/tabular/configs/feature_generator_presets.py +3 -1
  4. autogluon/tabular/configs/hyperparameter_configs.py +42 -9
  5. autogluon/tabular/configs/presets_configs.py +38 -14
  6. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
  7. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
  8. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
  9. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
  10. autogluon/tabular/experimental/_scikit_mixin.py +6 -2
  11. autogluon/tabular/experimental/_tabular_classifier.py +3 -1
  12. autogluon/tabular/experimental/_tabular_regressor.py +3 -1
  13. autogluon/tabular/experimental/plot_leaderboard.py +73 -19
  14. autogluon/tabular/learner/abstract_learner.py +160 -42
  15. autogluon/tabular/learner/default_learner.py +78 -22
  16. autogluon/tabular/models/__init__.py +2 -2
  17. autogluon/tabular/models/_utils/rapids_utils.py +3 -1
  18. autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
  19. autogluon/tabular/models/automm/automm_model.py +12 -3
  20. autogluon/tabular/models/automm/ft_transformer.py +5 -1
  21. autogluon/tabular/models/catboost/callbacks.py +2 -2
  22. autogluon/tabular/models/catboost/catboost_model.py +93 -29
  23. autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
  24. autogluon/tabular/models/catboost/catboost_utils.py +3 -1
  25. autogluon/tabular/models/ebm/ebm_model.py +8 -13
  26. autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
  27. autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
  28. autogluon/tabular/models/fastainn/callbacks.py +20 -3
  29. autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
  30. autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
  31. autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
  32. autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
  33. autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
  34. autogluon/tabular/models/knn/knn_model.py +41 -8
  35. autogluon/tabular/models/lgb/callbacks.py +32 -9
  36. autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
  37. autogluon/tabular/models/lgb/lgb_model.py +150 -34
  38. autogluon/tabular/models/lgb/lgb_utils.py +12 -4
  39. autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
  40. autogluon/tabular/models/lr/lr_model.py +40 -10
  41. autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
  42. autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
  43. autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
  44. autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
  45. autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
  46. autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
  47. autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
  48. autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
  49. autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
  50. autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
  51. autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
  52. autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
  53. autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
  54. autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
  55. autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
  56. autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
  57. autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
  58. autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
  59. autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
  60. autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
  61. autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
  62. autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
  63. autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
  64. autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
  65. autogluon/tabular/models/mitra/mitra_model.py +16 -11
  66. autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
  67. autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
  68. autogluon/tabular/models/rf/compilers/onnx.py +1 -1
  69. autogluon/tabular/models/rf/rf_model.py +45 -12
  70. autogluon/tabular/models/rf/rf_quantile.py +4 -2
  71. autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
  72. autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
  73. autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
  74. autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
  75. autogluon/tabular/models/tabm/tabm_model.py +8 -4
  76. autogluon/tabular/models/tabm/tabm_reference.py +53 -85
  77. autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
  78. autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
  79. autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
  80. autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
  81. autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
  82. autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
  83. autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
  84. autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
  85. autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
  86. autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
  87. autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
  88. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
  89. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
  90. autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
  91. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
  92. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
  93. autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
  94. autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
  95. autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
  96. autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
  97. autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
  98. autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
  99. autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
  100. autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
  101. autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
  102. autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
  103. autogluon/tabular/models/xgboost/callbacks.py +9 -3
  104. autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
  105. autogluon/tabular/models/xt/xt_model.py +1 -0
  106. autogluon/tabular/predictor/interpretable_predictor.py +3 -1
  107. autogluon/tabular/predictor/predictor.py +409 -128
  108. autogluon/tabular/registry/__init__.py +1 -1
  109. autogluon/tabular/registry/_ag_model_registry.py +4 -5
  110. autogluon/tabular/registry/_model_registry.py +1 -0
  111. autogluon/tabular/testing/fit_helper.py +55 -15
  112. autogluon/tabular/testing/generate_datasets.py +1 -1
  113. autogluon/tabular/testing/model_fit_helper.py +10 -4
  114. autogluon/tabular/trainer/abstract_trainer.py +644 -230
  115. autogluon/tabular/trainer/auto_trainer.py +19 -8
  116. autogluon/tabular/trainer/model_presets/presets.py +33 -9
  117. autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
  118. autogluon/tabular/version.py +1 -1
  119. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/METADATA +27 -27
  120. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/RECORD +127 -135
  121. autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
  122. autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
  123. autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
  124. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
  125. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
  126. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
  127. autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
  128. autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
  129. /autogluon.tabular-1.5.1b20260105-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260117-py3.11-nspkg.pth +0 -0
  130. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/WHEEL +0 -0
  131. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/licenses/LICENSE +0 -0
  132. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/licenses/NOTICE +0 -0
  133. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/namespace_packages.txt +0 -0
  134. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/top_level.txt +0 -0
  135. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260117.dist-info}/zip-safe +0 -0

autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py

@@ -2,8 +2,8 @@ from __future__ import annotations
 
 import logging
 import os
-from pathlib import Path
 import time
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
@@ -11,7 +11,7 @@ import pandas as pd
 from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
 from autogluon.common.utils.resource_utils import ResourceManager
 from autogluon.common.utils.try_import import try_import_torch
-from autogluon.core.constants import BINARY, MULTICLASS, REGRESSION, QUANTILE
+from autogluon.core.constants import BINARY, MULTICLASS, QUANTILE, REGRESSION
 from autogluon.core.models import AbstractModel
 from autogluon.core.utils import generate_train_test_split
 from autogluon.core.utils.exceptions import TimeLimitExceeded
@@ -39,6 +39,7 @@ class TabPFNMixModel(AbstractModel):
 
     .. versionadded:: 1.2.0
     """
+
     ag_key = "TABPFNMIX"
     ag_name = "TabPFNMix"
     ag_priority = 45
@@ -54,9 +55,10 @@ class TabPFNMixModel(AbstractModel):
     def _get_model_type(self):
         from ._internal.tabpfnmix_classifier import TabPFNMixClassifier
         from ._internal.tabpfnmix_regressor import TabPFNMixRegressor
-        if self.problem_type in ['binary', 'multiclass']:
+
+        if self.problem_type in ["binary", "multiclass"]:
             model_cls = TabPFNMixClassifier
-        elif self.problem_type in ['regression']:
+        elif self.problem_type in ["regression"]:
             model_cls = TabPFNMixRegressor
         else:
             raise AssertionError(f"TabPFN does not support problem_type='{self.problem_type}'")
@@ -74,12 +76,10 @@ class TabPFNMixModel(AbstractModel):
             # weights_path_regressor,  # if specified, overrides weights_path for regression problems
             "n_ensembles": 1,  # FIXME: RENAME: n_estimators
             "max_epochs": 0,  # fine-tuning epochs. Will do pure in-context learning if 0.
-
             # next most important hyperparameters
             "lr": 1.0e-05,
             "max_samples_query": 1024,  # larger = slower but better quality on datasets with at least this many validation samples
             "max_samples_support": 8196,  # larger = slower but better quality on datasets with at least this many training samples
-
             # other hyperparameters
             "early_stopping_patience": 40,  # TODO: Figure out optimal value
             "linear_attention": True,
@@ -89,7 +89,6 @@ class TabPFNMixModel(AbstractModel):
             "use_feature_count_scaling": True,
             "use_quantile_transformer": True,
             "weight_decay": 0,
-
             # architecture hyperparameters, recommended to keep as default unless using a custom pre-trained backbone
             "n_classes": 10,
             "n_features": 100,
@@ -98,7 +97,6 @@ class TabPFNMixModel(AbstractModel):
             "attn_dropout": 0.0,
             "dim": 512,
             "y_as_float_embedding": True,
-
             # utility parameters, recommended to keep as default
             "split_val": False,
             "use_best_epoch": True,
@@ -111,17 +109,30 @@ class TabPFNMixModel(AbstractModel):
     # FIXME: Handle model weights download
     # FIXME: GPU support?
     # FIXME: Save model weights to file instead of pickling?
-    def _fit(self, X: pd.DataFrame, y: pd.Series, X_val: pd.DataFrame = None, y_val: pd.Series = None, time_limit: float = None, num_cpus: int = 1, num_gpus: float = 0, **kwargs):
+    def _fit(
+        self,
+        X: pd.DataFrame,
+        y: pd.Series,
+        X_val: pd.DataFrame = None,
+        y_val: pd.Series = None,
+        time_limit: float = None,
+        num_cpus: int = 1,
+        num_gpus: float = 0,
+        **kwargs,
+    ):
         time_start = time.time()
         try_import_torch()
         import torch
+
         from ._internal.config.config_run import ConfigRun
 
         ag_params = self._get_ag_params()
         max_classes = ag_params.get("max_classes")
         if max_classes is not None and self.num_classes is not None and self.num_classes > max_classes:
             # TODO: Move to earlier stage when problem_type is checked
-            raise AssertionError(f"Max allowed classes for the model is {max_classes}, " f"but found {self.num_classes} classes.")
+            raise AssertionError(
+                f"Max allowed classes for the model is {max_classes}, but found {self.num_classes} classes."
+            )
 
         params = self._get_model_params()
         random_state = params.pop(self.seed_name, self.default_random_seed)
@@ -131,17 +142,26 @@ class TabPFNMixModel(AbstractModel):
 
         # TODO: Make max_rows generic
         if max_rows is not None and isinstance(max_rows, (int, float)) and len(X) > max_rows:
-            raise AssertionError(f"Skipping model due to X having more rows than `ag.max_rows={max_rows}` (len(X)={len(X)})")
+            raise AssertionError(
+                f"Skipping model due to X having more rows than `ag.max_rows={max_rows}` (len(X)={len(X)})"
+            )
 
         # TODO: Make sample_rows generic
         if sample_rows is not None and isinstance(sample_rows, int) and len(X) > sample_rows:
            X, y = self._subsample_data(X=X, y=y, num_rows=sample_rows, random_state=random_state)
 
         # TODO: Make sample_rows generic
-        if X_val is not None and y_val is not None and sample_rows_val is not None and isinstance(sample_rows_val, int) and len(X_val) > sample_rows_val:
+        if (
+            X_val is not None
+            and y_val is not None
+            and sample_rows_val is not None
+            and isinstance(sample_rows_val, int)
+            and len(X_val) > sample_rows_val
+        ):
             X_val, y_val = self._subsample_data(X=X_val, y=y_val, num_rows=sample_rows_val, random_state=random_state)
 
         from ._internal.core.enums import Task
+
         if self.problem_type in [REGRESSION, QUANTILE]:
             task = Task.REGRESSION
             n_classes = 0
@@ -186,7 +206,7 @@ class TabPFNMixModel(AbstractModel):
             logger.log(
                 30,
                 f"WARNING: max_epochs should be > 0 if n_ensembles > 1, otherwise there will be zero quality benefit with slower inference. "
-                f"(max_epochs={cfg.hyperparams['max_epochs']}, n_ensembles={cfg.hyperparams['n_ensembles']})"
+                f"(max_epochs={cfg.hyperparams['max_epochs']}, n_ensembles={cfg.hyperparams['n_ensembles']})",
             )
 
         X = self.preprocess(X)
@@ -211,7 +231,9 @@ class TabPFNMixModel(AbstractModel):
             time_cur = time.time()
             time_left = time_limit - (time_cur - time_start)
             if time_left <= 0:
-                raise TimeLimitExceeded(f"No time remaining to fit model (time_limit={time_limit:.2f}s, time_left={time_left:.2f}s)")
+                raise TimeLimitExceeded(
+                    f"No time remaining to fit model (time_limit={time_limit:.2f}s, time_left={time_left:.2f}s)"
+                )
             time_limit = time_left
 
         self.model = model_cls(
@@ -244,7 +266,9 @@ class TabPFNMixModel(AbstractModel):
         return self
 
     # TODO: Make this generic by creating a generic `preprocess_train` and putting this logic prior to `_preprocess`.
-    def _subsample_data(self, X: pd.DataFrame, y: pd.Series, num_rows: int, random_state: int | None = 0) -> (pd.DataFrame, pd.Series):
+    def _subsample_data(
+        self, X: pd.DataFrame, y: pd.Series, num_rows: int, random_state: int | None = 0
+    ) -> (pd.DataFrame, pd.Series):
         num_rows_to_drop = len(X) - num_rows
         X, _, y, _ = generate_train_test_split(
             X=X,
@@ -282,6 +306,7 @@ class TabPFNMixModel(AbstractModel):
         path = super().save(path=path, verbose=verbose)
         if _model_weights is not None:
             import torch
+
             os.makedirs(self.path, exist_ok=True)
             torch.save(_model_weights, self.weights_path)
             self.model.trainer.model = _model_weights
@@ -294,6 +319,7 @@ class TabPFNMixModel(AbstractModel):
 
         if model._weights_saved:
             import torch
+
             model.model.trainer.model = torch.load(model.weights_path, weights_only=False)  # nosec B614
             model._weights_saved = False
         return model
@@ -327,7 +353,13 @@ class TabPFNMixModel(AbstractModel):
 
     def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
         hyperparameters = self._get_model_params()
-        return self.estimate_memory_usage_static(X=X, problem_type=self.problem_type, num_classes=self.num_classes, hyperparameters=hyperparameters, **kwargs)
+        return self.estimate_memory_usage_static(
+            X=X,
+            problem_type=self.problem_type,
+            num_classes=self.num_classes,
+            hyperparameters=hyperparameters,
+            **kwargs,
+        )
 
     def get_minimum_ideal_resources(self) -> dict[str, int | float]:
         return {"num_cpus": 4}
@@ -344,8 +376,12 @@ class TabPFNMixModel(AbstractModel):
         # TODO: Fitting 4 in parallel still causes many OOM errors with 32 GB of memory on relatively small datasets, so each model is using over 8 GB of memory
         # The below logic returns a minimum of 8.8 GB, to avoid OOM errors
        data_mem_usage = 5 * get_approximate_df_mem_usage(X).sum()  # rough estimate
-        model_size = 160*1e6 # model weights are ~160 MB # TODO: Avoid hardcoding, we can derive from the model itself?
-        model_mem_usage = model_size * 5 # Account for 1x copy being fit, 1x copy checkpointed, 2x for optimizer, and 1x for overhead
+        model_size = (
+            160 * 1e6
+        )  # model weights are ~160 MB # TODO: Avoid hardcoding, we can derive from the model itself?
+        model_mem_usage = (
+            model_size * 5
+        )  # Account for 1x copy being fit, 1x copy checkpointed, 2x for optimizer, and 1x for overhead
         model_fit_usage = model_size * 50  # TODO: This is a placeholder large value to try to avoid OOM errors
         mem_usage_estimate = data_mem_usage + model_mem_usage + model_fit_usage
         return mem_usage_estimate
@@ -361,4 +397,4 @@ class TabPFNMixModel(AbstractModel):
 
     def _more_tags(self) -> dict:
         tags = {"can_refit_full": True}
-        return tags
+        return tags

autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py

@@ -61,17 +61,13 @@ class TabPFNModel(AbstractTorchModel):
         # This converts categorical features to numeric via stateful label encoding.
         if self._feature_generator.features_in:
             X = X.copy()
-            X[self._feature_generator.features_in] = self._feature_generator.transform(
-                X=X
-            )
+            X[self._feature_generator.features_in] = self._feature_generator.transform(X=X)
 
         if is_train:
             # Detect/set cat features and indices
             if self._cat_features is None:
                 self._cat_features = self._feature_generator.features_in[:]
-                self._cat_indices = [
-                    X.columns.get_loc(col) for col in self._cat_features
-                ]
+                self._cat_indices = [X.columns.get_loc(col) for col in self._cat_features]
 
         return X
 
@@ -121,9 +117,7 @@ class TabPFNModel(AbstractTorchModel):
            {
                "name": scaler,
                "global_transformer_name": hps.pop("preprocessing/global", None),
-                "categorical_name": hps.pop(
-                    "preprocessing/categoricals", "numeric"
-                ),
+                "categorical_name": hps.pop("preprocessing/categoricals", "numeric"),
                "append_original": hps.pop("preprocessing/append_original", True),
            }
            for scaler in hps["preprocessing/scaling"]
@@ -161,9 +155,7 @@ class TabPFNModel(AbstractTorchModel):
 
         # Resolve inference_config
         inference_config = {
-            _k: v
-            for k, v in hps.items()
-            if k.startswith("inference_config/") and (_k := k.split("/")[-1])
+            _k: v for k, v in hps.items() if k.startswith("inference_config/") and (_k := k.split("/")[-1])
         }
         if inference_config:
             hps["inference_config"] = inference_config
@@ -191,9 +183,7 @@ class TabPFNModel(AbstractTorchModel):
 
         return num_cpus, num_gpus
 
-    def get_minimum_resources(
-        self, is_gpu_available: bool = False
-    ) -> dict[str, int | float]:
+    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
         return {
             "num_cpus": 1,
             "num_gpus": 1 if is_gpu_available else 0,
@@ -278,21 +268,15 @@ class TabPFNModel(AbstractTorchModel):
         model_mem = 14489108  # Based on TabPFNv2 default
 
         n_samples, n_features = X.shape[0], min(X.shape[1], 2000)
-        n_feature_groups = (
-            n_features
-        ) / features_per_group + 1  # TODO: Unsure how to calculate this
+        n_feature_groups = (n_features) / features_per_group + 1  # TODO: Unsure how to calculate this
 
         X_mem = n_samples * n_feature_groups * dtype_byte_size
-        activation_mem = (
-            n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size
-        )
+        activation_mem = n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size
 
         baseline_overhead_mem_est = 1e9  # 1 GB generic overhead
 
         # Add some buffer to each term + 1 GB overhead to be safe
-        return int(
-            model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est
-        )
+        return int(model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est)
 
     @classmethod
     def _class_tags(cls):
@@ -313,12 +297,11 @@ class TabPFNModel(AbstractTorchModel):
         if not _HAS_LOGGED_TABPFN_CPU_WARNING:
             if device == "cpu":
                 logger.log(
-                    20,
-                    "\tRunning TabPFN on CPU. This can be very slow. "
-                    "It is recommended to run TabPFN on a GPU."
+                    20, "\tRunning TabPFN on CPU. This can be very slow. It is recommended to run TabPFN on a GPU."
                 )
                 _HAS_LOGGED_TABPFN_CPU_WARNING = True
 
+
 class RealTabPFNv25Model(TabPFNModel):
     """RealTabPFN-v2.5 version: https://priorlabs.ai/technical-reports/tabpfn-2-5-model-report.
 
@@ -332,9 +315,7 @@ class RealTabPFNv25Model(TabPFNModel):
     ag_key = "REALTABPFN-V2.5"
     ag_name = "RealTabPFN-v2.5"
 
-    default_classification_model: str | None = (
-        "tabpfn-v2.5-classifier-v2.5_default.ckpt"
-    )
+    default_classification_model: str | None = "tabpfn-v2.5-classifier-v2.5_default.ckpt"
     default_regression_model: str | None = "tabpfn-v2.5-regressor-v2.5_default.ckpt"
 
     @staticmethod
@@ -369,7 +350,7 @@ class RealTabPFNv25Model(TabPFNModel):
                 "\tWarning: TabPFN-2.5 is a NONCOMMERCIAL model. "
                 "Usage of this artifact (including through AutoGluon) is not permitted "
                 "for commercial tasks unless granted explicit permission "
-                "by the model authors (PriorLabs)."
+                "by the model authors (PriorLabs).",
             )  # Aligning with TabPFNv25 license
             _HAS_LOGGED_TABPFN_NONCOMMERICAL = True  # Avoid repeated logging
 
@@ -388,9 +369,7 @@ class RealTabPFNv2Model(TabPFNModel):
     ag_name = "RealTabPFN-v2"
 
     # TODO: Verify if this is the same as the "default" ckpt
-    default_classification_model: str | None = (
-        "tabpfn-v2-classifier-finetuned-zk73skhh.ckpt"
-    )
+    default_classification_model: str | None = "tabpfn-v2-classifier-finetuned-zk73skhh.ckpt"
     default_regression_model: str | None = "tabpfn-v2-regressor-v2_default.ckpt"
 
     def _get_default_auxiliary_params(self) -> dict:
@@ -414,11 +393,11 @@ class RealTabPFNv2Model(TabPFNModel):
     # FIXME: Avoid code dupe. This one has 500 features max, 2.5 has 2000.
     @classmethod
     def _estimate_memory_usage_static(
-            cls,
-            *,
-            X: pd.DataFrame,
-            hyperparameters: dict | None = None,
-            **kwargs,
+        cls,
+        *,
+        X: pd.DataFrame,
+        hyperparameters: dict | None = None,
+        **kwargs,
     ) -> int:
         """Heuristic memory estimate based on TabPFN's memory estimate logic in:
         https://github.com/PriorLabs/TabPFN/blob/57a2efd3ebdb3886245e4d097cefa73a5261a969/src/tabpfn/model/memory.py#L147.
@@ -434,18 +413,12 @@ class RealTabPFNv2Model(TabPFNModel):
         model_mem = 14489108  # Based on TabPFNv2 default
 
         n_samples, n_features = X.shape[0], min(X.shape[1], 500)
-        n_feature_groups = (
-            n_features
-        ) / features_per_group + 1  # TODO: Unsure how to calculate this
+        n_feature_groups = (n_features) / features_per_group + 1  # TODO: Unsure how to calculate this
 
         X_mem = n_samples * n_feature_groups * dtype_byte_size
-        activation_mem = (
-            n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size
-        )
+        activation_mem = n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size
 
         baseline_overhead_mem_est = 1e9  # 1 GB generic overhead
 
         # Add some buffer to each term + 1 GB overhead to be safe
-        return int(
-            model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est
-        )
+        return int(model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est)

autogluon/tabular/models/tabprep/prep_mixin.py

@@ -6,10 +6,12 @@ from typing import Type
 import numpy as np
 import pandas as pd
 
-from autogluon.features import ArithmeticFeatureGenerator
-from autogluon.features import CategoricalInteractionFeatureGenerator
-from autogluon.features import OOFTargetEncodingFeatureGenerator
-from autogluon.features import BulkFeatureGenerator
+from autogluon.features import (
+    ArithmeticFeatureGenerator,
+    BulkFeatureGenerator,
+    CategoricalInteractionFeatureGenerator,
+    OOFTargetEncodingFeatureGenerator,
+)
 from autogluon.features.generators.abstract import AbstractFeatureGenerator
 
 logger = logging.getLogger(__name__)
@@ -66,21 +68,23 @@ class ModelAgnosticPrepMixin:
         X_nunique = X.nunique().values
         n_categorical = X.select_dtypes(exclude=[np.number]).shape[1]
         n_numeric = X.loc[:, X_nunique > 2].select_dtypes(include=[np.number]).shape[1]
-        n_binary = X.loc[:, X_nunique <= 2].select_dtypes(include=[np.number]).shape[
-            1]  # NOTE: It can happen that features have less than two unique values if cleaning is applied before the bagging, i.e. Bioresponse
+        n_binary = (
+            X.loc[:, X_nunique <= 2].select_dtypes(include=[np.number]).shape[1]
+        )  # NOTE: It can happen that features have less than two unique values if cleaning is applied before the bagging, i.e. Bioresponse
 
         assert n_numeric + n_categorical + n_binary == X.shape[1]  # NOTE: FOr debugging, to be removed later
         for preprocessor_cls_name, init_params in prep_params:
-            if preprocessor_cls_name == 'ArithmeticFeatureGenerator':
+            if preprocessor_cls_name == "ArithmeticFeatureGenerator":
                 prep_cls = ArithmeticFeatureGenerator(target_type=self.problem_type, **init_params)
-            elif preprocessor_cls_name == 'CategoricalInteractionFeatureGenerator':
+            elif preprocessor_cls_name == "CategoricalInteractionFeatureGenerator":
                 prep_cls = CategoricalInteractionFeatureGenerator(target_type=self.problem_type, **init_params)
-            elif preprocessor_cls_name == 'OOFTargetEncodingFeatureGenerator':
+            elif preprocessor_cls_name == "OOFTargetEncodingFeatureGenerator":
                 prep_cls = OOFTargetEncodingFeatureGenerator(target_type=self.problem_type, **init_params)
             else:
                 raise ValueError(f"Unknown preprocessor class name: {preprocessor_cls_name}")
-            n_numeric, n_categorical, n_binary = prep_cls.estimate_new_dtypes(n_numeric, n_categorical, n_binary,
-                                                                              num_classes=self.num_classes)
+            n_numeric, n_categorical, n_binary = prep_cls.estimate_new_dtypes(
+                n_numeric, n_categorical, n_binary, num_classes=self.num_classes
+            )
 
         return n_numeric, n_categorical, n_binary
 
@@ -108,7 +112,7 @@ class ModelAgnosticPrepMixin:
             df_lst.append(X_estimate_numeric)
         if n_categorical > 0:
             cardinality = int(X.select_dtypes(exclude=[np.number]).nunique().mean())
-            X_estimate = np.random.randint(0, cardinality, [shape, n_categorical]).astype('str')
+            X_estimate = np.random.randint(0, cardinality, [shape, n_categorical]).astype("str")
             X_estimate_cat = pd.DataFrame(X_estimate)
             df_lst.append(X_estimate_cat)
         if n_binary > 0:
@@ -126,9 +130,9 @@ class ModelAgnosticPrepMixin:
         )
 
     def _init_preprocessor(
-            self,
-            preprocessor_cls: Type[AbstractFeatureGenerator] | str,
-            init_params: dict | None,
+        self,
+        preprocessor_cls: Type[AbstractFeatureGenerator] | str,
+        init_params: dict | None,
     ) -> AbstractFeatureGenerator:
         if isinstance(preprocessor_cls, str):
             preprocessor_cls = _feature_generator_class_map[preprocessor_cls]
@@ -188,23 +192,27 @@ class ModelAgnosticPrepMixin:
         if len(preprocessors) == 1 and isinstance(preprocessors[0], AbstractFeatureGenerator):
             return preprocessors
         else:
-            preprocessors = [BulkFeatureGenerator(
-                generators=preprocessors,
-                # TODO: "false_recursive" technically can slow down inference, but need to optimize `True` first
-                # Refer to `Bioresponse` dataset where setting to `True` -> 200s fit time vs `false_recursive` -> 1s fit time
-                remove_unused_features="false_recursive",
-                post_drop_duplicates=True,
-                passthrough=True,
-                passthrough_types=passthrough_types,
-                verbosity=0,
-            )]
+            preprocessors = [
+                BulkFeatureGenerator(
+                    generators=preprocessors,
+                    # TODO: "false_recursive" technically can slow down inference, but need to optimize `True` first
+                    # Refer to `Bioresponse` dataset where setting to `True` -> 200s fit time vs `false_recursive` -> 1s fit time
+                    remove_unused_features="false_recursive",
+                    post_drop_duplicates=True,
+                    passthrough=True,
+                    passthrough_types=passthrough_types,
+                    verbosity=0,
+                )
+            ]
             return preprocessors
 
     def _preprocess(self, X: pd.DataFrame, y=None, is_train: bool = False, **kwargs):
         if is_train:
             self.preprocessors = self.get_preprocessors()
             if self.preprocessors:
-                assert y is not None, f"y must be specified to fit preprocessors... Likely the inheriting class isn't passing `y` in its `preprocess` call."
+                assert y is not None, (
+                    f"y must be specified to fit preprocessors... Likely the inheriting class isn't passing `y` in its `preprocess` call."
+                )
                 # FIXME: add `post_drop_useless`, example: anneal has many useless features
                 feature_metadata_in = self._feature_metadata
                 for prep in self.preprocessors:

autogluon/tabular/models/tabular_nn/compilers/onnx.py

@@ -47,12 +47,24 @@ def quantile_transformer_converter(scope, operator, container):
     if opv < 18:
         C_col = OnnxSplit(C, axis=1, output_names=[f"C_col{x}" for x in range(op.n_features_in_)], op_version=opv)
     else:
-        C_col = OnnxSplit(C, axis=1, num_outputs=C.shape[1], output_names=[f"C_col{x}" for x in range(op.n_features_in_)], op_version=opv)
+        C_col = OnnxSplit(
+            C,
+            axis=1,
+            num_outputs=C.shape[1],
+            output_names=[f"C_col{x}" for x in range(op.n_features_in_)],
+            op_version=opv,
+        )
     C_col.add_to(scope, container)
     if opv < 18:
         X_col = OnnxSplit(X, axis=1, output_names=[f"X_col{x}" for x in range(op.n_features_in_)], op_version=opv)
     else:
-        X_col = OnnxSplit(X, axis=1, num_outputs=X.type.shape[1], output_names=[f"X_col{x}" for x in range(op.n_features_in_)], op_version=opv)
+        X_col = OnnxSplit(
+            X,
+            axis=1,
+            num_outputs=X.type.shape[1],
+            output_names=[f"X_col{x}" for x in range(op.n_features_in_)],
+            op_version=opv,
+        )
     X_col.add_to(scope, container)
     Y_col = []
     for feature_idx in range(op.n_features_in_):
@@ -83,8 +95,12 @@ def quantile_transformer_converter(scope, operator, container):
         )
         references = np.clip(norm.ppf(op.references_), -5.2, 5.2).astype(dtype)
         cst = np.broadcast_to(references, (batch_size, n_quantiles))
-        argmin_reshaped = OnnxReshape(argmin, np.array([batch_size, 1], dtype=np.int64), output_names=[f"reshape_col{feature_idx}"])
-        ref = OnnxGatherElements(cst, argmin_reshaped, axis=1, op_version=opv, output_names=[f"gathernd_col{feature_idx}"])
+        argmin_reshaped = OnnxReshape(
+            argmin, np.array([batch_size, 1], dtype=np.int64), output_names=[f"reshape_col{feature_idx}"]
+        )
+        ref = OnnxGatherElements(
+            cst, argmin_reshaped, axis=1, op_version=opv, output_names=[f"gathernd_col{feature_idx}"]
+        )
         ref_reshape = OnnxReshape(ref, np.array([batch_size, 1], dtype=np.int64), output_names=[f"Y_col{feature_idx}"])
         ref_cast = OnnxCast(ref_reshape, to=1, op_version=opv, output_names=[f"ref_cast{feature_idx}"])
         Y_col.append(ref_cast)
@@ -147,9 +163,17 @@ def _encoder_handle_unknown_transformer_converter(scope, operator, container, na
 
     C_col = op.categories_
     if opv < 18:
-        X_col = OnnxSplit(X, axis=1, output_names=[f"{name_prefix}X_col{x}" for x in range(num_categories)], op_version=opv)
+        X_col = OnnxSplit(
+            X, axis=1, output_names=[f"{name_prefix}X_col{x}" for x in range(num_categories)], op_version=opv
+        )
     else:
-        X_col = OnnxSplit(X, axis=1, num_outputs=X.type.shape[1], output_names=[f"{name_prefix}X_col{x}" for x in range(num_categories)], op_version=opv)
+        X_col = OnnxSplit(
+            X,
+            axis=1,
+            num_outputs=X.type.shape[1],
+            output_names=[f"{name_prefix}X_col{x}" for x in range(num_categories)],
+            op_version=opv,
+        )
     X_col.add_to(scope, container)
     Y_col = []
     for feature_idx in range(num_categories):
@@ -194,7 +218,9 @@ def _encoder_handle_unknown_transformer_converter(scope, operator, container, na
                 output_names=[f"{name_prefix}Y_col{feature_idx}"],
                 op_version=opv,
             )
-            onehot_cast = OnnxCast(onehot_reshaped, to=1, op_version=opv, output_names=[f"{name_prefix}onehot_cast{feature_idx}"])
+            onehot_cast = OnnxCast(
+                onehot_reshaped, to=1, op_version=opv, output_names=[f"{name_prefix}onehot_cast{feature_idx}"]
+            )
             Y_col.append(onehot_cast)
         else:
             argmin_reshaped = OnnxReshape(
@@ -203,7 +229,9 @@ def _encoder_handle_unknown_transformer_converter(scope, operator, container, na
                 output_names=[f"{name_prefix}Y_col{feature_idx}"],
                 op_version=opv,
             )
-            argmin_cast = OnnxCast(argmin_reshaped, to=1, op_version=opv, output_names=[f"{name_prefix}argmin_cast{feature_idx}"])
+            argmin_cast = OnnxCast(
+                argmin_reshaped, to=1, op_version=opv, output_names=[f"{name_prefix}argmin_cast{feature_idx}"]
+            )
             Y_col.append(argmin_cast)
     Y = OnnxConcat(*Y_col, axis=1, op_version=opv, output_names=out[:1])
     Y.add_to(scope, container)