autogluon.tabular 1.4.0__py3-none-any.whl → 1.4.1b20251128__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

Potentially problematic release: this version of autogluon.tabular has been flagged as potentially problematic.

Files changed (40):
  1. autogluon/tabular/configs/pipeline_presets.py +130 -0
  2. autogluon/tabular/configs/presets_configs.py +0 -3
  3. autogluon/tabular/models/__init__.py +1 -0
  4. autogluon/tabular/models/catboost/catboost_model.py +4 -1
  5. autogluon/tabular/models/ebm/__init__.py +0 -0
  6. autogluon/tabular/models/ebm/ebm_model.py +259 -0
  7. autogluon/tabular/models/ebm/hyperparameters/__init__.py +0 -0
  8. autogluon/tabular/models/ebm/hyperparameters/parameters.py +39 -0
  9. autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +72 -0
  10. autogluon/tabular/models/fastainn/tabular_nn_fastai.py +4 -2
  11. autogluon/tabular/models/knn/knn_model.py +7 -3
  12. autogluon/tabular/models/lgb/lgb_model.py +56 -18
  13. autogluon/tabular/models/lr/lr_model.py +6 -1
  14. autogluon/tabular/models/lr/lr_preprocessing_utils.py +6 -7
  15. autogluon/tabular/models/mitra/_internal/models/tab2d.py +10 -10
  16. autogluon/tabular/models/mitra/mitra_model.py +43 -3
  17. autogluon/tabular/models/mitra/sklearn_interface.py +8 -21
  18. autogluon/tabular/models/realmlp/realmlp_model.py +1 -3
  19. autogluon/tabular/models/rf/rf_model.py +5 -1
  20. autogluon/tabular/models/tabicl/tabicl_model.py +1 -7
  21. autogluon/tabular/models/tabm/tabm_model.py +76 -6
  22. autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +6 -4
  23. autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +1 -7
  24. autogluon/tabular/models/tabular_nn/hyperparameters/parameters.py +1 -3
  25. autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +2 -1
  26. autogluon/tabular/models/xgboost/xgboost_model.py +8 -1
  27. autogluon/tabular/predictor/predictor.py +63 -55
  28. autogluon/tabular/registry/_ag_model_registry.py +2 -0
  29. autogluon/tabular/testing/fit_helper.py +28 -0
  30. autogluon/tabular/version.py +1 -1
  31. autogluon.tabular-1.4.1b20251128-py3.11-nspkg.pth +1 -0
  32. {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/METADATA +87 -71
  33. {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/RECORD +39 -33
  34. {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/WHEEL +1 -1
  35. autogluon.tabular-1.4.0-py3.9-nspkg.pth +0 -1
  36. {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info/licenses}/LICENSE +0 -0
  37. {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info/licenses}/NOTICE +0 -0
  38. {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/namespace_packages.txt +0 -0
  39. {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/top_level.txt +0 -0
  40. {autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/zip-safe +0 -0
autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py

@@ -50,6 +50,7 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
     ag_key = "NN_TORCH"
     ag_name = "NeuralNetTorch"
     ag_priority = 25
+    seed_name = "seed_value"
 
     # Constants used throughout this class:
     unique_category_str = np.nan  # string used to represent missing values and unknown categories for categorical features.
@@ -191,7 +192,7 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
 
         processor_kwargs, optimizer_kwargs, fit_kwargs, loss_kwargs, params = self._prepare_params(params=params)
 
-        seed_value = params.pop("seed_value", 0)
+        seed_value = params.pop(self.seed_name, self.default_random_seed)
 
         self._num_cpus_infer = params.pop("_num_cpus_infer", 1)
         if seed_value is not None:  # Set seeds
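The new `seed_name` class attribute lets shared AutoGluon code discover which hyperparameter key carries a model's random seed (`"seed_value"` for the torch network above, `"seed"` for XGBoost below). A minimal sketch of the resolution pattern, assuming a `default_random_seed` attribute on the abstract base class as the diff implies (the class and method here are illustrative, not the real API):

```python
import random

import numpy as np


class SeededModelSketch:
    """Illustrative stand-in for an AutoGluon model class (not the real API)."""

    seed_name = "seed_value"   # per-family hyperparameter key for the seed
    default_random_seed = 0    # assumed base-class fallback

    def _fit(self, **params):
        # Pop the seed under the model-specific key, falling back to the default.
        seed_value = params.pop(self.seed_name, self.default_random_seed)
        if seed_value is not None:
            random.seed(seed_value)
            np.random.seed(seed_value)
        # ... remaining fit logic would consume the rest of `params` ...
```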
autogluon/tabular/models/xgboost/xgboost_model.py

@@ -32,6 +32,7 @@ class XGBoostModel(AbstractModel):
     ag_key = "XGB"
     ag_name = "XGBoost"
     ag_priority = 40
+    seed_name = "seed"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -182,12 +183,18 @@ class XGBoostModel(AbstractModel):
         from xgboost import XGBClassifier, XGBRegressor
 
         model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
-        self.model = model_type(**params)
+
         import warnings
 
         with warnings.catch_warnings():
             # FIXME: v1.1: Upgrade XGBoost to 2.0.1+ to avoid deprecation warnings from Pandas 2.1+ during XGBoost fit.
             warnings.simplefilter(action="ignore", category=FutureWarning)
+            if params.get("device", "cpu") == "cuda:0":
+                # verbosity=0 to hide UserWarning: Falling back to prediction using DMatrix due to mismatched devices.
+                # TODO: Find a way to hide this warning without setting verbosity=0
+                # ref: https://github.com/dmlc/xgboost/issues/9791
+                params["verbosity"] = 0
+            self.model = model_type(**params)
             self.model.fit(X=X, y=y, eval_set=eval_set, verbose=False, sample_weight=sample_weight)
 
         if generate_curves:
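Outside of AutoGluon, the same workaround can be reproduced directly against XGBoost. A hedged sketch, assuming xgboost >= 2.0 (where the `device` constructor parameter exists); `build_quiet_xgb` is an illustrative helper, not part of either library:

```python
import warnings

from xgboost import XGBRegressor


def build_quiet_xgb(params: dict) -> XGBRegressor:
    """Instantiate XGBoost while muting the CUDA device-mismatch UserWarning."""
    params = dict(params)
    if params.get("device", "cpu") == "cuda:0":
        # Predicting on CPU data with a CUDA-trained booster triggers
        # "Falling back to prediction using DMatrix due to mismatched devices"
        # (dmlc/xgboost#9791); verbosity=0 is the blunt fix used above.
        params["verbosity"] = 0
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=FutureWarning)
        return XGBRegressor(**params)


model = build_quiet_xgb({"n_estimators": 100, "device": "cuda:0"})
```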
autogluon/tabular/predictor/predictor.py

@@ -20,6 +20,7 @@ from autogluon.common import FeatureMetadata, TabularDataset
 from autogluon.common.loaders import load_json
 from autogluon.common.savers import save_json
 from autogluon.common.utils.file_utils import get_directory_size, get_directory_size_per_file
+from autogluon.common.utils.resource_utils import ResourceManager, get_resource_manager
 from autogluon.common.utils.hyperparameter_utils import get_hyperparameter_str_deprecation_msg, is_advanced_hyperparameter_format
 from autogluon.common.utils.log_utils import add_log_to_file, set_logger_verbosity, warn_if_mlflow_autologging_is_enabled
 from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
@@ -48,10 +49,14 @@ from autogluon.core.utils import get_pred_from_proba_df, plot_performance_vs_trials
 from autogluon.core.utils.decorators import apply_presets
 from autogluon.core.utils.loaders import load_pkl, load_str
 from autogluon.core.utils.savers import save_pkl, save_str
-from autogluon.core.utils.utils import CVSplitter, default_holdout_frac, generate_train_test_split_combined
+from autogluon.core.utils.utils import CVSplitter, generate_train_test_split_combined
 
 from ..configs.feature_generator_presets import get_default_feature_generator
 from ..configs.hyperparameter_configs import get_hyperparameter_config
+from ..configs.pipeline_presets import (
+    USE_BAG_HOLDOUT_AUTO_THRESHOLD,
+    get_validation_and_stacking_method,
+)
 from ..configs.presets_configs import tabular_presets_alias, tabular_presets_dict
 from ..learner import AbstractTabularLearner, DefaultLearner
 from ..trainer.abstract_trainer import AbstractTabularTrainer
@@ -165,6 +170,10 @@ class TabularPredictor:
     trainer_type : AbstractTabularTrainer, default = AutoTrainer
        A class inheriting from `AbstractTabularTrainer` that controls training/ensembling of many models.
        If you don't know what this is, keep it as the default.
+    default_base_path : str | Path | None, default = None
+        A default base path to use for the time-stamped folder if `path` is None.
+        If None, defaults to `AutogluonModels`. Only used if `path` is None, and thus
+        only used for local paths, not s3 paths.
    """

    Dataset = TabularDataset
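A usage sketch for the new `default_base_path` constructor option; the label column and directory names are illustrative:

```python
from autogluon.tabular import TabularPredictor

# With path=None, AutoGluon still creates a time-stamped run folder, but
# default_base_path replaces "AutogluonModels" as its parent directory.
predictor = TabularPredictor(
    label="target",               # illustrative label column
    default_base_path="my_runs",  # e.g. my_runs/ag-20251128_120000/ (folder pattern assumed)
)
```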
@@ -201,7 +210,7 @@ class TabularPredictor:
                 f"We do not recommend specifying weight_evaluation when sample_weight='{self.sample_weight}', instead specify appropriate eval_metric."
             )
         self._validate_init_kwargs(kwargs)
-        path = setup_outputdir(path)
+        path = setup_outputdir(path=path, default_base_path=kwargs.get("default_base_path"))
 
         learner_type = kwargs.get("learner_type", DefaultLearner)
         learner_kwargs = kwargs.get("learner_kwargs", dict())
@@ -425,12 +434,17 @@
            Table of the training data as a pandas DataFrame.
            If str is passed, `train_data` will be loaded using the str value as the file path.
        tuning_data : :class:`pd.DataFrame` or str, optional
-           Another dataset containing validation data reserved for tuning processes such as early stopping and hyperparameter tuning.
+           Another dataset containing validation data reserved for tuning processes such as early stopping, hyperparameter tuning, and ensembling.
            This dataset should be in the same format as `train_data`.
            If str is passed, `tuning_data` will be loaded using the str value as the file path.
-           Note: final model returned may be fit on `tuning_data` as well as `train_data`. Do not provide your evaluation test data here!
-           In particular, when `num_bag_folds` > 0 or `num_stack_levels` > 0, models will be trained on both `tuning_data` and `train_data`.
-           If `tuning_data = None`, `fit()` will automatically hold out some random validation examples from `train_data`.
+           Note: If `refit_full=True` is specified, the final model may be fit on `tuning_data` as well as `train_data`.
+           Note: Because `tuning_data` is used to determine which model is the 'best' model, as well as to determine the ensemble weights,
+           it should not be considered a fully unseen dataset. It is possible that AutoGluon will be overfit to the `tuning_data`.
+           To ensure an unbiased evaluation, use separate unseen test data to evaluate the final model using `predictor.leaderboard(test_data, display=True)`.
+           Do not provide your evaluation test data as `tuning_data`!
+           If bagging is not enabled and `tuning_data = None`: `fit()` will automatically hold out some random validation samples from `train_data`.
+           If bagging is enabled and `tuning_data = None`: no tuning data will be used. Instead, AutoGluon will perform cross-validation.
+           If bagging is enabled: `use_bag_holdout=True` must be specified in order to provide tuning data. If specified, AutoGluon will still perform cross-validation for model fits, but will use `tuning_data` for optimizing the weighted ensemble weights and model calibration.
        time_limit : int, default = None
            Approximately how long `fit()` should run for (wallclock time in seconds).
            If not specified, `fit()` will run until all models have completed training, but will not repeatedly bag models unless `num_bag_sets` is specified.
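The rewritten `tuning_data` docs distinguish the bagged and non-bagged paths. A hedged sketch of the bagged case; the label and file paths are illustrative:

```python
from autogluon.tabular import TabularPredictor

predictor = TabularPredictor(label="target")  # illustrative label

# With bagging enabled, tuning_data is only accepted together with
# use_bag_holdout=True: models are still cross-validated on train_data,
# while tuning_data drives ensemble weighting and calibration.
predictor.fit(
    train_data="train.csv",  # illustrative paths
    tuning_data="val.csv",
    num_bag_folds=8,
    use_bag_holdout=True,
)

# Afterwards, evaluate on truly unseen data:
# predictor.leaderboard("test.csv", display=True)
```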
@@ -512,6 +526,7 @@
                    'GBM' (LightGBM)
                    'CAT' (CatBoost)
                    'XGB' (XGBoost)
+                   'EBM' (Explainable Boosting Machine)
                    'REALMLP' (RealMLP)
                    'TABM' (TabM)
                    'MITRA' (Mitra)
@@ -1077,7 +1092,8 @@
         elif verbosity >= 4:
             logger.log(20, f"Verbosity: {verbosity} (Maximum Logging)")
 
-        include_gpu_count = verbosity >= 3
+        resource_manager: ResourceManager = get_resource_manager()
+        include_gpu_count = resource_manager.get_gpu_count_torch() or verbosity >= 3
         sys_msg = get_ag_system_info(path=self.path, include_gpu_count=include_gpu_count)
         logger.log(20, sys_msg)
 
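With this change the system-info banner reports the GPU count whenever torch detects a GPU, not only at `verbosity >= 3`. A minimal sketch of the truthiness-based gate, with a plain integer standing in for the `ResourceManager` call:

```python
def should_include_gpu_count(gpu_count: int, verbosity: int) -> bool:
    # A non-zero GPU count short-circuits the check; otherwise fall back
    # to the old verbosity-based gate.
    return bool(gpu_count or verbosity >= 3)


assert should_include_gpu_count(gpu_count=1, verbosity=2)
assert should_include_gpu_count(gpu_count=0, verbosity=3)
assert not should_include_gpu_count(gpu_count=0, verbosity=2)
```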
@@ -1117,10 +1133,6 @@
         self._validate_calibrate_decision_threshold(calibrate_decision_threshold=calibrate_decision_threshold)
         self._validate_fit_strategy(fit_strategy=fit_strategy)
 
-        holdout_frac = kwargs["holdout_frac"]
-        num_bag_folds = kwargs["num_bag_folds"]
-        num_bag_sets = kwargs["num_bag_sets"]
-        num_stack_levels = kwargs["num_stack_levels"]
         auto_stack = kwargs["auto_stack"]
         feature_generator = kwargs["feature_generator"]
         unlabeled_data = kwargs["unlabeled_data"]
@@ -1216,16 +1228,46 @@
         else:
             ag_args_fit = learning_curves
 
+        use_bag_holdout_was_auto = False
+        dynamic_stacking_was_auto = False
+        if isinstance(use_bag_holdout, str) and use_bag_holdout == "auto":
+            use_bag_holdout = None
+            use_bag_holdout_was_auto = True
+        if isinstance(dynamic_stacking, str) and dynamic_stacking == "auto":
+            dynamic_stacking = None
+            dynamic_stacking_was_auto = True
+
+        (
+            num_bag_folds,
+            num_bag_sets,
+            num_stack_levels,
+            dynamic_stacking,
+            use_bag_holdout,
+            holdout_frac,
+            refit_full,
+        ) = get_validation_and_stacking_method(
+            num_bag_folds=kwargs["num_bag_folds"],
+            num_bag_sets=kwargs["num_bag_sets"],
+            use_bag_holdout=use_bag_holdout,
+            holdout_frac=kwargs["holdout_frac"],
+            auto_stack=auto_stack,
+            num_stack_levels=kwargs["num_stack_levels"],
+            dynamic_stacking=dynamic_stacking,
+            refit_full=kwargs["refit_full"],
+            num_train_rows=len(train_data),
+            problem_type=inferred_problem_type,
+            hpo_enabled=ag_args.get("hyperparameter_tune_kwargs", None) is not None,
+        )
+
         num_bag_folds, num_bag_sets, num_stack_levels, dynamic_stacking, use_bag_holdout = self._sanitize_stack_args(
             num_bag_folds=num_bag_folds,
             num_bag_sets=num_bag_sets,
             num_stack_levels=num_stack_levels,
-            time_limit=time_limit,
-            auto_stack=auto_stack,
             num_train_rows=len(train_data),
-            problem_type=inferred_problem_type,
             dynamic_stacking=dynamic_stacking,
             use_bag_holdout=use_bag_holdout,
+            use_bag_holdout_was_auto=use_bag_holdout_was_auto,
+            dynamic_stacking_was_auto=dynamic_stacking_was_auto,
         )
         if auto_stack:
             logger.log(
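From the caller's perspective the `'auto'` values are unchanged; they are now resolved by `get_validation_and_stacking_method` in `pipeline_presets` rather than inside `_sanitize_stack_args`. A hedged sketch (label and path illustrative; the resolution behavior described in the comment is inferred from the removed logic shown further below, e.g. the 1,000,000-row holdout threshold):

```python
from autogluon.tabular import TabularPredictor

predictor = TabularPredictor(label="target")  # illustrative label

predictor.fit(
    train_data="train.csv",  # illustrative path
    presets="best_quality",
    # Both defaults remain "auto"; below the ~1M-row threshold this has
    # resolved to use_bag_holdout=False and dynamic_stacking=True, and the
    # inverse above it (behavior inferred from the removed code below).
    use_bag_holdout="auto",
    dynamic_stacking="auto",
)
```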
@@ -1234,9 +1276,6 @@
                 f"num_stack_levels={num_stack_levels}, num_bag_folds={num_bag_folds}, num_bag_sets={num_bag_sets}",
             )
 
-        if holdout_frac is None:
-            holdout_frac = default_holdout_frac(len(train_data), ag_args.get("hyperparameter_tune_kwargs", None) is not None)
-
         if kwargs["save_bag_folds"] is not None and kwargs["_save_bag_folds"] is not None:
             raise ValueError(
                 f"Cannot specify both `save_bag_folds` and `_save_bag_folds` at the same time. "
@@ -1324,7 +1363,7 @@
         )
         ag_post_fit_kwargs = dict(
             keep_only_best=kwargs["keep_only_best"],
-            refit_full=kwargs["refit_full"],
+            refit_full=refit_full,
             set_best_to_refit_full=kwargs["set_best_to_refit_full"],
             save_space=kwargs["save_space"],
             calibrate=kwargs["calibrate"],
@@ -1593,7 +1632,6 @@
         if _ds_ray is not None:
             # Handle resources
             # FIXME: what about distributed?
-            from autogluon.common.utils.resource_utils import ResourceManager
 
             total_resources = ag_fit_kwargs["core_kwargs"]["total_resources"]
 
@@ -5061,6 +5099,7 @@
            "learner_type",
            "learner_kwargs",
            "quantile_levels",
+           "default_base_path",
        }
        invalid_keys = []
        for key in kwargs:
@@ -5484,41 +5523,12 @@
        num_bag_folds: int,
        num_bag_sets: int,
        num_stack_levels: int,
-       time_limit: float | None,
-       auto_stack: bool,
        num_train_rows: int,
-       problem_type: str,
        dynamic_stacking: bool | str,
        use_bag_holdout: bool | str,
+       use_bag_holdout_was_auto: bool,
+       dynamic_stacking_was_auto: bool,
    ):
-       use_bag_holdout_auto_threshold = 1000000
-       use_bag_holdout_was_auto = False
-       dynamic_stacking_was_auto = False
-       if isinstance(use_bag_holdout, str) and use_bag_holdout == "auto":
-           # Leverage use_bag_holdout when data is large to safeguard against stack leakage
-           use_bag_holdout = num_train_rows >= use_bag_holdout_auto_threshold
-           use_bag_holdout_was_auto = True
-       if isinstance(dynamic_stacking, str) and dynamic_stacking == "auto":
-           dynamic_stacking = not use_bag_holdout
-           dynamic_stacking_was_auto = True
-       if auto_stack:
-           # TODO: What about datasets that are 100k+? At a certain point should we not bag?
-           # TODO: What about time_limit? Metalearning can tell us expected runtime of each model, then we can select optimal folds + stack levels to fit time constraint
-           if num_bag_folds is None:
-               num_bag_folds = min(8, max(5, math.floor(num_train_rows / 10)))
-           if num_stack_levels is None:
-               if dynamic_stacking:
-                   num_stack_levels = 1
-               else:
-                   if use_bag_holdout or problem_type != BINARY:
-                       num_stack_levels = min(1, max(0, math.floor(num_train_rows / 750)))
-                   else:
-                       # Disable multi-layer stacking to avoid stack info leakage
-                       num_stack_levels = 0
-       if num_bag_folds is None:
-           num_bag_folds = 0
-       if num_stack_levels is None:
-           num_stack_levels = 0
        if not isinstance(num_bag_folds, int):
            raise ValueError(f"num_bag_folds must be an integer. (num_bag_folds={num_bag_folds})")
        if not isinstance(num_stack_levels, int):
@@ -5527,8 +5537,6 @@
            raise ValueError(f"num_bag_folds must be equal to 0 or >=2. (num_bag_folds={num_bag_folds})")
        if num_stack_levels != 0 and num_bag_folds == 0:
            raise ValueError(f"num_stack_levels must be 0 if num_bag_folds is 0. (num_stack_levels={num_stack_levels}, num_bag_folds={num_bag_folds})")
-       if num_bag_sets is None:
-           num_bag_sets = 1
        if not isinstance(num_bag_sets, int):
            raise ValueError(f"num_bag_sets must be an integer. (num_bag_sets={num_bag_sets})")
        if not isinstance(dynamic_stacking, bool):
@@ -5538,11 +5546,11 @@
 
        if use_bag_holdout_was_auto and num_bag_folds != 0:
            if use_bag_holdout:
-               log_extra = f"Reason: num_train_rows >= {use_bag_holdout_auto_threshold}. (num_train_rows={num_train_rows})"
+               log_extra = f"Reason: num_train_rows >= {USE_BAG_HOLDOUT_AUTO_THRESHOLD}. (num_train_rows={num_train_rows})"
            else:
-               log_extra = f"Reason: num_train_rows < {use_bag_holdout_auto_threshold}. (num_train_rows={num_train_rows})"
+               log_extra = f"Reason: num_train_rows < {USE_BAG_HOLDOUT_AUTO_THRESHOLD}. (num_train_rows={num_train_rows})"
            logger.log(20, f"Setting use_bag_holdout from 'auto' to {use_bag_holdout}. {log_extra}")
-       log_extra_ds = None
+
        if dynamic_stacking and num_stack_levels < 1:
            log_extra_ds = f"Reason: Stacking is not enabled. (num_stack_levels={num_stack_levels})"
            if not dynamic_stacking_was_auto:
autogluon/tabular/registry/_ag_model_registry.py

@@ -8,6 +8,7 @@ from . import ModelRegistry
 from ..models import (
     BoostedRulesModel,
     CatBoostModel,
+    EBMModel,
     FastTextModel,
     FigsModel,
     FTTransformerModel,
@@ -64,6 +65,7 @@ REGISTERED_MODEL_CLS_LST = [
     HSTreeModel,
     BoostedRulesModel,
     DummyModel,
+    EBMModel,
 ]
 
 # TODO: Replace logic in `autogluon.tabular.trainer.model_presets.presets` with `ag_model_registry`
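With `EBMModel` registered, the new Explainable Boosting Machine can be requested by its `'EBM'` key, per the updated `fit` docstring above. A hedged sketch; the label, path, and empty hyperparameter dict are illustrative:

```python
from autogluon.tabular import TabularPredictor

predictor = TabularPredictor(label="target")  # illustrative label

# 'EBM' maps to the new EBMModel (requires the interpret-core dependency,
# installable via the new "interpret" extra declared in METADATA below).
predictor.fit(
    train_data="train.csv",       # illustrative path
    hyperparameters={"EBM": {}},  # empty dict = EBM defaults
)
```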
autogluon/tabular/testing/fit_helper.py

@@ -175,6 +175,7 @@ class FitHelper:
         use_test_for_val: bool = False,
         raise_on_model_failure: bool | None = None,
         deepcopy_fit_args: bool = True,
+        verify_model_seed: bool = False,
     ) -> TabularPredictor:
         if compiler_configs is None:
             compiler_configs = {}
@@ -269,6 +270,11 @@ class FitHelper:
                    assert not model_info["val_in_fit"], f"val data must not be present in refit model if `can_refit_full=True`. Maybe an exception occurred?"
                else:
                    assert model_info["val_in_fit"], f"val data must be present in refit model if `can_refit_full=False`"
+        if verify_model_seed:
+            model_names = predictor.model_names()
+            for model_name in model_names:
+                model = predictor._trainer.load_model(model_name)
+                _verify_model_seed(model=model)
 
         if predictor_info:
             predictor.info()
@@ -339,6 +345,7 @@ class FitHelper:
         require_known_problem_types: bool = True,
         raise_on_model_failure: bool = True,
         problem_types: list[str] | None = None,
+        verify_model_seed: bool = True,
         **kwargs,
     ):
         """
@@ -355,12 +362,18 @@ class FitHelper:
        problem_types: list[str], optional
            If specified, checks the given problem_types.
            If None, checks `model_cls.supported_problem_types()`
+       verify_model_seed: bool = True
        **kwargs
 
        Returns
        -------
 
        """
+        if verify_model_seed and model_cls.seed_name is not None:
+            # verify that the seed logic works
+            model_hyperparameters = model_hyperparameters.copy()
+            model_hyperparameters[model_cls.seed_name] = 42
+
        fit_args = dict(
            hyperparameters={model_cls: model_hyperparameters},
        )
@@ -429,6 +442,7 @@ class FitHelper:
            refit_full=refit_full,
            extra_metrics=_extra_metrics,
            raise_on_model_failure=raise_on_model_failure,
+           verify_model_seed=verify_model_seed,
            **kwargs,
        )
 
@@ -460,6 +474,7 @@ class FitHelper:
            refit_full=refit_full,
            extra_metrics=_extra_metrics,
            raise_on_model_failure=raise_on_model_failure,
+           verify_model_seed=verify_model_seed,
            **kwargs,
        )
 
@@ -476,3 +491,16 @@ def stacked_overfitting_assert(
    if expected_stacked_overfitting_at_test is not None:
        stacked_overfitting = check_stacked_overfitting_from_leaderboard(lb)
        assert stacked_overfitting == expected_stacked_overfitting_at_test, "Expected stacked overfitting at test mismatch!"
+
+
+def _verify_model_seed(model: AbstractModel):
+    assert model.random_seed is None or isinstance(model.random_seed, int)
+    if model.seed_name is not None:
+        if model.seed_name in model._user_params:
+            assert model.random_seed == model._user_params[model.seed_name]
+        assert model.seed_name in model.params
+        assert model.random_seed == model.params[model.seed_name]
+    if isinstance(model, BaggedEnsembleModel):
+        for child in model.models:
+            child = model.load_child(child)
+            _verify_model_seed(child)
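A hedged sketch of how the seed check might be exercised in a model test, assuming `FitHelper.verify_model` is the entry point these hunks modify (the method name itself is not visible in this diff, and the import path for `FitHelper` is assumed):

```python
from autogluon.tabular.models import XGBoostModel
from autogluon.tabular.testing import FitHelper  # import path assumed

# Because XGBoostModel defines seed_name = "seed", verify_model_seed=True makes
# the helper inject model_hyperparameters["seed"] = 42 before fitting, then
# _verify_model_seed() asserts the seed round-trips into model.params (and into
# every bagged child via load_child).
FitHelper.verify_model(
    model_cls=XGBoostModel,
    model_hyperparameters={},
    verify_model_seed=True,
)
```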
autogluon/tabular/version.py

@@ -1,4 +1,4 @@
 """This is the autogluon version file."""
 
-__version__ = "1.4.0"
+__version__ = "1.4.1b20251128"
 __lite__ = False
autogluon.tabular-1.4.1b20251128-py3.11-nspkg.pth

@@ -0,0 +1 @@
+import sys, types, os;p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('autogluon',));importlib = __import__('importlib.util');__import__('importlib.machinery');m = sys.modules.setdefault('autogluon', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('autogluon', [os.path.dirname(p)])));m = m or sys.modules.setdefault('autogluon', types.ModuleType('autogluon'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)
{autogluon.tabular-1.4.0.dist-info → autogluon_tabular-1.4.1b20251128.dist-info}/METADATA

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: autogluon.tabular
-Version: 1.4.0
+Version: 1.4.1b20251128
 Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -9,8 +9,7 @@ Project-URL: Documentation, https://auto.gluon.ai
 Project-URL: Bug Reports, https://github.com/autogluon/autogluon/issues
 Project-URL: Source, https://github.com/autogluon/autogluon/
 Project-URL: Contribute!, https://github.com/autogluon/autogluon/blob/master/CONTRIBUTING.md
-Platform: UNKNOWN
-Classifier: Development Status :: 5 - Production/Stable
+Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Education
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -24,114 +23,130 @@ Classifier: Operating System :: Microsoft :: Windows
 Classifier: Operating System :: POSIX
 Classifier: Operating System :: Unix
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Scientific/Engineering :: Information Analysis
 Classifier: Topic :: Scientific/Engineering :: Image Recognition
-Requires-Python: >=3.9, <3.13
+Requires-Python: >=3.10, <3.14
 Description-Content-Type: text/markdown
-License-File: ../LICENSE
-License-File: ../NOTICE
+License-File: LICENSE
+License-File: NOTICE
 Requires-Dist: numpy<2.4.0,>=1.25.0
 Requires-Dist: scipy<1.17,>=1.5.4
 Requires-Dist: pandas<2.4.0,>=2.0.0
 Requires-Dist: scikit-learn<1.8.0,>=1.4.0
 Requires-Dist: networkx<4,>=3.0
-Requires-Dist: autogluon.core==1.4.0
-Requires-Dist: autogluon.features==1.4.0
-Provides-Extra: all
-Requires-Dist: catboost<1.3,>=1.2; extra == "all"
-Requires-Dist: fastai<2.9,>=2.3.1; extra == "all"
-Requires-Dist: numpy<2.3.0,>=1.25; extra == "all"
-Requires-Dist: loguru; extra == "all"
-Requires-Dist: lightgbm<4.7,>=4.0; extra == "all"
-Requires-Dist: einx; extra == "all"
-Requires-Dist: autogluon.core[all]==1.4.0; extra == "all"
-Requires-Dist: xgboost<3.1,>=2.0; extra == "all"
-Requires-Dist: transformers; extra == "all"
-Requires-Dist: spacy<3.9; extra == "all"
-Requires-Dist: torch<2.8,>=2.2; extra == "all"
-Requires-Dist: omegaconf; extra == "all"
-Requires-Dist: huggingface-hub[torch]; extra == "all"
-Requires-Dist: blis<1.2.1,>=0.7.0; (platform_system == "Windows" and python_version == "3.9") and extra == "all"
+Requires-Dist: autogluon.core==1.4.1b20251128
+Requires-Dist: autogluon.features==1.4.1b20251128
+Provides-Extra: lightgbm
+Requires-Dist: lightgbm<4.7,>=4.0; extra == "lightgbm"
 Provides-Extra: catboost
 Requires-Dist: numpy<2.3.0,>=1.25; extra == "catboost"
 Requires-Dist: catboost<1.3,>=1.2; extra == "catboost"
+Provides-Extra: xgboost
+Requires-Dist: xgboost<3.1,>=2.0; extra == "xgboost"
+Provides-Extra: realmlp
+Requires-Dist: pytabkit<1.7,>=1.6; extra == "realmlp"
+Provides-Extra: interpret
+Requires-Dist: interpret-core<0.8,>=0.7.2; extra == "interpret"
 Provides-Extra: fastai
 Requires-Dist: spacy<3.9; extra == "fastai"
-Requires-Dist: torch<2.8,>=2.2; extra == "fastai"
+Requires-Dist: torch<2.8,>=2.6; extra == "fastai"
 Requires-Dist: fastai<2.9,>=2.3.1; extra == "fastai"
-Requires-Dist: blis<1.2.1,>=0.7.0; (platform_system == "Windows" and python_version == "3.9") and extra == "fastai"
-Provides-Extra: imodels
-Requires-Dist: imodels<2.1.0,>=1.3.10; extra == "imodels"
-Provides-Extra: lightgbm
-Requires-Dist: lightgbm<4.7,>=4.0; extra == "lightgbm"
+Provides-Extra: tabm
+Requires-Dist: torch<2.8,>=2.6; extra == "tabm"
+Provides-Extra: tabpfn
+Requires-Dist: tabpfn<2.2,>=2.0.9; extra == "tabpfn"
+Provides-Extra: tabpfnmix
+Requires-Dist: torch<2.8,>=2.6; extra == "tabpfnmix"
+Requires-Dist: huggingface_hub[torch]<1.0; extra == "tabpfnmix"
+Requires-Dist: einops<0.9,>=0.7; extra == "tabpfnmix"
 Provides-Extra: mitra
 Requires-Dist: loguru; extra == "mitra"
 Requires-Dist: einx; extra == "mitra"
 Requires-Dist: omegaconf; extra == "mitra"
-Requires-Dist: torch<2.8,>=2.2; extra == "mitra"
+Requires-Dist: torch<2.8,>=2.6; extra == "mitra"
 Requires-Dist: transformers; extra == "mitra"
-Requires-Dist: huggingface-hub[torch]; extra == "mitra"
+Requires-Dist: huggingface_hub[torch]<1.0; extra == "mitra"
+Requires-Dist: einops<0.9,>=0.7; extra == "mitra"
+Provides-Extra: tabicl
+Requires-Dist: tabicl<0.2,>=0.1.3; extra == "tabicl"
 Provides-Extra: ray
-Requires-Dist: autogluon.core[all]==1.4.0; extra == "ray"
-Provides-Extra: realmlp
-Requires-Dist: pytabkit<1.7,>=1.6; extra == "realmlp"
+Requires-Dist: autogluon.core[all]==1.4.1b20251128; extra == "ray"
 Provides-Extra: skex
 Requires-Dist: scikit-learn-intelex<2025.5,>=2024.0; extra == "skex"
+Provides-Extra: imodels
+Requires-Dist: imodels<2.1.0,>=1.3.10; extra == "imodels"
 Provides-Extra: skl2onnx
+Requires-Dist: onnx<1.16.2,>=1.13.0; platform_system == "Windows" and extra == "skl2onnx"
+Requires-Dist: onnx<1.18.0,>=1.13.0; platform_system != "Windows" and extra == "skl2onnx"
 Requires-Dist: skl2onnx<1.18.0,>=1.15.0; extra == "skl2onnx"
 Requires-Dist: onnxruntime<1.20.0,>=1.17.0; extra == "skl2onnx"
 Requires-Dist: onnxruntime-gpu<1.20.0,>=1.17.0; extra == "skl2onnx"
-Requires-Dist: onnx<1.18.0,>=1.13.0; platform_system != "Windows" and extra == "skl2onnx"
-Requires-Dist: onnx<1.16.2,>=1.13.0; platform_system == "Windows" and extra == "skl2onnx"
+Provides-Extra: all
+Requires-Dist: omegaconf; extra == "all"
+Requires-Dist: numpy<2.3.0,>=1.25; extra == "all"
+Requires-Dist: spacy<3.9; extra == "all"
+Requires-Dist: einops<0.9,>=0.7; extra == "all"
+Requires-Dist: lightgbm<4.7,>=4.0; extra == "all"
+Requires-Dist: einx; extra == "all"
+Requires-Dist: autogluon.core[all]==1.4.1b20251128; extra == "all"
+Requires-Dist: fastai<2.9,>=2.3.1; extra == "all"
+Requires-Dist: torch<2.8,>=2.6; extra == "all"
+Requires-Dist: loguru; extra == "all"
+Requires-Dist: catboost<1.3,>=1.2; extra == "all"
+Requires-Dist: transformers; extra == "all"
+Requires-Dist: xgboost<3.1,>=2.0; extra == "all"
+Requires-Dist: huggingface_hub[torch]<1.0; extra == "all"
 Provides-Extra: tabarena
 Requires-Dist: catboost<1.3,>=1.2; extra == "tabarena"
-Requires-Dist: fastai<2.9,>=2.3.1; extra == "tabarena"
+Requires-Dist: omegaconf; extra == "tabarena"
 Requires-Dist: numpy<2.3.0,>=1.25; extra == "tabarena"
-Requires-Dist: loguru; extra == "tabarena"
+Requires-Dist: spacy<3.9; extra == "tabarena"
+Requires-Dist: einops<0.9,>=0.7; extra == "tabarena"
+Requires-Dist: tabpfn<2.2,>=2.0.9; extra == "tabarena"
 Requires-Dist: lightgbm<4.7,>=4.0; extra == "tabarena"
 Requires-Dist: einx; extra == "tabarena"
-Requires-Dist: autogluon.core[all]==1.4.0; extra == "tabarena"
+Requires-Dist: autogluon.core[all]==1.4.1b20251128; extra == "tabarena"
+Requires-Dist: interpret-core<0.8,>=0.7.2; extra == "tabarena"
+Requires-Dist: fastai<2.9,>=2.3.1; extra == "tabarena"
+Requires-Dist: loguru; extra == "tabarena"
+Requires-Dist: pytabkit<1.7,>=1.6; extra == "tabarena"
+Requires-Dist: torch<2.8,>=2.6; extra == "tabarena"
+Requires-Dist: transformers; extra == "tabarena"
 Requires-Dist: tabicl<0.2,>=0.1.3; extra == "tabarena"
 Requires-Dist: xgboost<3.1,>=2.0; extra == "tabarena"
-Requires-Dist: transformers; extra == "tabarena"
-Requires-Dist: spacy<3.9; extra == "tabarena"
-Requires-Dist: torch<2.8,>=2.2; extra == "tabarena"
-Requires-Dist: omegaconf; extra == "tabarena"
-Requires-Dist: huggingface-hub[torch]; extra == "tabarena"
-Requires-Dist: tabpfn<2.2,>=2.0.9; extra == "tabarena"
-Requires-Dist: pytabkit<1.7,>=1.6; extra == "tabarena"
-Requires-Dist: blis<1.2.1,>=0.7.0; (platform_system == "Windows" and python_version == "3.9") and extra == "tabarena"
-Provides-Extra: tabicl
-Requires-Dist: tabicl<0.2,>=0.1.3; extra == "tabicl"
-Provides-Extra: tabm
-Requires-Dist: torch<2.8,>=2.2; extra == "tabm"
-Provides-Extra: tabpfn
-Requires-Dist: tabpfn<2.2,>=2.0.9; extra == "tabpfn"
-Provides-Extra: tabpfnmix
-Requires-Dist: torch<2.8,>=2.2; extra == "tabpfnmix"
-Requires-Dist: huggingface-hub[torch]; extra == "tabpfnmix"
-Requires-Dist: einops<0.9,>=0.7; extra == "tabpfnmix"
+Requires-Dist: huggingface_hub[torch]<1.0; extra == "tabarena"
 Provides-Extra: tests
+Requires-Dist: interpret-core<0.8,>=0.7.2; extra == "tests"
 Requires-Dist: tabicl<0.2,>=0.1.3; extra == "tests"
 Requires-Dist: tabpfn<2.2,>=2.0.9; extra == "tests"
 Requires-Dist: pytabkit<1.7,>=1.6; extra == "tests"
-Requires-Dist: torch<2.8,>=2.2; extra == "tests"
-Requires-Dist: huggingface-hub[torch]; extra == "tests"
+Requires-Dist: torch<2.8,>=2.6; extra == "tests"
+Requires-Dist: huggingface_hub[torch]<1.0; extra == "tests"
 Requires-Dist: einops<0.9,>=0.7; extra == "tests"
 Requires-Dist: imodels<2.1.0,>=1.3.10; extra == "tests"
+Requires-Dist: onnx<1.16.2,>=1.13.0; platform_system == "Windows" and extra == "tests"
+Requires-Dist: onnx<1.18.0,>=1.13.0; platform_system != "Windows" and extra == "tests"
 Requires-Dist: skl2onnx<1.18.0,>=1.15.0; extra == "tests"
 Requires-Dist: onnxruntime<1.20.0,>=1.17.0; extra == "tests"
 Requires-Dist: onnxruntime-gpu<1.20.0,>=1.17.0; extra == "tests"
-Requires-Dist: onnx<1.18.0,>=1.13.0; platform_system != "Windows" and extra == "tests"
-Requires-Dist: onnx<1.16.2,>=1.13.0; platform_system == "Windows" and extra == "tests"
-Provides-Extra: xgboost
-Requires-Dist: xgboost<3.1,>=2.0; extra == "xgboost"
+Dynamic: author
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: project-url
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 
 
 
@@ -142,7 +157,7 @@ Requires-Dist: xgboost<3.1,>=2.0; extra == "xgboost"
 
 [![Latest Release](https://img.shields.io/github/v/release/autogluon/autogluon)](https://github.com/autogluon/autogluon/releases)
 [![Conda Forge](https://img.shields.io/conda/vn/conda-forge/autogluon.svg)](https://anaconda.org/conda-forge/autogluon)
-[![Python Versions](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)](https://pypi.org/project/autogluon/)
+[![Python Versions](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)](https://pypi.org/project/autogluon/)
 [![Downloads](https://pepy.tech/badge/autogluon/month)](https://pepy.tech/project/autogluon)
 [![GitHub license](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](./LICENSE)
 [![Discord](https://img.shields.io/discord/1043248669505368144?color=7289da&label=Discord&logo=discord&logoColor=ffffff)](https://discord.gg/wjUmjqAc2N)
@@ -159,7 +174,7 @@ AutoGluon, developed by AWS AI, automates machine learning tasks enabling you to
 
 ## 💾 Installation
 
-AutoGluon is supported on Python 3.9 - 3.12 and is available on Linux, MacOS, and Windows.
+AutoGluon is supported on Python 3.10 - 3.13 and is available on Linux, MacOS, and Windows.
 
 You can install AutoGluon with:
 
@@ -182,8 +197,8 @@ predictions = predictor.predict("test.csv")
 | AutoGluon Task      | Quickstart | API |
 |:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
 | TabularPredictor    | [![Quick Start](https://img.shields.io/static/v1?label=&message=tutorial&color=grey)](https://auto.gluon.ai/stable/tutorials/tabular/tabular-quick-start.html) | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://auto.gluon.ai/stable/api/autogluon.tabular.TabularPredictor.html) |
-| MultiModalPredictor | [![Quick Start](https://img.shields.io/static/v1?label=&message=tutorial&color=grey)](https://auto.gluon.ai/stable/tutorials/multimodal/multimodal_prediction/multimodal-quick-start.html) | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://auto.gluon.ai/stable/api/autogluon.multimodal.MultiModalPredictor.html) |
 | TimeSeriesPredictor | [![Quick Start](https://img.shields.io/static/v1?label=&message=tutorial&color=grey)](https://auto.gluon.ai/stable/tutorials/timeseries/forecasting-quick-start.html) | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://auto.gluon.ai/stable/api/autogluon.timeseries.TimeSeriesPredictor.html) |
+| MultiModalPredictor | [![Quick Start](https://img.shields.io/static/v1?label=&message=tutorial&color=grey)](https://auto.gluon.ai/stable/tutorials/multimodal/multimodal_prediction/multimodal-quick-start.html) | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://auto.gluon.ai/stable/api/autogluon.multimodal.MultiModalPredictor.html) |
 
 ## :mag: Resources
 
@@ -206,7 +221,10 @@ Below is a curated list of recent tutorials and talks on AutoGluon. A comprehens
 - [Benchmarking Multimodal AutoML for Tabular Data with Text Fields](https://datasets-benchmarks-proceedings.neurips.cc/paper/2021/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Paper-round2.pdf) (*NeurIPS*, 2021) ([BibTeX](CITING.md#autogluonmultimodal))
 - [XTab: Cross-table Pretraining for Tabular Transformers](https://proceedings.mlr.press/v202/zhu23k/zhu23k.pdf) (*ICML*, 2023)
 - [AutoGluon-TimeSeries: AutoML for Probabilistic Time Series Forecasting](https://arxiv.org/abs/2308.05566) (*AutoML Conf*, 2023) ([BibTeX](CITING.md#autogluontimeseries))
-- [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*Under Review*, 2024)
+- [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*AutoML Conf*, 2024)
+- [AutoGluon-Multimodal (AutoMM): Supercharging Multimodal AutoML with Foundation Models](https://arxiv.org/pdf/2404.16233) (*AutoML Conf*, 2024) ([BibTeX](CITING.md#autogluonmultimodal))
+- [Multi-layer Stack Ensembles for Time Series Forecasting](https://arxiv.org/abs/2511.15350) (*AutoML Conf*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
+- [Chronos-2: From Univariate to Universal Forecasting](https://arxiv.org/abs/2510.15821) (*Arxiv*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
 
 ### Articles
 - [AutoGluon-TimeSeries: Every Time Series Forecasting Model In One Library](https://towardsdatascience.com/autogluon-timeseries-every-time-series-forecasting-model-in-one-library-29a3bf6879db) (*Towards Data Science*, Jan 2024)
@@ -232,5 +250,3 @@ We are actively accepting code contributions to the AutoGluon project. If you ar
 ## :classical_building: License
 
 This library is licensed under the Apache 2.0 License.
-
-