autogluon.tabular 1.3.2b20250610__py3-none-any.whl → 1.4.1b20251214__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- autogluon/tabular/configs/config_helper.py +1 -1
- autogluon/tabular/configs/hyperparameter_configs.py +2 -265
- autogluon/tabular/configs/pipeline_presets.py +130 -0
- autogluon/tabular/configs/presets_configs.py +51 -26
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +0 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +310 -0
- autogluon/tabular/models/__init__.py +6 -1
- autogluon/tabular/models/_utils/rapids_utils.py +1 -1
- autogluon/tabular/models/automm/automm_model.py +2 -0
- autogluon/tabular/models/automm/ft_transformer.py +4 -1
- autogluon/tabular/models/catboost/callbacks.py +3 -2
- autogluon/tabular/models/catboost/catboost_model.py +15 -9
- autogluon/tabular/models/catboost/catboost_utils.py +17 -3
- autogluon/tabular/models/ebm/__init__.py +0 -0
- autogluon/tabular/models/ebm/ebm_model.py +259 -0
- autogluon/tabular/models/ebm/hyperparameters/__init__.py +0 -0
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +39 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +72 -0
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +7 -5
- autogluon/tabular/models/knn/knn_model.py +7 -3
- autogluon/tabular/models/lgb/lgb_model.py +60 -21
- autogluon/tabular/models/lr/lr_model.py +6 -1
- autogluon/tabular/models/lr/lr_preprocessing_utils.py +6 -7
- autogluon/tabular/models/lr/lr_rapids_model.py +45 -5
- autogluon/tabular/models/mitra/__init__.py +0 -0
- autogluon/tabular/models/mitra/_internal/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +190 -0
- autogluon/tabular/models/mitra/_internal/config/config_run.py +32 -0
- autogluon/tabular/models/mitra/_internal/config/enums.py +162 -0
- autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +94 -0
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +54 -0
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +108 -0
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +67 -0
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +132 -0
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +373 -0
- autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/data/collator.py +46 -0
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +136 -0
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +57 -0
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +420 -0
- autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/models/base.py +21 -0
- autogluon/tabular/models/mitra/_internal/models/embedding.py +182 -0
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +667 -0
- autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +15 -0
- autogluon/tabular/models/mitra/mitra_model.py +380 -0
- autogluon/tabular/models/mitra/sklearn_interface.py +494 -0
- autogluon/tabular/models/realmlp/__init__.py +0 -0
- autogluon/tabular/models/realmlp/realmlp_model.py +360 -0
- autogluon/tabular/models/rf/rf_model.py +11 -6
- autogluon/tabular/models/tabicl/__init__.py +0 -0
- autogluon/tabular/models/tabicl/tabicl_model.py +179 -0
- autogluon/tabular/models/tabm/__init__.py +0 -0
- autogluon/tabular/models/tabm/_tabm_internal.py +545 -0
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +810 -0
- autogluon/tabular/models/tabm/tabm_model.py +356 -0
- autogluon/tabular/models/tabm/tabm_reference.py +631 -0
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +13 -7
- autogluon/tabular/models/tabpfnv2/__init__.py +0 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +20 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +40 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +201 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +1464 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +747 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +863 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +106 -0
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +388 -0
- autogluon/tabular/models/tabular_nn/hyperparameters/parameters.py +1 -3
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +5 -5
- autogluon/tabular/models/xgboost/xgboost_model.py +10 -3
- autogluon/tabular/predictor/predictor.py +147 -84
- autogluon/tabular/registry/_ag_model_registry.py +12 -2
- autogluon/tabular/testing/fit_helper.py +57 -27
- autogluon/tabular/testing/generate_datasets.py +7 -0
- autogluon/tabular/trainer/abstract_trainer.py +3 -1
- autogluon/tabular/trainer/model_presets/presets.py +10 -1
- autogluon/tabular/version.py +1 -1
- autogluon.tabular-1.4.1b20251214-py3.11-nspkg.pth +1 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/METADATA +112 -57
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/RECORD +89 -40
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/WHEEL +1 -1
- autogluon/tabular/models/tabpfn/__init__.py +0 -1
- autogluon/tabular/models/tabpfn/tabpfn_model.py +0 -153
- autogluon.tabular-1.3.2b20250610-py3.9-nspkg.pth +0 -1
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info/licenses}/LICENSE +0 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info/licenses}/NOTICE +0 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/namespace_packages.txt +0 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/top_level.txt +0 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/zip-safe +0 -0
--- autogluon/tabular/predictor/predictor.py (1.3.2b20250610)
+++ autogluon/tabular/predictor/predictor.py (1.4.1b20251214)
@@ -20,6 +20,7 @@ from autogluon.common import FeatureMetadata, TabularDataset
 from autogluon.common.loaders import load_json
 from autogluon.common.savers import save_json
 from autogluon.common.utils.file_utils import get_directory_size, get_directory_size_per_file
+from autogluon.common.utils.resource_utils import ResourceManager, get_resource_manager
 from autogluon.common.utils.hyperparameter_utils import get_hyperparameter_str_deprecation_msg, is_advanced_hyperparameter_format
 from autogluon.common.utils.log_utils import add_log_to_file, set_logger_verbosity, warn_if_mlflow_autologging_is_enabled
 from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
@@ -48,10 +49,14 @@ from autogluon.core.utils import get_pred_from_proba_df, plot_performance_vs_trials
 from autogluon.core.utils.decorators import apply_presets
 from autogluon.core.utils.loaders import load_pkl, load_str
 from autogluon.core.utils.savers import save_pkl, save_str
-from autogluon.core.utils.utils import CVSplitter, default_holdout_frac
+from autogluon.core.utils.utils import CVSplitter, generate_train_test_split_combined
 
 from ..configs.feature_generator_presets import get_default_feature_generator
 from ..configs.hyperparameter_configs import get_hyperparameter_config
+from ..configs.pipeline_presets import (
+    USE_BAG_HOLDOUT_AUTO_THRESHOLD,
+    get_validation_and_stacking_method,
+)
 from ..configs.presets_configs import tabular_presets_alias, tabular_presets_dict
 from ..learner import AbstractTabularLearner, DefaultLearner
 from ..trainer.abstract_trainer import AbstractTabularTrainer
@@ -165,6 +170,10 @@ class TabularPredictor:
     trainer_type : AbstractTabularTrainer, default = AutoTrainer
         A class inheriting from `AbstractTabularTrainer` that controls training/ensembling of many models.
         If you don't know what this is, keep it as the default.
+    default_base_path : str | Path | None, default = None
+        A default base path to use for the time-stamped folder if `path` is None.
+        If None, defaults to `AutogluonModels`. Only used if `path` is None, and thus
+        only used for local paths, not s3 paths.
     """
 
     Dataset = TabularDataset
@@ -201,7 +210,7 @@ class TabularPredictor:
                 f"We do not recommend specifying weight_evaluation when sample_weight='{self.sample_weight}', instead specify appropriate eval_metric."
             )
         self._validate_init_kwargs(kwargs)
-        path = setup_outputdir(path)
+        path = setup_outputdir(path=path, default_base_path=kwargs.get("default_base_path"))
 
         learner_type = kwargs.get("learner_type", DefaultLearner)
         learner_kwargs = kwargs.get("learner_kwargs", dict())
@@ -401,7 +410,7 @@ class TabularPredictor:
         time_limit: float = None,
         presets: list[str] | str = None,
         hyperparameters: dict | str = None,
-        feature_metadata="infer",
+        feature_metadata: str | FeatureMetadata = "infer",
         infer_limit: float = None,
         infer_limit_batch_size: int = None,
         fit_weighted_ensemble: bool = True,
@@ -425,30 +434,41 @@ class TabularPredictor:
             Table of the training data as a pandas DataFrame.
             If str is passed, `train_data` will be loaded using the str value as the file path.
         tuning_data : :class:`pd.DataFrame` or str, optional
-            Another dataset containing validation data reserved for tuning processes such as early stopping and hyperparameter tuning.
+            Another dataset containing validation data reserved for tuning processes such as early stopping, hyperparameter tuning, and ensembling.
             This dataset should be in the same format as `train_data`.
             If str is passed, `tuning_data` will be loaded using the str value as the file path.
-            Note: final model returned may be fit on `tuning_data` as well as `train_data`. Do not provide your evaluation test data here!
-            In particular, when `num_bag_folds` > 0 or `num_stack_levels` > 0, models will be trained on both `train_data` and `tuning_data`.
-            If `tuning_data = None`, `fit()` will automatically hold out some random validation examples from `train_data`.
+            Note: If `refit_full=True` is specified, the final model may be fit on `tuning_data` as well as `train_data`.
+            Note: Because `tuning_data` is used to determine which model is the 'best' model, as well as to determine the ensemble weights,
+            it should not be considered a fully unseen dataset. It is possible that AutoGluon will be overfit to the `tuning_data`.
+            To ensure an unbiased evaluation, use separate unseen test data to evaluate the final model using `predictor.leaderboard(test_data, display=True)`.
+            Do not provide your evaluation test data as `tuning_data`!
+            If bagging is not enabled and `tuning_data = None`: `fit()` will automatically hold out some random validation samples from `train_data`.
+            If bagging is enabled and `tuning_data = None`: no tuning data will be used. Instead, AutoGluon will perform cross-validation.
+            If bagging is enabled: `use_bag_holdout=True` must be specified in order to provide tuning data. If specified, AutoGluon will still perform cross-validation for model fits, but will use `tuning_data` for optimizing the weighted ensemble weights and model calibration.
         time_limit : int, default = None
             Approximately how long `fit()` should run for (wallclock time in seconds).
             If not specified, `fit()` will run until all models have completed training, but will not repeatedly bag models unless `num_bag_sets` is specified.
         presets : list or str or dict, default = ['medium_quality']
             List of preset configurations for various arguments in `fit()`. Can significantly impact predictive accuracy, memory-footprint, and inference latency of trained models, and various other properties of the returned `predictor`.
             It is recommended to specify presets and avoid specifying most other `fit()` arguments or model hyperparameters prior to becoming familiar with AutoGluon.
-            As an example, to get the most accurate overall predictor (regardless of its efficiency), set `presets='best_quality'`.
+            As an example, to get the most accurate overall predictor (regardless of its efficiency), set `presets='best_quality'` (or `extreme_quality` if a GPU is available).
             To get good quality with minimal disk usage, set `presets=['good_quality', 'optimize_for_deployment']`
             Any user-specified arguments in `fit()` will override the values used by presets.
             If specifying a list of presets, later presets will override earlier presets if they alter the same argument.
             For precise definitions of the provided presets, see file: `autogluon/tabular/configs/presets_configs.py`.
             Users can specify custom presets by passing in a dictionary of argument values as an element to the list.
 
-            Available Presets: ['best_quality', 'high_quality', 'good_quality', 'medium_quality', 'experimental_quality', 'optimize_for_deployment', 'interpretable', 'ignore_text']
+            Available Presets: ['extreme_quality', 'best_quality', 'high_quality', 'good_quality', 'medium_quality', 'experimental_quality', 'optimize_for_deployment', 'interpretable', 'ignore_text']
 
             It is recommended to only use one `quality` based preset in a given call to `fit()` as they alter many of the same arguments and are not compatible with each-other.
 
             In-depth Preset Info:
+                extreme_quality={"auto_stack": True, "dynamic_stacking": "auto", "_experimental_dynamic_hyperparameters": True, "hyperparameters": None}
+                    Significantly more accurate than `best_quality` on datasets <= 30000 samples. Requires a GPU for best results.
+                    For datasets <= 30000 samples, will use recent tabular foundation models TabPFNv2, TabICL, and Mitra to maximize performance.
+                    For datasets > 30000 samples, will behave identically to `best_quality`.
+                    Recommended for applications that benefit from the best possible model accuracy.
+
                 best_quality={'auto_stack': True, 'dynamic_stacking': 'auto', 'hyperparameters': 'zeroshot'}
                     Best predictive accuracy with little consideration to inference time or disk usage. Achieve even better results by specifying a large time_limit value.
                     Recommended for applications that benefit from the best possible model accuracy.
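To make the new preset concrete, here is a minimal usage sketch of `extreme_quality`; the file name and label column are illustrative, not taken from this diff:

    from autogluon.tabular import TabularDataset, TabularPredictor

    train_data = TabularDataset("train.csv")  # any tabular file with a label column
    predictor = TabularPredictor(label="class").fit(
        train_data,
        presets="extreme_quality",  # <=30000 rows: tabular foundation models; otherwise identical to best_quality
        time_limit=3600,
    )

On datasets above 30000 rows this is documented to behave identically to `best_quality`, so the preset is a safe default whenever a GPU is available.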
@@ -477,7 +497,7 @@ class TabularPredictor:
                     Because unused models will be deleted under this preset, methods like `predictor.leaderboard()` and `predictor.fit_summary()` will no longer show the full set of models that were trained during `fit()`.
                     Recommended for applications where the inner details of AutoGluon's training is not important and there is no intention of manually choosing between the final models.
                     This preset pairs well with the other presets such as `good_quality` to make a very compact final model.
-                    Identical to calling `predictor.delete_models(models_to_keep='best', dry_run=False)` and `predictor.save_space()` directly after `fit()`.
+                    Identical to calling `predictor.delete_models(models_to_keep='best')` and `predictor.save_space()` directly after `fit()`.
 
                 interpretable={'auto_stack': False, 'hyperparameters': 'interpretable'}
                     Fits only interpretable rule-based models from the imodels package.
@@ -491,9 +511,10 @@ class TabularPredictor:
         hyperparameters : str or dict, default = 'default'
             Determines the hyperparameters used by the models.
             If `str` is passed, will use a preset hyperparameter configuration.
-            Valid `str` options: ['default', 'zeroshot', 'light', 'very_light', 'toy', 'multimodal']
+            Valid `str` options: ['default', 'zeroshot', 'zeroshot_2025_tabfm', 'light', 'very_light', 'toy', 'multimodal']
                 'default': Default AutoGluon hyperparameters intended to get strong accuracy with reasonable disk usage and inference time. Used in the 'medium_quality' preset.
                 'zeroshot': A powerful model portfolio learned from TabRepo's ensemble simulation on 200 datasets. Contains ~100 models and is used in 'best_quality' and 'high_quality' presets.
+                'zeroshot_2025_tabfm': Absolute cutting edge portfolio learned from TabArena's ensemble simulation that leverages tabular foundation models. Contains 22 models and is used in the `extreme_quality` preset.
                 'light': Results in smaller models. Generally will make inference speed much faster and disk usage much lower, but with worse accuracy. Used in the 'good_quality' preset.
                 'very_light': Results in much smaller models. Behaves similarly to 'light', but in many cases with over 10x less disk usage and a further reduction in accuracy.
                 'toy': Results in extremely small models. Only use this when prototyping, as the model quality will be severely reduced.
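The new portfolio can also be requested directly via `hyperparameters`, bypassing the preset's dataset-size dispatch; a sketch reusing the `train_data` from the earlier example:

    # always use the 22-model TabArena foundation-model portfolio, regardless of dataset size
    predictor = TabularPredictor(label="class").fit(
        train_data,
        hyperparameters="zeroshot_2025_tabfm",
    )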
@@ -505,6 +526,12 @@ class TabularPredictor:
                 'GBM' (LightGBM)
                 'CAT' (CatBoost)
                 'XGB' (XGBoost)
+                'EBM' (Explainable Boosting Machine)
+                'REALMLP' (RealMLP)
+                'TABM' (TabM)
+                'MITRA' (Mitra)
+                'TABICL' (TabICL)
+                'TABPFNV2' (TabPFNv2)
                 'RF' (random forest)
                 'XT' (extremely randomized trees)
                 'KNN' (k-nearest neighbors)
@@ -513,9 +540,8 @@ class TabularPredictor:
                 'FASTAI' (neural network with FastAI backend)
                 'AG_AUTOMM' (`MultimodalPredictor` from `autogluon.multimodal`. Supports Tabular, Text, and Image modalities. GPU is required.)
             Experimental model options include:
-                'FT_TRANSFORMER' (Tabular Transformer, GPU is recommended. Does not scale well to >100 features.)
+                'FT_TRANSFORMER' (Tabular Transformer, GPU is recommended. Does not scale well to >100 features. Recommended to use TabM instead.)
                 'FASTTEXT' (FastText. Note: Has not been tested for a long time.)
-                'TABPFN' (TabPFN. Does not scale well to >100 features or >1000 rows, and does not support regression. Extremely slow inference speed.)
                 'AG_TEXT_NN' (Multimodal Text+Tabular model, GPU is required. Recommended to instead use its successor, 'AG_AUTOMM'.)
                 'AG_IMAGE_NN' (Image model, GPU is required. Recommended to instead use its successor, 'AG_AUTOMM'.)
             If a certain key is missing from hyperparameters, then `fit()` will not train any models of that type. Omitting a model key from hyperparameters is equivalent to including this model key in `excluded_model_types`.
@@ -591,6 +617,8 @@ class TabularPredictor:
             Advanced functionality: Custom AutoGluon model arguments
                 These arguments are optional and can be specified in any model's hyperparameters.
                     Example: `hyperparameters = {'RF': {..., 'ag_args': {'name_suffix': 'CustomModelSuffix', 'disable_in_hpo': True}}}`
+                    Individual arguments can be passed for ag_args_fit by adding the prefix `ag.`: `hyperparameters = {'RF': {..., 'ag.num_cpus': 1}}`
+                    Individual arguments can be passed for ag_args_ensemble by adding the prefix `ag.ens`: `hyperparameters = {'RF': {..., 'ag.ens.fold_fitting_strategy': 'sequential_local'}}`
                 ag_args: Dictionary of customization options related to meta properties of the model such as its name, the order it is trained, the problem types it is valid for, and the type of HPO it utilizes.
                     Valid keys:
                         name: (str) The name of the model. This overrides AutoGluon's naming logic and all other name arguments if present.
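A short sketch of the documented prefix shorthand, which routes `ag.`-prefixed keys into `ag_args_fit` and `ag.ens.`-prefixed keys into `ag_args_ensemble` (the model hyperparameter values are illustrative):

    hyperparameters = {
        "RF": {"n_estimators": 300, "ag.num_cpus": 1},                # 'ag.num_cpus' -> ag_args_fit
        "GBM": {"ag.ens.fold_fitting_strategy": "sequential_local"},  # 'ag.ens.*' -> ag_args_ensemble
    }
    predictor = TabularPredictor(label="class").fit(train_data, hyperparameters=hyperparameters)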
@@ -621,6 +649,16 @@ class TabularPredictor:
                             How many GPUs to use during model fit.
                             If 'auto', model will decide. Some models can use GPUs but don't by default due to differences in model quality.
                             Set to 0 to disable usage of GPUs.
+                        max_rows : (int, default=None)
+                            If train_data has more rows than `max_rows`, the model will raise an AssertionError at the start of fit.
+                        max_features : (int, default=None)
+                            If train_data has more features than `max_features`, the model will raise an AssertionError at the start of fit.
+                        max_classes : (int, default=None)
+                            If train_data has more classes than `max_classes`, the model will raise an AssertionError at the start of fit.
+                        problem_types : (list[str], default=None)
+                            If the task is not a problem_type in `problem_types`, the model will raise an AssertionError at the start of fit.
+                        ignore_constraints : (bool, default=False)
+                            If True, will ignore the values of `max_rows`, `max_features`, `max_classes`, and `problem_types`, treating them as None.
                 ag_args_ensemble: Dictionary of hyperparameters shared by all models that control how they are ensembled, if bag mode is enabled.
                     Valid keys:
                         use_orig_features: [True, False, "never"], default True
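A sketch of the new fit-time guardrails documented above, expressed with the `ag.` shorthand; the thresholds are illustrative, not defaults:

    hyperparameters = {
        "TABPFNV2": {
            "ag.max_rows": 10000,                          # AssertionError at fit start if train_data is larger
            "ag.max_features": 500,                        # likewise for the feature count
            "ag.problem_types": ["binary", "multiclass"],  # only fit on these task types
        },
    }

Setting `ag.ignore_constraints` to True would treat all of these limits as None again.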
@@ -659,10 +697,10 @@ class TabularPredictor:
                         num_folds_parallel: (int or str, default='auto') Number of folds to be trained in parallel if using ParallelLocalFoldFittingStrategy. Consider lowering this value if you encounter either out of memory issue or CUDA out of memory issue(when trained on gpu).
                             if 'auto', will try to train all folds in parallel.
 
-        feature_metadata : :class:`autogluon.tabular.FeatureMetadata` or str, default = 'infer'
+        feature_metadata : :class:`autogluon.common.FeatureMetadata` or str, default = 'infer'
             The feature metadata used in various inner logic in feature preprocessing.
             If 'infer', will automatically construct a FeatureMetadata object based on the properties of `train_data`.
-            In this case, `train_data` is input into :meth:`autogluon.tabular.FeatureMetadata.from_df` to infer `feature_metadata`.
+            In this case, `train_data` is input into :meth:`autogluon.common.FeatureMetadata.from_df` to infer `feature_metadata`.
             If 'infer' incorrectly assumes the dtypes of features, consider explicitly specifying `feature_metadata`.
         infer_limit : float, default = None
             The inference time limit in seconds per row to adhere to during fit.
@@ -721,6 +759,7 @@ class TabularPredictor:
             If "sequential", models will be fit sequentially. This is the most stable option with the most readable logging.
             If "parallel", models will be fit in parallel with ray, splitting available compute between them.
                 Note: "parallel" is experimental and may run into issues. It was first added in version 1.2.0.
+                Note: "parallel" does not yet support running with GPUs.
             For machines with 16 or more CPU cores, it is likely that "parallel" will be faster than "sequential".
 
             .. versionadded:: 1.2.0
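Opting into the experimental parallel fit on a CPU-only machine might look like the following sketch (arguments illustrative):

    predictor = TabularPredictor(label="class").fit(
        train_data,
        presets="good_quality",
        fit_strategy="parallel",  # requires ray; per the note above, not yet usable with GPUs
    )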
@@ -931,14 +970,14 @@ class TabularPredictor:
                 This is because by default, refit_full will fall back to cloning the first fold of the bagged model in case it lacks memory to refit.
                 However, if `save_bag_folds=False`, this fallback isn't possible, as there is no fold model to clone because it wasn't saved.
                 In this scenario, refit will raise an exception for `save_bag_folds=False`, but will succeed if `save_bag_folds=True`.
-            Final disk usage of predictor will be identical regardless of the setting after `predictor.delete_models(models_to_keep="best", dry_run=False)` is called post-fit.
+            Final disk usage of predictor will be identical regardless of the setting after `predictor.delete_models(models_to_keep="best")` is called post-fit.
         set_best_to_refit_full : bool, default = False
             If True, will change the default model that Predictor uses for prediction when model is not specified to the refit_full version of the model that exhibited the highest validation score.
             Only valid if `refit_full` is set.
         keep_only_best : bool, default = False
             If True, only the best model and its ancestor models are saved in the outputted `predictor`. All other models are deleted.
             If you only care about deploying the most accurate predictor with the smallest file-size and no longer need any of the other trained models or functionality beyond prediction on new data, then set: `keep_only_best=True`, `save_space=True`.
-            This is equivalent to calling `predictor.delete_models(models_to_keep='best', dry_run=False)` directly after `fit()`.
+            This is equivalent to calling `predictor.delete_models(models_to_keep='best')` directly after `fit()`.
             If used with `refit_full` and `set_best_to_refit_full`, the best model will be the refit_full model, and the original bagged best model will be deleted.
             `refit_full` will be automatically set to 'best' in this case to avoid training models which will be later deleted.
         save_space : bool, default = False
@@ -1053,7 +1092,8 @@ class TabularPredictor:
         elif verbosity >= 4:
             logger.log(20, f"Verbosity: {verbosity} (Maximum Logging)")
 
-        include_gpu_count = verbosity >= 3
+        resource_manager: ResourceManager = get_resource_manager()
+        include_gpu_count = resource_manager.get_gpu_count_torch() or verbosity >= 3
         sys_msg = get_ag_system_info(path=self.path, include_gpu_count=include_gpu_count)
         logger.log(20, sys_msg)
 
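The new log gating relies on the shared resource manager from `autogluon.common`; a small sketch of the same check in isolation (`verbosity` is illustrative):

    from autogluon.common.utils.resource_utils import get_resource_manager

    verbosity = 2
    resource_manager = get_resource_manager()
    # GPU details are included in the system banner when a GPU is detected or verbosity >= 3
    include_gpu_count = bool(resource_manager.get_gpu_count_torch()) or verbosity >= 3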
@@ -1066,11 +1106,11 @@ class TabularPredictor:
                 20,
                 "No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...\n"
                 "\tRecommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):\n"
-                "\tpresets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.\n"
-                "\tpresets='best' : Maximize accuracy. Recommended for most users.\n"
-                "\tpresets='high' : Strong accuracy with fast inference speed.\n"
-                "\tpresets='good' : Good accuracy with very fast inference speed.\n"
-                "\tpresets='medium' : Fast training time, ideal for initial prototyping.",
+                "\tpresets='extreme' : New in v1.4: Massively better than 'best' on datasets <30000 samples by using new models meta-learned on https://tabarena.ai: TabPFNv2, TabICL, Mitra, and TabM. Absolute best accuracy. Requires a GPU. Recommended 64 GB CPU memory and 32+ GB GPU memory.\n"
+                "\tpresets='best' : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.\n"
+                "\tpresets='high' : Strong accuracy with fast inference speed.\n"
+                "\tpresets='good' : Good accuracy with very fast inference speed.\n"
+                "\tpresets='medium' : Fast training time, ideal for initial prototyping.",
             )
 
         kwargs_orig = kwargs.copy()
@@ -1093,10 +1133,6 @@ class TabularPredictor:
         self._validate_calibrate_decision_threshold(calibrate_decision_threshold=calibrate_decision_threshold)
         self._validate_fit_strategy(fit_strategy=fit_strategy)
 
-        holdout_frac = kwargs["holdout_frac"]
-        num_bag_folds = kwargs["num_bag_folds"]
-        num_bag_sets = kwargs["num_bag_sets"]
-        num_stack_levels = kwargs["num_stack_levels"]
         auto_stack = kwargs["auto_stack"]
         feature_generator = kwargs["feature_generator"]
         unlabeled_data = kwargs["unlabeled_data"]
@@ -1125,10 +1161,48 @@ class TabularPredictor:
         )
         infer_limit, infer_limit_batch_size = self._validate_infer_limit(infer_limit=infer_limit, infer_limit_batch_size=infer_limit_batch_size)
 
+        # TODO: Temporary for v1.4. Make this more extensible for v1.5 by letting users make their own dynamic hyperparameters.
+        dynamic_hyperparameters = kwargs["_experimental_dynamic_hyperparameters"]
+        if dynamic_hyperparameters:
+            logger.log(20, f"`extreme` preset uses a dynamic portfolio based on dataset size...")
+            assert hyperparameters is None, f"hyperparameters must be unspecified when `_experimental_dynamic_hyperparameters=True`."
+            n_samples = len(train_data)
+            if n_samples > 30000:
+                data_size = "large"
+            else:
+                data_size = "small"
+            assert data_size in ["large", "small"]
+            if data_size == "large":
+                logger.log(20, f"\tDetected data size: large (>30000 samples), using `zeroshot` portfolio (identical to 'best_quality' preset).")
+                hyperparameters = "zeroshot"
+            else:
+                if "num_stack_levels" not in kwargs_orig:
+                    # disable stacking for tabfm portfolio
+                    num_stack_levels = 0
+                    kwargs["num_stack_levels"] = 0
+                logger.log(
+                    20,
+                    f"\tDetected data size: small (<=30000 samples), using `zeroshot_2025_tabfm` portfolio."
+                    f"\n\t\tNote: `zeroshot_2025_tabfm` portfolio requires a CUDA compatible GPU for best performance."
+                    f"\n\t\tMake sure you have all the relevant dependencies installed: "
+                    f"`pip install autogluon.tabular[tabarena]`."
+                    f"\n\t\tIt is strongly recommended to use a machine with 64+ GB memory "
+                    f"and a CUDA compatible GPU with 32+ GB vRAM when using this preset. "
+                    f"\n\t\tThis portfolio will download foundation model weights from HuggingFace during training. "
+                    f"Ensure you have an internet connection or have pre-downloaded the weights to use these models."
+                    f"\n\t\tThis portfolio was meta-learned with TabArena: https://tabarena.ai"
+                )
+                hyperparameters = "zeroshot_2025_tabfm"
+
         if hyperparameters is None:
             hyperparameters = "default"
         if isinstance(hyperparameters, str):
+            hyperparameters_str = hyperparameters
             hyperparameters = get_hyperparameter_config(hyperparameters)
+            logger.log(
+                20,
+                f"Using hyperparameters preset: hyperparameters='{hyperparameters_str}'",
+            )
         self._validate_hyperparameters(hyperparameters=hyperparameters)
         self.fit_hyperparameters_ = hyperparameters
 
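The dispatch above can be reproduced without the experimental flag; a sketch mirroring the 30000-row threshold from this hunk:

    # mirror the `extreme` preset: foundation-model portfolio on small data,
    # the classic `zeroshot` portfolio (best_quality behavior) on large data
    hyperparameters = "zeroshot_2025_tabfm" if len(train_data) <= 30000 else "zeroshot"
    predictor = TabularPredictor(label="class").fit(train_data, hyperparameters=hyperparameters)

Note that this sketch omits the preset's extra step of disabling stacking (`num_stack_levels=0`) on the small-data branch.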
@@ -1154,16 +1228,46 @@ class TabularPredictor:
         else:
             ag_args_fit = learning_curves
 
+        use_bag_holdout_was_auto = False
+        dynamic_stacking_was_auto = False
+        if isinstance(use_bag_holdout, str) and use_bag_holdout == "auto":
+            use_bag_holdout = None
+            use_bag_holdout_was_auto = True
+        if isinstance(dynamic_stacking, str) and dynamic_stacking == "auto":
+            dynamic_stacking = None
+            dynamic_stacking_was_auto = True
+
+        (
+            num_bag_folds,
+            num_bag_sets,
+            num_stack_levels,
+            dynamic_stacking,
+            use_bag_holdout,
+            holdout_frac,
+            refit_full,
+        ) = get_validation_and_stacking_method(
+            num_bag_folds=kwargs["num_bag_folds"],
+            num_bag_sets=kwargs["num_bag_sets"],
+            use_bag_holdout=use_bag_holdout,
+            holdout_frac=kwargs["holdout_frac"],
+            auto_stack=auto_stack,
+            num_stack_levels=kwargs["num_stack_levels"],
+            dynamic_stacking=dynamic_stacking,
+            refit_full=kwargs["refit_full"],
+            num_train_rows=len(train_data),
+            problem_type=inferred_problem_type,
+            hpo_enabled=ag_args.get("hyperparameter_tune_kwargs", None) is not None,
+        )
+
         num_bag_folds, num_bag_sets, num_stack_levels, dynamic_stacking, use_bag_holdout = self._sanitize_stack_args(
             num_bag_folds=num_bag_folds,
             num_bag_sets=num_bag_sets,
             num_stack_levels=num_stack_levels,
-            time_limit=time_limit,
-            auto_stack=auto_stack,
             num_train_rows=len(train_data),
-            problem_type=inferred_problem_type,
             dynamic_stacking=dynamic_stacking,
             use_bag_holdout=use_bag_holdout,
+            use_bag_holdout_was_auto=use_bag_holdout_was_auto,
+            dynamic_stacking_was_auto=dynamic_stacking_was_auto,
         )
         if auto_stack:
             logger.log(
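Resolution of the `"auto"` sentinels now lives in `pipeline_presets.get_validation_and_stacking_method` instead of `_sanitize_stack_args`. Callers who want deterministic behavior can still pin the values explicitly, as in this sketch:

    predictor = TabularPredictor(label="class").fit(
        train_data,
        presets="best_quality",
        use_bag_holdout=True,    # skip the 'auto' row-count heuristic
        dynamic_stacking=False,  # skip the 'auto' stacking decision
    )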
@@ -1172,9 +1276,6 @@ class TabularPredictor:
                 f"num_stack_levels={num_stack_levels}, num_bag_folds={num_bag_folds}, num_bag_sets={num_bag_sets}",
             )
 
-        if holdout_frac is None:
-            holdout_frac = default_holdout_frac(len(train_data), ag_args.get("hyperparameter_tune_kwargs", None) is not None)
-
         if kwargs["save_bag_folds"] is not None and kwargs["_save_bag_folds"] is not None:
             raise ValueError(
                 f"Cannot specify both `save_bag_folds` and `_save_bag_folds` at the same time. "
@@ -1262,7 +1363,7 @@ class TabularPredictor:
         )
         ag_post_fit_kwargs = dict(
             keep_only_best=kwargs["keep_only_best"],
-            refit_full=kwargs["refit_full"],
+            refit_full=refit_full,
             set_best_to_refit_full=kwargs["set_best_to_refit_full"],
             save_space=kwargs["save_space"],
             calibrate=kwargs["calibrate"],
@@ -1531,7 +1632,6 @@ class TabularPredictor:
         if _ds_ray is not None:
             # Handle resources
             # FIXME: what about distributed?
-            from autogluon.common.utils.resource_utils import ResourceManager
 
             total_resources = ag_fit_kwargs["core_kwargs"]["total_resources"]
 
@@ -4339,7 +4439,7 @@ class TabularPredictor:
         models_to_delete: str | list[str] | None = None,
         allow_delete_cascade: bool = False,
         delete_from_disk: bool = True,
-        dry_run: bool = None,
+        dry_run: bool = False,
     ):
         """
         Deletes models from `predictor`.
@@ -4370,20 +4470,11 @@ class TabularPredictor:
             If `True`, deletes the models from disk if they were persisted.
             WARNING: This deletes the entire directory for the deleted models, and ALL FILES located there.
             It is highly recommended to first run with `dry_run=True` to understand which directories will be deleted.
-        dry_run : bool, default = True
-            WARNING: Starting in v1.4.0 dry_run will default to False.
+        dry_run : bool, default = False
             If `True`, then deletions don't occur, and logging statements are printed describing what would have occurred.
             Set `dry_run=False` to perform the deletions.
 
         """
-        if dry_run is None:
-            warnings.warn(
-                f"dry_run was not specified for `TabularPredictor.delete_models`. dry_run prior to version 1.4.0 defaults to True. "
-                f"Starting in version 1.4, AutoGluon will default dry_run to False. "
-                f"If you want to maintain the current logic in future versions, explicitly specify `dry_run=True`.",
-                category=FutureWarning,
-            )
-            dry_run = True
         self._assert_is_fit("delete_models")
         if models_to_keep == "best":
             models_to_keep = self.model_best
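Because `dry_run` now defaults to False, a preview pass must be requested explicitly; a sketch assuming a fitted `predictor`:

    # preview which models and directories would be removed
    predictor.delete_models(models_to_keep="best", dry_run=True)
    # then actually perform the deletion
    predictor.delete_models(models_to_keep="best")

Code that relied on the old implicit `dry_run=True` behavior will now delete models on the first call.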
@@ -5008,6 +5099,7 @@ class TabularPredictor:
             "learner_type",
             "learner_kwargs",
             "quantile_levels",
+            "default_base_path",
         }
         invalid_keys = []
         for key in kwargs:
@@ -5040,6 +5132,8 @@ class TabularPredictor:
             learning_curves=False,
             test_data=None,
             raise_on_model_failure=False,
+            # experimental
+            _experimental_dynamic_hyperparameters=False,
         )
         kwargs, ds_valid_keys = self._sanitize_dynamic_stacking_kwargs(kwargs)
         kwargs = self._validate_fit_extra_kwargs(kwargs, extra_valid_keys=list(fit_kwargs_default.keys()) + ds_valid_keys)
@@ -5429,41 +5523,12 @@ class TabularPredictor:
         num_bag_folds: int,
         num_bag_sets: int,
         num_stack_levels: int,
-        time_limit: float | None,
-        auto_stack: bool,
         num_train_rows: int,
-        problem_type: str,
         dynamic_stacking: bool | str,
         use_bag_holdout: bool | str,
+        use_bag_holdout_was_auto: bool,
+        dynamic_stacking_was_auto: bool,
     ):
-        use_bag_holdout_auto_threshold = 1000000
-        use_bag_holdout_was_auto = False
-        dynamic_stacking_was_auto = False
-        if isinstance(use_bag_holdout, str) and use_bag_holdout == "auto":
-            # Leverage use_bag_holdout when data is large to safeguard against stack leakage
-            use_bag_holdout = num_train_rows >= use_bag_holdout_auto_threshold
-            use_bag_holdout_was_auto = True
-        if isinstance(dynamic_stacking, str) and dynamic_stacking == "auto":
-            dynamic_stacking = not use_bag_holdout
-            dynamic_stacking_was_auto = True
-        if auto_stack:
-            # TODO: What about datasets that are 100k+? At a certain point should we not bag?
-            # TODO: What about time_limit? Metalearning can tell us expected runtime of each model, then we can select optimal folds + stack levels to fit time constraint
-            if num_bag_folds is None:
-                num_bag_folds = min(8, max(5, math.floor(num_train_rows / 10)))
-            if num_stack_levels is None:
-                if dynamic_stacking:
-                    num_stack_levels = 1
-                else:
-                    if use_bag_holdout or problem_type != BINARY:
-                        num_stack_levels = min(1, max(0, math.floor(num_train_rows / 750)))
-                    else:
-                        # Disable multi-layer stacking to avoid stack info leakage
-                        num_stack_levels = 0
-        if num_bag_folds is None:
-            num_bag_folds = 0
-        if num_stack_levels is None:
-            num_stack_levels = 0
         if not isinstance(num_bag_folds, int):
             raise ValueError(f"num_bag_folds must be an integer. (num_bag_folds={num_bag_folds})")
         if not isinstance(num_stack_levels, int):
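For reference, the deleted heuristic derived the bagging defaults from the row count; a worked sketch of the old formulas, now superseded by `get_validation_and_stacking_method`:

    import math

    num_train_rows = 1000
    # old default: between 5 and 8 folds, scaling with dataset size
    num_bag_folds = min(8, max(5, math.floor(num_train_rows / 10)))      # -> 8
    # old default without dynamic stacking: one stack level once >= 750 rows
    num_stack_levels = min(1, max(0, math.floor(num_train_rows / 750)))  # -> 1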
@@ -5472,8 +5537,6 @@ class TabularPredictor:
             raise ValueError(f"num_bag_folds must be equal to 0 or >=2. (num_bag_folds={num_bag_folds})")
         if num_stack_levels != 0 and num_bag_folds == 0:
             raise ValueError(f"num_stack_levels must be 0 if num_bag_folds is 0. (num_stack_levels={num_stack_levels}, num_bag_folds={num_bag_folds})")
-        if num_bag_sets is None:
-            num_bag_sets = 1
         if not isinstance(num_bag_sets, int):
             raise ValueError(f"num_bag_sets must be an integer. (num_bag_sets={num_bag_sets})")
         if not isinstance(dynamic_stacking, bool):
@@ -5483,11 +5546,11 @@ class TabularPredictor:
 
         if use_bag_holdout_was_auto and num_bag_folds != 0:
             if use_bag_holdout:
-                log_extra = f"Reason: num_train_rows >= {use_bag_holdout_auto_threshold}. (num_train_rows={num_train_rows})"
+                log_extra = f"Reason: num_train_rows >= {USE_BAG_HOLDOUT_AUTO_THRESHOLD}. (num_train_rows={num_train_rows})"
             else:
-                log_extra = f"Reason: num_train_rows < {use_bag_holdout_auto_threshold}. (num_train_rows={num_train_rows})"
+                log_extra = f"Reason: num_train_rows < {USE_BAG_HOLDOUT_AUTO_THRESHOLD}. (num_train_rows={num_train_rows})"
             logger.log(20, f"Setting use_bag_holdout from 'auto' to {use_bag_holdout}. {log_extra}")
-
+
         if dynamic_stacking and num_stack_levels < 1:
             log_extra_ds = f"Reason: Stacking is not enabled. (num_stack_levels={num_stack_levels})"
             if not dynamic_stacking_was_auto:
@@ -5550,7 +5613,7 @@ class TabularPredictor:
         Identical to performing the following operations in order:
 
         predictor_clone = predictor.clone(path=path, return_clone=True, dirs_exist_ok=dirs_exist_ok)
-        predictor_clone.delete_models(models_to_keep=model, dry_run=False)
+        predictor_clone.delete_models(models_to_keep=model)
         predictor_clone.set_model_best(model=model, save_trainer=True)
         predictor_clone.save_space()
 
@@ -5562,7 +5625,7 @@ class TabularPredictor:
             The model to use in the optimized predictor clone.
             All other unrelated models will be deleted to save disk space.
             Refer to the `models_to_keep` argument of `predictor.delete_models` for available options.
-            Internally calls `predictor_clone.delete_models(models_to_keep=model, dry_run=False)`
+            Internally calls `predictor_clone.delete_models(models_to_keep=model)`
         return_clone : bool, default = False
             If True, returns the loaded cloned TabularPredictor object.
             If False, returns the local path to the cloned TabularPredictor object.
--- autogluon/tabular/registry/_ag_model_registry.py (1.3.2b20250610)
+++ autogluon/tabular/registry/_ag_model_registry.py (1.4.1b20251214)
@@ -8,6 +8,7 @@ from . import ModelRegistry
 from ..models import (
     BoostedRulesModel,
     CatBoostModel,
+    EBMModel,
     FastTextModel,
     FigsModel,
     FTTransformerModel,
@@ -19,10 +20,14 @@ from ..models import (
     LinearModel,
     MultiModalPredictorModel,
     NNFastAiTabularModel,
+    RealMLPModel,
     RFModel,
     RuleFitModel,
+    TabICLModel,
+    TabMModel,
     TabPFNMixModel,
-    TabPFNModel,
+    MitraModel,
+    TabPFNV2Model,
     TabularNeuralNetTorchModel,
     TextPredictorModel,
     XGBoostModel,
@@ -38,6 +43,7 @@ REGISTERED_MODEL_CLS_LST = [
     LGBModel,
     CatBoostModel,
     XGBoostModel,
+    RealMLPModel,
     TabularNeuralNetTorchModel,
     LinearModel,
     NNFastAiTabularModel,
@@ -45,8 +51,11 @@ REGISTERED_MODEL_CLS_LST = [
     ImagePredictorModel,
     MultiModalPredictorModel,
     FTTransformerModel,
-    TabPFNModel,
+    TabICLModel,
+    TabMModel,
     TabPFNMixModel,
+    TabPFNV2Model,
+    MitraModel,
     FastTextModel,
     GreedyWeightedEnsembleModel,
     SimpleWeightedEnsembleModel,
@@ -56,6 +65,7 @@ REGISTERED_MODEL_CLS_LST = [
     HSTreeModel,
     BoostedRulesModel,
     DummyModel,
+    EBMModel,
 ]
 
 # TODO: Replace logic in `autogluon.tabular.trainer.model_presets.presets` with `ag_model_registry`
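With these registrations in place, the new models can be requested by their keys in `fit()`; a sketch where the empty dicts request each model's default hyperparameters:

    from autogluon.tabular import TabularPredictor

    predictor = TabularPredictor(label="class").fit(
        train_data,
        hyperparameters={
            "TABM": {},   # TabM
            "EBM": {},    # Explainable Boosting Machine
            "MITRA": {},  # Mitra (GPU recommended)
        },
    )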
|