PyPI - autogluon.tabular - Versions diffs - 1.3.2b20250723__py3-none-any.whl → 1.4.0b20250725__py3-none-any.whl - Mend

autogluon.tabular 1.3.2b20250723__py3-none-any.whl → 1.4.0b20250725__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of autogluon.tabular might be problematic. Click here for more details.

Files changed (31) hide show

autogluon/tabular/models/mitra/mitra_model.py CHANGED Viewed

@@ -1,49 +1,56 @@
-# TODO: To ensure deterministic operations we need to set torch.use_deterministic_algorithms(True)
-# and os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'. The CUBLAS environment variable configures
-# the workspace size for certain CUBLAS operations to ensure reproducibility when using CUDA >= 10.2.
-# Both settings are required to ensure deterministic behavior in operations such as matrix multiplications.
-import os
-os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+from __future__ import annotations
+import logging
 import os
 from typing import List, Optional
 import pandas as pd
-import torch
-import logging
 from autogluon.common.utils.resource_utils import ResourceManager
 from autogluon.core.models import AbstractModel
+from autogluon.features.generators import LabelEncoderFeatureGenerator
+from autogluon.tabular import __version__
 logger = logging.getLogger(__name__)
-# TODO: Needs memory usage estimate method
 class MitraModel(AbstractModel):
+    """
+    Mitra is a tabular foundation model pre-trained purely on synthetic data with the goal
+    of optimizing fine-tuning performance over in-context learning performance.
+    Mitra was developed by the AutoGluon team @ AWS AI.
+    Mitra's default hyperparameters outperforms all methods for small datasets on TabArena-v0.1 (excluding ensembling): https://tabarena.ai
+    Authors: Xiyuan Zhang, Danielle C. Maddix, Junming Yin, Nick Erickson, Abdul Fatir Ansari, Boran Han, Shuai Zhang, Leman Akoglu, Christos Faloutsos, Michael W. Mahoney, Cuixiong Hu, Huzefa Rangwala, George Karypis, Bernie Wang
+    Blog Post: https://www.amazon.science/blog/mitra-mixed-synthetic-priors-for-enhancing-tabular-foundation-models
+    License: Apache-2.0
+    .. versionadded:: 1.4.0
+    """
     ag_key = "MITRA"
     ag_name = "Mitra"
     weights_file_name = "model.pt"
     ag_priority = 55
-    def __init__(self, problem_type=None, **kwargs):
+    def __init__(self, **kwargs):
         super().__init__(**kwargs)
-        self.problem_type = problem_type
         self._weights_saved = False
+        self._feature_generator = None
     @staticmethod
     def _get_default_device():
         """Get the best available device for the current system."""
         if ResourceManager.get_gpu_count_torch(cuda_only=True) > 0:
-            logger.info("Using CUDA GPU")
+            logger.log(15, "Using CUDA GPU")
             return "cuda"
         else:
             return "cpu"
     def get_model_cls(self):
-        from .sklearn_interface import MitraClassifier
         if self.problem_type in ["binary", "multiclass"]:
+            from .sklearn_interface import MitraClassifier
             model_cls = MitraClassifier
         elif self.problem_type == "regression":
             from .sklearn_interface import MitraRegressor
@@ -53,6 +60,23 @@ class MitraModel(AbstractModel):
             raise AssertionError(f"Unsupported problem_type: {self.problem_type}")
         return model_cls
+    def _preprocess(self, X: pd.DataFrame, is_train: bool = False, **kwargs) -> pd.DataFrame:
+        X = super()._preprocess(X, **kwargs)
+        if is_train:
+            # X will be the training data.
+            self._feature_generator = LabelEncoderFeatureGenerator(verbosity=0)
+            self._feature_generator.fit(X=X)
+        # This converts categorical features to numeric via stateful label encoding.
+        if self._feature_generator.features_in:
+            X = X.copy()
+            X[self._feature_generator.features_in] = self._feature_generator.transform(
+                X=X
+            )
+        return X
     def _fit(
         self,
         X: pd.DataFrame,
@@ -61,11 +85,25 @@ class MitraModel(AbstractModel):
         y_val: pd.Series = None,
         time_limit: float = None,
         num_cpus: int = 1,
+        num_gpus: float = 0,
+        verbosity: int = 2,
         **kwargs,
     ):
         # TODO: Reset the number of threads based on the specified num_cpus
         need_to_reset_torch_threads = False
         torch_threads_og = None
+        try:
+            model_cls = self.get_model_cls()
+            import torch
+        except ImportError as err:
+            logger.log(
+                40,
+                f"\tFailed to import Mitra! To use the Mitra model, "
+                f"do: `pip install autogluon.tabular[mitra]=={__version__}`.",
+            )
+            raise err
         if num_cpus is not None and isinstance(num_cpus, (int, float)):
             torch_threads_og = torch.get_num_threads()
             if torch_threads_og != num_cpus:
@@ -73,9 +111,14 @@ class MitraModel(AbstractModel):
                 torch.set_num_threads(num_cpus)
                 need_to_reset_torch_threads = True
-        model_cls = self.get_model_cls()
         hyp = self._get_model_params()
+        if hyp.get("device", None) is None:
+            if num_gpus == 0:
+                hyp["device"] = "cpu"
+            else:
+                hyp["device"] = self._get_default_device()
         if "state_dict_classification" in hyp:
             state_dict_classification = hyp.pop("state_dict_classification")
             if self.problem_type in ["binary", "multiclass"]:
@@ -85,11 +128,14 @@ class MitraModel(AbstractModel):
             if self.problem_type in ["regression"]:
                 hyp["state_dict"] = state_dict_regression
+        if "verbose" not in hyp:
+            hyp["verbose"] = verbosity >= 3
         self.model = model_cls(
             **hyp,
         )
-        X = self.preprocess(X)
+        X = self.preprocess(X, is_train=True)
         if X_val is not None:
             X_val = self.preprocess(X_val)
@@ -106,7 +152,6 @@ class MitraModel(AbstractModel):
     def _set_default_params(self):
         default_params = {
-            "device": self._get_default_device(),
             "n_estimators": 1,
         }
         for param, val in default_params.items():
@@ -184,6 +229,24 @@ class MitraModel(AbstractModel):
         return num_cpus, num_gpus
+    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
+        """
+        Parameters
+        ----------
+        is_gpu_available : bool, default = False
+            Whether gpu is available in the system.
+            Model that can be trained both on cpu and gpu can decide the minimum resources based on this.
+        Returns a dictionary of minimum resource requirements to fit the model.
+        Subclass should consider overriding this method if it requires more resources to train.
+        If a resource is not part of the output dictionary, it is considered unnecessary.
+        Valid keys: 'num_cpus', 'num_gpus'.
+        """
+        return {
+            "num_cpus": 1,
+            "num_gpus": 0.5,
+        }
     def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
         return self.estimate_memory_usage_static(
             X=X, problem_type=self.problem_type, num_classes=self.num_classes, **kwargs
@@ -196,12 +259,13 @@ class MitraModel(AbstractModel):
         X: pd.DataFrame,
         **kwargs,
     ) -> int:
-        return max(
+        # Multiply by 0.9 as currently this is overly safe
+        return int(0.9 * max(
             cls._estimate_memory_usage_static_cpu_icl(X=X, **kwargs),
             cls._estimate_memory_usage_static_cpu_ft_icl(X=X, **kwargs),
             cls._estimate_memory_usage_static_gpu_cpu(X=X, **kwargs),
             cls._estimate_memory_usage_static_gpu_gpu(X=X, **kwargs),
-        )
+        ))
     @classmethod
     def _estimate_memory_usage_static_cpu_icl(

autogluon/tabular/models/mitra/sklearn_interface.py CHANGED Viewed

@@ -1,3 +1,6 @@
+from __future__ import annotations
+import os
 import time
 from pathlib import Path
 import contextlib
@@ -76,6 +79,7 @@ class MitraBase(BaseEstimator):
             random_mirror_regression=RANDOM_MIRROR_REGRESSION,
             random_mirror_x=RANDOM_MIRROR_X,
             seed=SEED,
+            verbose=True,
         ):
         """
         Initialize the base Mitra model.
@@ -114,8 +118,11 @@ class MitraBase(BaseEstimator):
         self.trainers = []
         self.train_time = 0
         self.seed = seed
+        self.verbose = verbose
-        set_seed(self.seed)
+        # FIXME: set_seed was removed in v1.4 as quality and speed reduction was observed when setting seed.
+        #  This should be investigated and fixed for v1.5
+        # set_seed(self.seed)
     def _create_config(self, task, dim_output, time_limit=None):
         cfg = ConfigRun(
@@ -183,6 +190,7 @@ class MitraBase(BaseEstimator):
         """Train the ensemble of models."""
         cfg, Tab2D = self._create_config(task, dim_output, time_limit)
+        rng = np.random.RandomState(cfg.seed)
         success = False
         while not (success and cfg.hyperparams["max_samples_support"] > 0 and cfg.hyperparams["max_samples_query"] > 0):
@@ -217,7 +225,7 @@ class MitraBase(BaseEstimator):
                             path_to_weights=Path(self.state_dict),
                             device=self.device,
                         )
-                    trainer = TrainerFinetune(cfg, model, n_classes=n_classes, device=self.device)
+                    trainer = TrainerFinetune(cfg, model, n_classes=n_classes, device=self.device, rng=rng, verbose=self.verbose)
                     start_time = time.time()
                     trainer.train(X_train, y_train, X_valid, y_valid)
@@ -275,6 +283,7 @@ class MitraClassifier(MitraBase, ClassifierMixin):
             random_mirror_regression=RANDOM_MIRROR_REGRESSION,
             random_mirror_x=RANDOM_MIRROR_X,
             seed=SEED,
+            verbose=True,
         ):
         """Initialize the classifier."""
         super().__init__(
@@ -294,6 +303,7 @@ class MitraClassifier(MitraBase, ClassifierMixin):
             random_mirror_regression=random_mirror_regression,
             random_mirror_x=random_mirror_x,
             seed=seed,
+            verbose=verbose,
         )
         self.task = 'classification'
@@ -403,6 +413,7 @@ class MitraRegressor(MitraBase, RegressorMixin):
             random_mirror_regression=RANDOM_MIRROR_REGRESSION,
             random_mirror_x=RANDOM_MIRROR_X,
             seed=SEED,
+            verbose=True,
         ):
         """Initialize the regressor."""
         super().__init__(
@@ -422,6 +433,7 @@ class MitraRegressor(MitraBase, RegressorMixin):
             random_mirror_regression=random_mirror_regression,
             random_mirror_x=random_mirror_x,
             seed=seed,
+            verbose=verbose,
         )
         self.task = 'regression'
@@ -492,14 +504,4 @@ class MitraRegressor(MitraBase, RegressorMixin):
 @contextlib.contextmanager
 def mitra_deterministic_context():
     """Context manager to set deterministic settings only for Mitra operations."""
-    original_deterministic_algorithms_set = False
-    try:
-        torch.use_deterministic_algorithms(True)
-        original_deterministic_algorithms_set = True
-        yield
-    finally:
-        if original_deterministic_algorithms_set:
-            torch.use_deterministic_algorithms(False)
+    yield

autogluon/tabular/models/realmlp/realmlp_model.py CHANGED Viewed

@@ -1,11 +1,5 @@
 """
 Code Adapted from TabArena: https://github.com/autogluon/tabrepo/blob/main/tabrepo/benchmark/models/ag/realmlp/realmlp_model.py
-Model: RealMLP
-Paper: Better by Default: Strong Pre-Tuned MLPs and Boosted Trees on Tabular Data
-Authors: David Holzmüller, Léo Grinsztajn, Ingo Steinwart
-Codebase: https://github.com/dholzmueller/pytabkit
-License: Apache-2.0
 """
 from __future__ import annotations
@@ -41,6 +35,19 @@ def set_logger_level(logger_name: str, level: int):
 # pip install pytabkit
 class RealMLPModel(AbstractModel):
+    """
+    RealMLP is an improved multilayer perception (MLP) model
+    through a bag of tricks and better default hyperparameters.
+    RealMLP is the top performing method overall on TabArena-v0.1: https://tabarena.ai
+    Paper: Better by Default: Strong Pre-Tuned MLPs and Boosted Trees on Tabular Data
+    Authors: David Holzmüller, Léo Grinsztajn, Ingo Steinwart
+    Codebase: https://github.com/dholzmueller/pytabkit
+    License: Apache-2.0
+    .. versionadded:: 1.4.0
+    """
     ag_key = "REALMLP"
     ag_name = "RealMLP"
     ag_priority = 75

autogluon/tabular/models/tabicl/tabicl_model.py CHANGED Viewed

@@ -1,10 +1,5 @@
 """
 Code Adapted from TabArena: https://github.com/autogluon/tabrepo/blob/main/tabrepo/benchmark/models/ag/tabicl/tabicl_model.py
-Model: TabICL
-Paper: TabICL: A Tabular Foundation Model for In-Context Learning on Large Data
-Authors: Jingang Qu, David Holzmüller, Gaël Varoquaux, Marine Le Morvan
-Codebase: https://github.com/soda-inria/tabicl
-License: BSD-3-Clause
 """
 from __future__ import annotations
@@ -23,6 +18,20 @@ logger = logging.getLogger(__name__)
 # TODO: Verify if crashes when weights are not yet downloaded and fit in parallel
 class TabICLModel(AbstractModel):
+    """
+    TabICL is a foundation model for tabular data using in-context learning
+    that is scalable to larger datasets than TabPFNv2. It is pretrained purely on synthetic data.
+    TabICL currently only supports classification tasks.
+    TabICL is one of the top performing methods overall on TabArena-v0.1: https://tabarena.ai
+    Paper: TabICL: A Tabular Foundation Model for In-Context Learning on Large Data
+    Authors: Jingang Qu, David Holzmüller, Gaël Varoquaux, Marine Le Morvan
+    Codebase: https://github.com/soda-inria/tabicl
+    License: BSD-3-Clause
+    .. versionadded:: 1.4.0
+    """
     ag_key = "TABICL"
     ag_name = "TabICL"
     ag_priority = 65
@@ -98,8 +107,8 @@ class TabICLModel(AbstractModel):
         default_auxiliary_params = super()._get_default_auxiliary_params()
         default_auxiliary_params.update(
             {
-                "max_rows": 100000,
-                "max_features": 500,
+                "max_rows": 30000,
+                "max_features": 2000,
             }
         )
         return default_auxiliary_params
@@ -147,7 +156,7 @@ class TabICLModel(AbstractModel):
         model_mem_estimate *= 1.3  # add 30% buffer
         # TODO: Observed memory spikes above expected values on large datasets, increasing mem estimate to compensate
-        model_mem_estimate *= 1.5
+        model_mem_estimate *= 2.0  # Note: 1.5 is not large enough, still gets OOM
         mem_estimate = model_mem_estimate + dataset_size_mem_est + baseline_overhead_mem_est

autogluon/tabular/models/tabm/rtdl_num_embeddings.py CHANGED Viewed

@@ -1,6 +1,8 @@
 # taken from https://github.com/yandex-research/rtdl-num-embeddings/blob/main/package/rtdl_num_embeddings.py
 """On Embeddings for Numerical Features in Tabular Deep Learning."""
+from __future__ import annotations
 __version__ = '0.0.12'
 __all__ = [
@@ -12,6 +14,7 @@ __all__ = [
     'compute_bins',
 ]
 import math
 import warnings
 from typing import Any, Literal, Optional, Union

autogluon/tabular/models/tabm/tabm_model.py CHANGED Viewed

@@ -4,12 +4,6 @@ Note: This is a custom implementation of TabM based on TabArena. Because the Aut
 the same time as TabM became available on PyPi, we chose to use TabArena's implementation
 for the AutoGluon 1.4 release as it has already been benchmarked.
-Model: TabM
-Paper: TabM: Advancing Tabular Deep Learning with Parameter-Efficient Ensembling
-Authors: Yury Gorishniy, Akim Kotelnikov, Artem Babenko
-Codebase: https://github.com/yandex-research/tabm
-License: Apache-2.0
 Partially adapted from pytabkit's TabM implementation.
 """
@@ -28,6 +22,20 @@ logger = logging.getLogger(__name__)
 class TabMModel(AbstractModel):
+    """
+    TabM is an efficient ensemble of MLPs that is trained simultaneously with mostly shared parameters.
+    TabM is one of the top performing methods overall on TabArena-v0.1: https://tabarena.ai
+    Paper: TabM: Advancing Tabular Deep Learning with Parameter-Efficient Ensembling
+    Authors: Yury Gorishniy, Akim Kotelnikov, Artem Babenko
+    Codebase: https://github.com/yandex-research/tabm
+    License: Apache-2.0
+    Partially adapted from pytabkit's TabM implementation.
+    .. versionadded:: 1.4.0
+    """
     ag_key = "TABM"
     ag_name = "TabM"
     ag_priority = 85

autogluon/tabular/models/tabm/tabm_reference.py CHANGED Viewed

@@ -3,6 +3,8 @@
 # NOTE
 # The minimum required versions of the dependencies are specified in README.md.
+from __future__ import annotations
 import itertools
 from typing import Any, Literal, Union

autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py CHANGED Viewed

@@ -26,6 +26,8 @@ class TabPFNMixModel(AbstractModel):
     TabPFNMix is based off of the TabPFN and TabForestPFN models.
+    We recommend using Mitra instead, as it is an improved version of TabPFNMix.
     It is a tabular transformer model pre-trained on purely synthetic data.
     It currently has several limitations:
@@ -34,6 +36,8 @@ class TabPFNMixModel(AbstractModel):
     3. Does not support GPU
     For more information, refer to the `./_internals/README.md` file.
+    .. versionadded:: 1.2.0
     """
     ag_key = "TABPFNMIX"
     ag_name = "TabPFNMix"

autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py CHANGED Viewed

@@ -1,11 +1,5 @@
 """
 Code Adapted from TabArena: https://github.com/autogluon/tabrepo/blob/main/tabrepo/benchmark/models/ag/tabpfnv2/tabpfnv2_model.py
-Model: TabPFNv2
-Paper: Accurate predictions on small data with a tabular foundation model
-Authors: Noah Hollmann, Samuel Müller, Lennart Purucker, Arjun Krishnakumar, Max Körfer, Shi Bin Hoo, Robin Tibor Schirrmeister & Frank Hutter
-Codebase: https://github.com/PriorLabs/TabPFN
-License: https://github.com/PriorLabs/TabPFN/blob/main/LICENSE
 """
 from __future__ import annotations
@@ -111,6 +105,20 @@ class FixedSafePowerTransformer(PowerTransformer):
 class TabPFNV2Model(AbstractModel):
+    """
+    TabPFNv2 is a tabular foundation model pre-trained purely on synthetic data that achieves
+    state-of-the-art results with in-context learning on small datasets with <=10000 samples and <=500 features.
+    TabPFNv2 is developed and maintained by PriorLabs: https://priorlabs.ai/
+    TabPFNv2 is the top performing method for small datasets on TabArena-v0.1: https://tabarena.ai
+    Paper: Accurate predictions on small data with a tabular foundation model
+    Authors: Noah Hollmann, Samuel Müller, Lennart Purucker, Arjun Krishnakumar, Max Körfer, Shi Bin Hoo, Robin Tibor Schirrmeister & Frank Hutter
+    Codebase: https://github.com/PriorLabs/TabPFN
+    License: https://github.com/PriorLabs/TabPFN/blob/main/LICENSE
+    .. versionadded:: 1.4.0
+    """
     ag_key = "TABPFNV2"
     ag_name = "TabPFNv2"
     ag_priority = 105
@@ -119,12 +127,14 @@ class TabPFNV2Model(AbstractModel):
         super().__init__(**kwargs)
         self._feature_generator = None
         self._cat_features = None
+        self._cat_indices = None
     def _preprocess(self, X: pd.DataFrame, is_train=False, **kwargs) -> pd.DataFrame:
         X = super()._preprocess(X, **kwargs)
-        self._cat_indices = []
         if is_train:
+            self._cat_indices = []
             # X will be the training data.
             self._feature_generator = LabelEncoderFeatureGenerator(verbosity=0)
             self._feature_generator.fit(X=X)
@@ -136,10 +146,11 @@ class TabPFNV2Model(AbstractModel):
                 X=X
             )
-            # Detect/set cat features and indices
-            if self._cat_features is None:
-                self._cat_features = self._feature_generator.features_in[:]
-            self._cat_indices = [X.columns.get_loc(col) for col in self._cat_features]
+            if is_train:
+                # Detect/set cat features and indices
+                if self._cat_features is None:
+                    self._cat_features = self._feature_generator.features_in[:]
+                self._cat_indices = [X.columns.get_loc(col) for col in self._cat_features]
         return X
@@ -187,6 +198,12 @@ class TabPFNV2Model(AbstractModel):
             # logs "Built with PriorLabs-TabPFN"
             self._log_license(device=device)
+        if num_gpus == 0:
+            logger.log(
+                30,
+                f"\tWARNING: Running TabPFNv2 on CPU. This can be very slow. We recommend using a GPU instead."
+            )
         X = self.preprocess(X, is_train=True)
         hps = self._get_model_params()
@@ -366,7 +383,7 @@ class TabPFNV2Model(AbstractModel):
         # Add some buffer to each term + 1 GB overhead to be safe
         return int(
-            model_mem + 4 * X_mem + 1.5 * activation_mem + baseline_overhead_mem_est
+            model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est
         )
     @classmethod

autogluon/tabular/predictor/predictor.py CHANGED Viewed

@@ -1068,11 +1068,11 @@ class TabularPredictor:
                 20,
                 "No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...\n"
                 "\tRecommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):\n"
-                "\tpresets='experimental' : New in v1.2: Pre-trained foundation model + parallel fits. The absolute best accuracy without consideration for inference speed. Does not support GPU.\n"
-                "\tpresets='best'         : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.\n"
-                "\tpresets='high'         : Strong accuracy with fast inference speed.\n"
-                "\tpresets='good'         : Good accuracy with very fast inference speed.\n"
-                "\tpresets='medium'       : Fast training time, ideal for initial prototyping.",
+                "\tpresets='extreme' : New in v1.4: Massively better than 'best' on datasets <30000 samples by using new models meta-learned on https://tabarena.ai: TabPFNv2, TabICL, Mitra, and TabM. Absolute best accuracy. Requires a GPU. Recommended 64 GB CPU memory and 32+ GB GPU memory.\n"
+                "\tpresets='best'    : Maximize accuracy. Recommended for most users. Use in competitions and benchmarks.\n"
+                "\tpresets='high'    : Strong accuracy with fast inference speed.\n"
+                "\tpresets='good'    : Good accuracy with very fast inference speed.\n"
+                "\tpresets='medium'  : Fast training time, ideal for initial prototyping.",
             )
         kwargs_orig = kwargs.copy()
@@ -1127,10 +1127,48 @@ class TabularPredictor:
         )
         infer_limit, infer_limit_batch_size = self._validate_infer_limit(infer_limit=infer_limit, infer_limit_batch_size=infer_limit_batch_size)
+        # TODO: Temporary for v1.4. Make this more extensible for v1.5 by letting users make their own dynamic hyperparameters.
+        dynamic_hyperparameters = kwargs["_experimental_dynamic_hyperparameters"]
+        if dynamic_hyperparameters:
+            logger.log(20, f"`extreme` preset uses a dynamic portfolio based on dataset size...")
+            assert hyperparameters is None, f"hyperparameters must be unspecified when `_experimental_dynamic_hyperparameters=True`."
+            n_samples = len(train_data)
+            if n_samples > 30000:
+                data_size = "large"
+            else:
+                data_size = "small"
+            assert data_size in ["large", "small"]
+            if data_size == "large":
+                logger.log(20, f"\tDetected data size: large (>30000 samples), using `zeroshot` portfolio (identical to 'best_quality' preset).")
+                hyperparameters = "zeroshot"
+            else:
+                if "num_stack_levels" not in kwargs_orig:
+                    # disable stacking for tabfm portfolio
+                    num_stack_levels = 0
+                    kwargs["num_stack_levels"] = 0
+                logger.log(
+                    20,
+                    f"\tDetected data size: small (<=30000 samples), using `zeroshot_2025_tabfm` portfolio."
+                    f"\n\t\tNote: `zeroshot_2025_tabfm` portfolio requires a CUDA compatible GPU for best performance."
+                    f"\n\t\tMake sure you have all the relevant dependencies installed: "
+                    f"`pip install autogluon.tabular[tabarena]`."
+                    f"\n\t\tIt is strongly recommended to use a machine with 64+ GB memory "
+                    f"and a CUDA compatible GPU with 32+ GB vRAM when using this preset. "
+                    f"\n\t\tThis portfolio will download foundation model weights from HuggingFace during training. "
+                    f"Ensure you have an internet connection or have pre-downloaded the weights to use these models."
+                    f"\n\t\tThis portfolio was meta-learned with TabArena: https://tabarena.ai"
+                )
+                hyperparameters = "zeroshot_2025_tabfm"
         if hyperparameters is None:
             hyperparameters = "default"
         if isinstance(hyperparameters, str):
+            hyperparameters_str = hyperparameters
             hyperparameters = get_hyperparameter_config(hyperparameters)
+            logger.log(
+                20,
+                f"Using hyperparameters preset: hyperparameters='{hyperparameters_str}'",
+            )
         self._validate_hyperparameters(hyperparameters=hyperparameters)
         self.fit_hyperparameters_ = hyperparameters
@@ -5042,6 +5080,8 @@ class TabularPredictor:
             learning_curves=False,
             test_data=None,
             raise_on_model_failure=False,
+            # experimental
+            _experimental_dynamic_hyperparameters=False,
         )
         kwargs, ds_valid_keys = self._sanitize_dynamic_stacking_kwargs(kwargs)
         kwargs = self._validate_fit_extra_kwargs(kwargs, extra_valid_keys=list(fit_kwargs_default.keys()) + ds_valid_keys)

autogluon/tabular/trainer/abstract_trainer.py CHANGED Viewed

@@ -2131,6 +2131,8 @@ class AbstractTabularTrainer(AbstractTrainer[AbstractModel]):
         if isinstance(model, BaggedEnsembleModel) and not compute_score:
             # Do not perform OOF predictions when we don't compute a score.
             model_fit_kwargs["_skip_oof"] = True
+        if not isinstance(model, BaggedEnsembleModel):
+            model_fit_kwargs.setdefault("log_resources", True)
         model_fit_kwargs = dict(
             model=model,

autogluon/tabular/version.py CHANGED Viewed

@@ -1,4 +1,4 @@
 """This is the autogluon version file."""
-__version__ = "1.3.2b20250723"
+__version__ = "1.4.0b20250725"
 __lite__ = False

autogluon.tabular 1.3.2b20250723__py3-none-any.whl → 1.4.0b20250725__py3-none-any.whl

Potentially problematic release.

autogluon.tabular 1.3.2b20250723__py3-none-any.whl → 1.4.0b20250725__py3-none-any.whl