PyPI - autogluon.tabular - Versions diffs - 1.3.2b20250722__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

autogluon.tabular 1.3.2b20250722__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py ADDED Viewed

@@ -0,0 +1,310 @@
+# optimized for <=10000 samples and <=500 features, with a GPU present
+hyperparameter_portfolio_zeroshot_2025_small = {
+    "TABPFNV2": [
+        {
+            "ag_args": {'name_suffix': '_r143', 'priority': -1},
+            "average_before_softmax": False,
+            "classification_model_path": 'tabpfn-v2-classifier-od3j1g5m.ckpt',
+            "inference_config/FINGERPRINT_FEATURE": False,
+            "inference_config/OUTLIER_REMOVAL_STD": None,
+            "inference_config/POLYNOMIAL_FEATURES": 'no',
+            "inference_config/PREPROCESS_TRANSFORMS": [{'append_original': True, 'categorical_name': 'ordinal_very_common_categories_shuffled', 'global_transformer_name': None, 'name': 'safepower', 'subsample_features': -1}, {'append_original': True, 'categorical_name': 'ordinal_very_common_categories_shuffled', 'global_transformer_name': None, 'name': 'quantile_uni', 'subsample_features': -1}],
+            "inference_config/REGRESSION_Y_PREPROCESS_TRANSFORMS": [None, 'power'],
+            "inference_config/SUBSAMPLE_SAMPLES": 0.99,
+            "model_type": 'single',
+            "n_ensemble_repeats": 4,
+            "regression_model_path": 'tabpfn-v2-regressor-wyl4o83o.ckpt',
+            "softmax_temperature": 0.75,
+        },
+        {
+            "ag_args": {'name_suffix': '_r94', 'priority': -3},
+            "average_before_softmax": True,
+            "classification_model_path": 'tabpfn-v2-classifier-vutqq28w.ckpt',
+            "inference_config/FINGERPRINT_FEATURE": True,
+            "inference_config/OUTLIER_REMOVAL_STD": None,
+            "inference_config/POLYNOMIAL_FEATURES": 'no',
+            "inference_config/PREPROCESS_TRANSFORMS": [{'append_original': True, 'categorical_name': 'ordinal_very_common_categories_shuffled', 'global_transformer_name': None, 'name': 'quantile_uni', 'subsample_features': 0.99}],
+            "inference_config/REGRESSION_Y_PREPROCESS_TRANSFORMS": [None],
+            "inference_config/SUBSAMPLE_SAMPLES": None,
+            "model_type": 'single',
+            "n_ensemble_repeats": 4,
+            "regression_model_path": 'tabpfn-v2-regressor-5wof9ojf.ckpt',
+            "softmax_temperature": 0.9,
+        },
+        {
+            "ag_args": {'name_suffix': '_r181', 'priority': -4},
+            "average_before_softmax": False,
+            "classification_model_path": 'tabpfn-v2-classifier-llderlii.ckpt',
+            "inference_config/FINGERPRINT_FEATURE": False,
+            "inference_config/OUTLIER_REMOVAL_STD": 9.0,
+            "inference_config/POLYNOMIAL_FEATURES": 50,
+            "inference_config/PREPROCESS_TRANSFORMS": [{'append_original': True, 'categorical_name': 'onehot', 'global_transformer_name': 'svd', 'name': 'quantile_uni_coarse', 'subsample_features': 0.99}],
+            "inference_config/REGRESSION_Y_PREPROCESS_TRANSFORMS": ['power'],
+            "inference_config/SUBSAMPLE_SAMPLES": None,
+            "model_type": 'single',
+            "n_ensemble_repeats": 4,
+            "regression_model_path": 'tabpfn-v2-regressor.ckpt',
+            "softmax_temperature": 0.95,
+        },
+    ],
+    "GBM": [
+        {
+            "ag_args": {'name_suffix': '_r33', 'priority': -2},
+            "bagging_fraction": 0.9625293420216,
+            "bagging_freq": 1,
+            "cat_l2": 0.1236875455555,
+            "cat_smooth": 68.8584757332856,
+            "extra_trees": False,
+            "feature_fraction": 0.6189215809382,
+            "lambda_l1": 0.1641757352921,
+            "lambda_l2": 0.6937755557881,
+            "learning_rate": 0.0154031028561,
+            "max_cat_to_onehot": 17,
+            "min_data_in_leaf": 1,
+            "min_data_per_group": 30,
+            "num_leaves": 68,
+        },
+        {
+            "ag_args": {'name_suffix': '_r21', 'priority': -16},
+            "bagging_fraction": 0.7218730663234,
+            "bagging_freq": 1,
+            "cat_l2": 0.0296205152578,
+            "cat_smooth": 0.0010255271303,
+            "extra_trees": False,
+            "feature_fraction": 0.4557131604374,
+            "lambda_l1": 0.5219704038237,
+            "lambda_l2": 0.1070959487853,
+            "learning_rate": 0.0055891584996,
+            "max_cat_to_onehot": 71,
+            "min_data_in_leaf": 50,
+            "min_data_per_group": 10,
+            "num_leaves": 30,
+        },
+        {
+            "ag_args": {'name_suffix': '_r11', 'priority': -19},
+            "bagging_fraction": 0.775784726514,
+            "bagging_freq": 1,
+            "cat_l2": 0.3888471449178,
+            "cat_smooth": 0.0057144748021,
+            "extra_trees": True,
+            "feature_fraction": 0.7732354787904,
+            "lambda_l1": 0.2211002452568,
+            "lambda_l2": 1.1318405980187,
+            "learning_rate": 0.0090151778542,
+            "max_cat_to_onehot": 15,
+            "min_data_in_leaf": 4,
+            "min_data_per_group": 15,
+            "num_leaves": 2,
+        },
+    ],
+    "CAT": [
+        {
+            "ag_args": {'priority': -5},
+        },
+        {
+            "ag_args": {'name_suffix': '_r51', 'priority': -10},
+            "boosting_type": 'Plain',
+            "bootstrap_type": 'Bernoulli',
+            "colsample_bylevel": 0.8771035272558,
+            "depth": 7,
+            "grow_policy": 'SymmetricTree',
+            "l2_leaf_reg": 2.0107286863021,
+            "leaf_estimation_iterations": 2,
+            "learning_rate": 0.0058424016622,
+            "max_bin": 254,
+            "max_ctr_complexity": 4,
+            "model_size_reg": 0.1307400355809,
+            "one_hot_max_size": 23,
+            "subsample": 0.809527841437,
+        },
+        {
+            "ag_args": {'name_suffix': '_r10', 'priority': -12},
+            "boosting_type": 'Plain',
+            "bootstrap_type": 'Bernoulli',
+            "colsample_bylevel": 0.8994502668431,
+            "depth": 6,
+            "grow_policy": 'Depthwise',
+            "l2_leaf_reg": 1.8187025215896,
+            "leaf_estimation_iterations": 7,
+            "learning_rate": 0.005177304142,
+            "max_bin": 254,
+            "max_ctr_complexity": 4,
+            "model_size_reg": 0.5247386875068,
+            "one_hot_max_size": 53,
+            "subsample": 0.8705228845742,
+        },
+        {
+            "ag_args": {'name_suffix': '_r24', 'priority': -15},
+            "boosting_type": 'Plain',
+            "bootstrap_type": 'Bernoulli',
+            "colsample_bylevel": 0.8597809376276,
+            "depth": 8,
+            "grow_policy": 'Depthwise',
+            "l2_leaf_reg": 0.3628261923976,
+            "leaf_estimation_iterations": 5,
+            "learning_rate": 0.016851077771,
+            "max_bin": 254,
+            "max_ctr_complexity": 4,
+            "model_size_reg": 0.1253820547902,
+            "one_hot_max_size": 20,
+            "subsample": 0.8120271122061,
+        },
+        {
+            "ag_args": {'name_suffix': '_r91', 'priority': -17},
+            "boosting_type": 'Plain',
+            "bootstrap_type": 'Bernoulli',
+            "colsample_bylevel": 0.8959275863514,
+            "depth": 4,
+            "grow_policy": 'SymmetricTree',
+            "l2_leaf_reg": 0.0026915894253,
+            "leaf_estimation_iterations": 12,
+            "learning_rate": 0.0475233791203,
+            "max_bin": 254,
+            "max_ctr_complexity": 5,
+            "model_size_reg": 0.1633175256924,
+            "one_hot_max_size": 11,
+            "subsample": 0.798554178926,
+        },
+    ],
+    "TABM": [
+        {
+            "ag_args": {'name_suffix': '_r184', 'priority': -6},
+            "amp": False,
+            "arch_type": 'tabm-mini',
+            "batch_size": 'auto',
+            "d_block": 864,
+            "d_embedding": 24,
+            "dropout": 0.0,
+            "gradient_clipping_norm": 1.0,
+            "lr": 0.0019256819924656217,
+            "n_blocks": 3,
+            "num_emb_n_bins": 3,
+            "num_emb_type": 'pwl',
+            "patience": 16,
+            "share_training_batches": False,
+            "tabm_k": 32,
+            "weight_decay": 0.0,
+        },
+        {
+            "ag_args": {'name_suffix': '_r69', 'priority': -7},
+            "amp": False,
+            "arch_type": 'tabm-mini',
+            "batch_size": 'auto',
+            "d_block": 848,
+            "d_embedding": 28,
+            "dropout": 0.40215621636031007,
+            "gradient_clipping_norm": 1.0,
+            "lr": 0.0010413640454559532,
+            "n_blocks": 3,
+            "num_emb_n_bins": 18,
+            "num_emb_type": 'pwl',
+            "patience": 16,
+            "share_training_batches": False,
+            "tabm_k": 32,
+            "weight_decay": 0.0,
+        },
+        {
+            "ag_args": {'name_suffix': '_r52', 'priority': -11},
+            "amp": False,
+            "arch_type": 'tabm-mini',
+            "batch_size": 'auto',
+            "d_block": 1024,
+            "d_embedding": 32,
+            "dropout": 0.0,
+            "gradient_clipping_norm": 1.0,
+            "lr": 0.0006297851297842611,
+            "n_blocks": 4,
+            "num_emb_n_bins": 22,
+            "num_emb_type": 'pwl',
+            "patience": 16,
+            "share_training_batches": False,
+            "tabm_k": 32,
+            "weight_decay": 0.06900108498839816,
+        },
+        {
+            "ag_args": {'priority': -13},
+        },
+        {
+            "ag_args": {'name_suffix': '_r191', 'priority': -14},
+            "amp": False,
+            "arch_type": 'tabm-mini',
+            "batch_size": 'auto',
+            "d_block": 864,
+            "d_embedding": 8,
+            "dropout": 0.45321529282058803,
+            "gradient_clipping_norm": 1.0,
+            "lr": 0.0003781238075322413,
+            "n_blocks": 4,
+            "num_emb_n_bins": 27,
+            "num_emb_type": 'pwl',
+            "patience": 16,
+            "share_training_batches": False,
+            "tabm_k": 32,
+            "weight_decay": 0.01766851962579851,
+        },
+        {
+            "ag_args": {'name_suffix': '_r49', 'priority': -20},
+            "amp": False,
+            "arch_type": 'tabm-mini',
+            "batch_size": 'auto',
+            "d_block": 640,
+            "d_embedding": 28,
+            "dropout": 0.15296207419190627,
+            "gradient_clipping_norm": 1.0,
+            "lr": 0.002277678490593717,
+            "n_blocks": 3,
+            "num_emb_n_bins": 48,
+            "num_emb_type": 'pwl',
+            "patience": 16,
+            "share_training_batches": False,
+            "tabm_k": 32,
+            "weight_decay": 0.0578159148243893,
+        },
+    ],
+    "TABICL": [
+        {
+            "ag_args": {'priority': -8},
+        },
+    ],
+    "XGB": [
+        {
+            "ag_args": {'name_suffix': '_r171', 'priority': -9},
+            "colsample_bylevel": 0.9213705632288,
+            "colsample_bynode": 0.6443385965381,
+            "enable_categorical": True,
+            "grow_policy": 'lossguide',
+            "learning_rate": 0.0068171645251,
+            "max_cat_to_onehot": 8,
+            "max_depth": 6,
+            "max_leaves": 10,
+            "min_child_weight": 0.0507304250576,
+            "reg_alpha": 4.2446346389037,
+            "reg_lambda": 1.4800570021253,
+            "subsample": 0.9656290596647,
+        },
+        {
+            "ag_args": {'name_suffix': '_r40', 'priority': -18},
+            "colsample_bylevel": 0.6377491713202,
+            "colsample_bynode": 0.9237625621103,
+            "enable_categorical": True,
+            "grow_policy": 'lossguide',
+            "learning_rate": 0.0112462621131,
+            "max_cat_to_onehot": 33,
+            "max_depth": 10,
+            "max_leaves": 35,
+            "min_child_weight": 0.1403464856034,
+            "reg_alpha": 3.4960653958503,
+            "reg_lambda": 1.3062320805235,
+            "subsample": 0.6948898835178,
+        },
+    ],
+    "MITRA": [
+        {
+            "n_estimators": 1,
+            "fine_tune": True,
+            "fine_tune_steps": 50,
+            "ag.num_gpus": 1,
+            "ag_args": {'priority': -21},
+        },
+    ],
+}

autogluon/tabular/models/automm/automm_model.py CHANGED Viewed

@@ -65,6 +65,8 @@ class MultiModalPredictorModel(AbstractModel):
             Names of the features.
         feature_metadata
             The feature metadata.
+        .. versionadded:: 0.3.0
         """
         super().__init__(**kwargs)
         self._label_column_name = None

autogluon/tabular/models/automm/ft_transformer.py CHANGED Viewed

@@ -17,7 +17,8 @@ class FTTransformerModel(MultiModalPredictorModel):
     ag_name = "FTTransformer"
     def __init__(self, **kwargs):
-        """Wrapper of autogluon.multimodal.MultiModalPredictor.
+        """
+        FT-Transformer model.
         The features can be a mix of
         - categorical column
@@ -48,6 +49,8 @@ class FTTransformerModel(MultiModalPredictorModel):
             Names of the features.
         feature_metadata
             The feature metadata.
+        .. versionadded:: 0.6.0
         """
         super().__init__(**kwargs)

autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py CHANGED Viewed

@@ -24,10 +24,16 @@ class TrainerFinetune(BaseEstimator):
             cfg: ConfigRun,
             model: torch.nn.Module,
             n_classes: int,
-            device: str
-        ) -> None:
+            device: str,
+            rng: np.random.RandomState = None,
+            verbose: bool = True,
+    ):
         self.cfg = cfg
+        if rng is None:
+            rng = np.random.RandomState(self.cfg.seed)
+        self.rng = rng
+        self.verbose = verbose
         self.device = device
         self.model = model.to(self.device, non_blocking=True)
         self.n_classes = n_classes
@@ -81,13 +87,15 @@ class TrainerFinetune(BaseEstimator):
             y = y_train_transformed,
             task = self.cfg.task,
             max_samples_support = self.cfg.hyperparams['max_samples_support'],
-            max_samples_query = self.cfg.hyperparams['max_samples_query']
+            max_samples_query = self.cfg.hyperparams['max_samples_query'],
+            rng=self.rng,
         )
         self.checkpoint.reset(self.model)
         metrics_valid = self.evaluate(x_train, y_train, x_val, y_val)
-        self.log_start_metrics(metrics_valid)
+        if self.verbose:
+            self.log_start_metrics(metrics_valid)
         self.checkpoint(self.model, metrics_valid.loss)
         start_time = time.time()
@@ -154,13 +162,15 @@ class TrainerFinetune(BaseEstimator):
             metrics_train = prediction_metrics_tracker.get_metrics()
             metrics_valid = self.evaluate(x_train, y_train, x_val, y_val)
-            self.log_metrics(epoch, metrics_train, metrics_valid)
+            if self.verbose:
+                self.log_metrics(epoch, metrics_train, metrics_valid)
             self.checkpoint(self.model, metrics_valid.loss)
             self.early_stopping(metrics_valid.metrics[self.metric])
             if self.early_stopping.we_should_stop():
-                logger.info("Early stopping")
+                if self.verbose:
+                    logger.info("Early stopping")
                 break
             if self.cfg.hyperparams["budget"] is not None and self.cfg.hyperparams["budget"] > 0 and time.time() - start_time > self.cfg.hyperparams["budget"]:
@@ -192,6 +202,7 @@ class TrainerFinetune(BaseEstimator):
             y_query = y_query,
             max_samples_support = self.cfg.hyperparams['max_samples_support'],
             max_samples_query = self.cfg.hyperparams['max_samples_query'],
+            rng=self.rng,
         )
         loader = self.make_loader(dataset, training=False)
@@ -246,6 +257,7 @@ class TrainerFinetune(BaseEstimator):
             y_query = None,
             max_samples_support = self.cfg.hyperparams['max_samples_support'],
             max_samples_query = self.cfg.hyperparams['max_samples_query'],
+            rng=self.rng,
         )
         loader = self.make_loader(dataset, training=False)

autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py CHANGED Viewed

@@ -26,13 +26,15 @@ class DatasetFinetune(torch.utils.data.Dataset):
         x_query: np.ndarray,
         y_query: Optional[np.ndarray],
         max_samples_support: int,
-        max_samples_query: int
+        max_samples_query: int,
+        rng: np.random.RandomState,
     ):
         """
         :param: max_features: number of features the tab pfn model has been trained on
         """
         self.cfg = cfg
+        self.rng = rng
         self.x_support = x_support
         self.y_support = y_support
@@ -59,7 +61,7 @@ class DatasetFinetune(torch.utils.data.Dataset):
     def __getitem__(self, idx):
-        support_indices = np.random.choice(
+        support_indices = self.rng.choice(
             self.n_samples_support,
             size=self.support_size,
             replace=False
@@ -101,7 +103,8 @@ def DatasetFinetuneGenerator(
     y: np.ndarray,
     task: Task,
     max_samples_support: int,
-    max_samples_query: int
+    max_samples_query: int,
+    rng: np.random.RandomState,
 ):
     """
     The dataset fine-tune generator is a generator that yields a dataset for fine-tuning.
@@ -112,7 +115,7 @@ def DatasetFinetuneGenerator(
     while True:
-        x_support, x_query, y_support, y_query = make_dataset_split(x=x, y=y, task=task, seed=cfg.seed)
+        x_support, x_query, y_support, y_query = make_dataset_split(x=x, y=y, task=task, seed=rng)
         n_samples_support = x_support.shape[0]
         n_samples_query = x_query.shape[0]
@@ -127,6 +130,7 @@ def DatasetFinetuneGenerator(
             y_query=y_query[:query_size],
             max_samples_support=max_samples_support,
             max_samples_query=max_samples_query,
+            rng=rng,
         )
         yield dataset_finetune

autogluon/tabular/models/mitra/_internal/data/dataset_split.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import numpy as np
 from sklearn.model_selection import StratifiedKFold, train_test_split
@@ -19,9 +21,11 @@ def make_dataset_split(x: np.ndarray, y: np.ndarray, task: Task, seed: int) -> t
 def make_stratified_dataset_split(x, y, n_splits=5, seed=0):
+    if isinstance(seed, int):
+        seed = np.random.RandomState(seed)
     # Stratify doesn't shuffle the data, so we shuffle it first
-    permutation = np.random.permutation(len(y))
+    permutation = seed.permutation(len(y))
     x, y = x[permutation], y[permutation]
     min_samples_per_class = np.min(np.bincount(y))

autogluon/tabular/models/mitra/_internal/models/tab2d.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import json
+import logging
 import os
 from typing import Optional, Union
@@ -29,6 +30,8 @@ from ..._internal.models.embedding import (
     Tab2DQuantileEmbeddingX,
 )
+logger = logging.getLogger(__name__)
 class Tab2D(BaseModel):

autogluon/tabular/models/mitra/mitra_model.py CHANGED Viewed

@@ -1,49 +1,56 @@
-# TODO: To ensure deterministic operations we need to set torch.use_deterministic_algorithms(True)
-# and os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'. The CUBLAS environment variable configures
-# the workspace size for certain CUBLAS operations to ensure reproducibility when using CUDA >= 10.2.
-# Both settings are required to ensure deterministic behavior in operations such as matrix multiplications.
-import os
-os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+from __future__ import annotations
+import logging
 import os
 from typing import List, Optional
 import pandas as pd
-import torch
-import logging
 from autogluon.common.utils.resource_utils import ResourceManager
 from autogluon.core.models import AbstractModel
+from autogluon.features.generators import LabelEncoderFeatureGenerator
+from autogluon.tabular import __version__
 logger = logging.getLogger(__name__)
-# TODO: Needs memory usage estimate method
 class MitraModel(AbstractModel):
+    """
+    Mitra is a tabular foundation model pre-trained purely on synthetic data with the goal
+    of optimizing fine-tuning performance over in-context learning performance.
+    Mitra was developed by the AutoGluon team @ AWS AI.
+    Mitra's default hyperparameters outperforms all methods for small datasets on TabArena-v0.1 (excluding ensembling): https://tabarena.ai
+    Authors: Xiyuan Zhang, Danielle C. Maddix, Junming Yin, Nick Erickson, Abdul Fatir Ansari, Boran Han, Shuai Zhang, Leman Akoglu, Christos Faloutsos, Michael W. Mahoney, Cuixiong Hu, Huzefa Rangwala, George Karypis, Bernie Wang
+    Blog Post: https://www.amazon.science/blog/mitra-mixed-synthetic-priors-for-enhancing-tabular-foundation-models
+    License: Apache-2.0
+    .. versionadded:: 1.4.0
+    """
     ag_key = "MITRA"
     ag_name = "Mitra"
     weights_file_name = "model.pt"
     ag_priority = 55
-    def __init__(self, problem_type=None, **kwargs):
+    def __init__(self, **kwargs):
         super().__init__(**kwargs)
-        self.problem_type = problem_type
         self._weights_saved = False
+        self._feature_generator = None
     @staticmethod
     def _get_default_device():
         """Get the best available device for the current system."""
         if ResourceManager.get_gpu_count_torch(cuda_only=True) > 0:
-            logger.info("Using CUDA GPU")
+            logger.log(15, "Using CUDA GPU")
             return "cuda"
         else:
             return "cpu"
     def get_model_cls(self):
-        from .sklearn_interface import MitraClassifier
         if self.problem_type in ["binary", "multiclass"]:
+            from .sklearn_interface import MitraClassifier
             model_cls = MitraClassifier
         elif self.problem_type == "regression":
             from .sklearn_interface import MitraRegressor
@@ -53,6 +60,23 @@ class MitraModel(AbstractModel):
             raise AssertionError(f"Unsupported problem_type: {self.problem_type}")
         return model_cls
+    def _preprocess(self, X: pd.DataFrame, is_train: bool = False, **kwargs) -> pd.DataFrame:
+        X = super()._preprocess(X, **kwargs)
+        if is_train:
+            # X will be the training data.
+            self._feature_generator = LabelEncoderFeatureGenerator(verbosity=0)
+            self._feature_generator.fit(X=X)
+        # This converts categorical features to numeric via stateful label encoding.
+        if self._feature_generator.features_in:
+            X = X.copy()
+            X[self._feature_generator.features_in] = self._feature_generator.transform(
+                X=X
+            )
+        return X
     def _fit(
         self,
         X: pd.DataFrame,
@@ -61,11 +85,25 @@ class MitraModel(AbstractModel):
         y_val: pd.Series = None,
         time_limit: float = None,
         num_cpus: int = 1,
+        num_gpus: float = 0,
+        verbosity: int = 2,
         **kwargs,
     ):
         # TODO: Reset the number of threads based on the specified num_cpus
         need_to_reset_torch_threads = False
         torch_threads_og = None
+        try:
+            model_cls = self.get_model_cls()
+            import torch
+        except ImportError as err:
+            logger.log(
+                40,
+                f"\tFailed to import Mitra! To use the Mitra model, "
+                f"do: `pip install autogluon.tabular[mitra]=={__version__}`.",
+            )
+            raise err
         if num_cpus is not None and isinstance(num_cpus, (int, float)):
             torch_threads_og = torch.get_num_threads()
             if torch_threads_og != num_cpus:
@@ -73,9 +111,21 @@ class MitraModel(AbstractModel):
                 torch.set_num_threads(num_cpus)
                 need_to_reset_torch_threads = True
-        model_cls = self.get_model_cls()
         hyp = self._get_model_params()
+        if hyp.get("device", None) is None:
+            if num_gpus == 0:
+                hyp["device"] = "cpu"
+            else:
+                hyp["device"] = self._get_default_device()
+        if hyp["device"] == "cpu" and hyp.get("fine_tune", True):
+            logger.log(
+                30,
+                f"\tWarning: Attempting to fine-tune Mitra on CPU. This will be very slow. "
+                f"We strongly recommend using a GPU instance to fine-tune Mitra."
+            )
         if "state_dict_classification" in hyp:
             state_dict_classification = hyp.pop("state_dict_classification")
             if self.problem_type in ["binary", "multiclass"]:
@@ -85,11 +135,14 @@ class MitraModel(AbstractModel):
             if self.problem_type in ["regression"]:
                 hyp["state_dict"] = state_dict_regression
+        if "verbose" not in hyp:
+            hyp["verbose"] = verbosity >= 3
         self.model = model_cls(
             **hyp,
         )
-        X = self.preprocess(X)
+        X = self.preprocess(X, is_train=True)
         if X_val is not None:
             X_val = self.preprocess(X_val)
@@ -106,7 +159,6 @@ class MitraModel(AbstractModel):
     def _set_default_params(self):
         default_params = {
-            "device": self._get_default_device(),
             "n_estimators": 1,
         }
         for param, val in default_params.items():
@@ -196,12 +248,13 @@ class MitraModel(AbstractModel):
         X: pd.DataFrame,
         **kwargs,
     ) -> int:
-        return max(
+        # Multiply by 0.9 as currently this is overly safe
+        return int(0.9 * max(
             cls._estimate_memory_usage_static_cpu_icl(X=X, **kwargs),
             cls._estimate_memory_usage_static_cpu_ft_icl(X=X, **kwargs),
             cls._estimate_memory_usage_static_gpu_cpu(X=X, **kwargs),
             cls._estimate_memory_usage_static_gpu_gpu(X=X, **kwargs),
-        )
+        ))
     @classmethod
     def _estimate_memory_usage_static_cpu_icl(

autogluon.tabular 1.3.2b20250722__py3-none-any.whl → 1.4.0__py3-none-any.whl

autogluon.tabular 1.3.2b20250722__py3-none-any.whl → 1.4.0__py3-none-any.whl