PyPI - autogluon.tabular - Versions diffs - 1.3.2b20250715__py3-none-any.whl → 1.3.2b20250717__py3-none-any.whl - Mend

autogluon.tabular 1.3.2b20250715__py3-none-any.whl → 1.3.2b20250717__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

autogluon/tabular/models/mitra/_internal/data/preprocessor.py CHANGED Viewed

@@ -1,23 +1,25 @@
-from typing import Optional, Self
 import random
+from typing import Optional
 import numpy as np
 from loguru import logger
-from sklearn.feature_selection import SelectKBest
-from sklearn.preprocessing import QuantileTransformer, StandardScaler, OrdinalEncoder
+from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.compose import ColumnTransformer
 from sklearn.decomposition import TruncatedSVD
-from sklearn.pipeline import Pipeline, FeatureUnion
-from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.feature_selection import SelectKBest
+from sklearn.pipeline import FeatureUnion, Pipeline
+from sklearn.preprocessing import (OrdinalEncoder, QuantileTransformer,
+                                   StandardScaler)
 from ..._internal.config.enums import Task
 class NoneTransformer(BaseEstimator, TransformerMixin):
     def fit(self, X, y=None):
         return self
     def transform(self, X):
         return X
 class Preprocessor():
     """
     This class is used to preprocess the data before it is pushed through the model.
@@ -28,9 +30,9 @@ class Preprocessor():
     """
     def __init__(
-            self,
+            self,
             dim_embedding: Optional[int],   # Size of the feature embedding. For some models this is None, which means the embedding does not depend on the number of features
-            n_classes: int,   # Actual number of classes in the dataset, assumed to be numbered 0, ..., n_classes - 1
+            n_classes: int,   # Actual number of classes in the dataset, assumed to be numbered 0, ..., n_classes - 1
             dim_output: int,  # Maximum number of classes the model has been trained on -> size of the output
             use_quantile_transformer: bool,
             use_feature_count_scaling: bool,
@@ -53,8 +55,8 @@ class Preprocessor():
         self.random_mirror_regression = random_mirror_regression
         self.random_mirror_x = random_mirror_x
         self.task = task
-    def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
+    def fit(self, X: np.ndarray, y: np.ndarray) -> "Preprocessor":
         """
         X: np.ndarray [n_samples, n_features]
         y: np.ndarray [n_samples]
@@ -78,16 +80,16 @@ class Preprocessor():
         if self.use_quantile_transformer:
             # If use quantile transform is off, it means that the preprocessing will happen on the GPU.
             X = self.fit_transform_quantile_transformer(X)
             self.mean, self.std = self.calc_mean_std(X)
             X = self.normalize_by_mean_std(X, self.mean, self.std)
         if self.use_random_transforms:
             X = self.transform_tabpfn(X)
         if self.task == Task.CLASSIFICATION and self.shuffle_classes:
             self.determine_shuffle_class_order()
         if self.shuffle_features:
             self.determine_feature_order(X)
@@ -104,7 +106,7 @@ class Preprocessor():
         X[np.isinf(X)] = 0
         return self
     def transform_X(self, X: np.ndarray):
@@ -116,12 +118,12 @@ class Preprocessor():
             # If use quantile transform is off, it means that the preprocessing will happen on the GPU.
             X = self.quantile_transformer.transform(X)
             X = self.normalize_by_mean_std(X, self.mean, self.std)
             if self.use_feature_count_scaling:
                 X = self.normalize_by_feature_count(X)
         if self.use_random_transforms:
             X = self.random_transforms.transform(X)
@@ -140,11 +142,11 @@ class Preprocessor():
     def transform_tabpfn(self, X: np.ndarray):
         n_samples = X.shape[0]
         n_features = X.shape[1]
-        use_config1 = random.random() < 0.5
+        use_config1 = random.random() < 0.5
         random_state = random.randint(0, 1000000)
         if use_config1:
@@ -171,12 +173,12 @@ class Preprocessor():
                 ('ordinal', OrdinalEncoder(
                     handle_unknown="use_encoded_value",
                     unknown_value=np.nan
-                ), [])
+                ), [])
             ], remainder='passthrough')
         return self.random_transforms.fit_transform(X)
     def transform_y(self, y: np.ndarray):
         if self.task == Task.CLASSIFICATION:
@@ -193,36 +195,34 @@ class Preprocessor():
         if self.task == Task.REGRESSION and self.random_mirror_regression:
             y = self.apply_random_mirror_regression(y)
-        match self.task:
-            case Task.CLASSIFICATION:
-                y = y.astype(np.int64)
-            case Task.REGRESSION:
-                y = y.astype(np.float32)
+        if self.task == Task.CLASSIFICATION:
+            y = y.astype(np.int64)
+        elif self.task == Task.REGRESSION:
+            y = y.astype(np.float32)
         return y
     def inverse_transform_y(self, y: np.ndarray):
         # Function used during the prediction to transform the model output back to the original space
         # For classification, y is assumed to be logits of shape [n_samples, n_classes]
-        match self.task:
-            case Task.CLASSIFICATION:
-                y = self.extract_correct_classes(y)
+        if self.task == Task.CLASSIFICATION:
+            y = self.extract_correct_classes(y)
-                if self.shuffle_classes:
-                    y = self.undo_randomize_class_order(y)
+            if self.shuffle_classes:
+                y = self.undo_randomize_class_order(y)
-            case Task.REGRESSION:
+        elif self.task == Task.REGRESSION:
-                if  self.random_mirror_regression:
-                    y = self.apply_random_mirror_regression(y)
+            if  self.random_mirror_regression:
+                y = self.apply_random_mirror_regression(y)
-                y = self.undo_normalize_y(y)
+            y = self.undo_normalize_y(y)
         return y
     def fit_transform_quantile_transformer(self, X: np.ndarray) -> np.ndarray:
@@ -233,12 +233,12 @@ class Preprocessor():
         return X
     def determine_which_features_are_singular(self, x: np.ndarray) -> None:
         self.singular_features = np.array([ len(np.unique(x_col)) for x_col in x.T ]) == 1
     def determine_which_features_to_select(self, x: np.ndarray, y: np.ndarray) -> None:
@@ -267,7 +267,7 @@ class Preprocessor():
         x[inds] = np.take(self.pre_nan_mean, inds[1])
         return x
     def select_features(self, x: np.ndarray) -> np.ndarray:
         if self.dim_embedding is None:
@@ -278,7 +278,7 @@ class Preprocessor():
             x = self.select_k_best.transform(x)
         return x
     def cutoff_singular_features(self, x: np.ndarray, singular_features: np.ndarray) -> np.ndarray:
@@ -295,7 +295,7 @@ class Preprocessor():
         mean = x.mean(axis=0)
         std = x.std(axis=0) + 1e-6
         return mean, std
     def normalize_by_mean_std(self, x: np.ndarray, mean: np.ndarray, std: np.ndarray) -> np.ndarray:
         """
@@ -329,23 +329,23 @@ class Preprocessor():
         added_zeros = np.zeros((x.shape[0], dim_embedding - x.shape[1]), dtype=np.float32)
         x = np.concatenate([x, added_zeros], axis=1)
         return x
     def determine_mix_max_scale(self, y: np.ndarray) -> None:
         self.y_min = y.min()
         self.y_max = y.max()
         assert self.y_min != self.y_max, "y_min and y_max are the same, cannot normalize, regression makes no sense"
     def normalize_y(self, y: np.ndarray) -> np.ndarray:
         y = (y - self.y_min) / (self.y_max - self.y_min)
         return y
     def undo_normalize_y(self, y: np.ndarray) -> np.ndarray:
         y = y * (self.y_max - self.y_min) + self.y_min
         return y
     def determine_regression_mirror(self) -> None:
         self.regression_mirror = np.random.choice([True, False], size=(1,)).item()
@@ -355,7 +355,7 @@ class Preprocessor():
         if self.regression_mirror:
             y = 1 - y
         return y
     def determine_mirror(self, x: np.ndarray) -> None:
@@ -376,15 +376,15 @@ class Preprocessor():
         else:
             self.new_shuffle_classes = np.arange(self.n_classes)
     def randomize_class_order(self, y: np.ndarray) -> np.ndarray:
         mapping = { i: self.new_shuffle_classes[i] for i in range(self.n_classes) }
         y = np.array([mapping[i.item()] for i in y], dtype=np.int64)
-        return y
+        return y
     def undo_randomize_class_order(self, y_logits: np.ndarray) -> np.ndarray:
         """
         We assume y_logits has shape [n_samples, n_classes]
@@ -393,9 +393,9 @@ class Preprocessor():
         # mapping = {self.new_shuffle_classes[i]: i for i in range(self.n_classes)}
         mapping = {i: self.new_shuffle_classes[i] for i in range(self.n_classes)}
         y = np.concatenate([y_logits[:, mapping[i]:mapping[i]+1] for i in range(self.n_classes)], axis=1)
         return y
     def extract_correct_classes(self, y_logits: np.ndarray) -> np.ndarray:
         # Even though our network might be able to support 10 classes,

autogluon/tabular/models/mitra/_internal/models/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Model architecture modules for MitraModel

autogluon/tabular/models/mitra/_internal/models/tab2d.py CHANGED Viewed

@@ -1,3 +1,5 @@
+import json
+import os
 from typing import Optional, Union
 import einops
@@ -5,11 +7,8 @@ import einx
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from safetensors.torch import save_file
 from huggingface_hub import hf_hub_download
-from safetensors.torch import load_file
-import os
-import json
+from safetensors.torch import load_file, save_file
 # Try to import flash attention, but make it optional
 try:
@@ -24,8 +23,8 @@ from torch.utils.checkpoint import checkpoint
 from ..._internal.config.enums import Task
 from ..._internal.models.base import BaseModel
 from ..._internal.models.embedding import (
-    Tab2DEmbeddingX,
-    Tab2DEmbeddingYClasses,
+    Tab2DEmbeddingX,
+    Tab2DEmbeddingYClasses,
     Tab2DEmbeddingYRegression,
     Tab2DQuantileEmbeddingX,
 )
@@ -64,16 +63,15 @@ class Tab2D(BaseModel):
         self.x_embedding = Tab2DEmbeddingX(dim)
-        match self.task:
-            case Task.CLASSIFICATION:
-                self.y_embedding = Tab2DEmbeddingYClasses(dim, dim_output)     # type: nn.Module
-            case Task.REGRESSION:
-                if self.dim_output == 1:
-                    self.y_embedding = Tab2DEmbeddingYRegression(dim)
-                else:
-                    self.y_embedding = Tab2DEmbeddingYClasses(dim, dim_output)
-            case _:
-                raise ValueError(f"Task {task} not supported")
+        if self.task == Task.CLASSIFICATION:
+            self.y_embedding = Tab2DEmbeddingYClasses(dim, dim_output)     # type: nn.Module
+        elif self.task == Task.REGRESSION:
+            if self.dim_output == 1:
+                self.y_embedding = Tab2DEmbeddingYRegression(dim)
+            else:
+                self.y_embedding = Tab2DEmbeddingYClasses(dim, dim_output)
+        else:
+            raise ValueError(f"Task {task} not supported")
         self.layers = nn.ModuleList()
@@ -165,18 +163,17 @@ class Tab2D(BaseModel):
         y_query__, x_query__ = einops.unpack(query__, pack_query__, 'b s * c') # (b, n_q, 1, c), (b, n_q, f, c)
-        match self.task:
+        if self.task == Task.REGRESSION:
             # output has shape (batch_size, n_observations_query, n_features, n_classes)
             # we want to remove the n_features dimension, and for regression, the n_classes dimension
-            case Task.REGRESSION:
-                if self.dim_output == 1:
-                    y_query__ = y_query__[:, :, 0, 0]
-                else:
-                    y_query__ = y_query__[:, :, 0, :]
-            case Task.CLASSIFICATION:
+            if self.dim_output == 1:
+                y_query__ = y_query__[:, :, 0, 0]
+            else:
                 y_query__ = y_query__[:, :, 0, :]
-            case _:
-                raise ValueError(f"Task {self.task} not supported")
+        elif self.task == Task.CLASSIFICATION:
+            y_query__ = y_query__[:, :, 0, :]
+        else:
+            raise ValueError(f"Task {self.task} not supported")
         return y_query__

autogluon/tabular/models/mitra/_internal/utils/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Utility modules for MitraModel

autogluon/tabular/models/mitra/mitra_model.py CHANGED Viewed

@@ -1,8 +1,18 @@
+# TODO: To ensure deterministic operations we need to set torch.use_deterministic_algorithms(True)
+# and os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'. The CUBLAS environment variable configures
+# the workspace size for certain CUBLAS operations to ensure reproducibility when using CUDA >= 10.2.
+# Both settings are required to ensure deterministic behavior in operations such as matrix multiplications.
+import os
+os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
+import os
+from typing import List, Optional
 import pandas as pd
-from typing import Optional, List
 from autogluon.common.utils.resource_utils import ResourceManager
 from autogluon.core.models import AbstractModel
-import os
 # TODO: Needs memory usage estimate method
 class MitraModel(AbstractModel):
@@ -37,6 +47,17 @@ class MitraModel(AbstractModel):
         num_cpus: int = 1,
         **kwargs,
     ):
+        # TODO: Reset the number of threads based on the specified num_cpus
+        need_to_reset_torch_threads = False
+        torch_threads_og = None
+        if num_cpus is not None and isinstance(num_cpus, (int, float)):
+            torch_threads_og = torch.get_num_threads()
+            if torch_threads_og != num_cpus:
+                # reset torch threads back to original value after fit
+                torch.set_num_threads(num_cpus)
+                need_to_reset_torch_threads = True
         model_cls = self.get_model_cls()
         hyp = self._get_model_params()
@@ -65,14 +86,17 @@ class MitraModel(AbstractModel):
             time_limit=time_limit,
         )
+        if need_to_reset_torch_threads:
+            torch.set_num_threads(torch_threads_og)
     def _set_default_params(self):
         default_params = {
-            "device": "cuda", # "cpu"
+            "device": "cpu",
             "n_estimators": 1,
         }
         for param, val in default_params.items():
             self._set_default_param_value(param, val)
     def _get_default_auxiliary_params(self) -> dict:
         default_auxiliary_params = super()._get_default_auxiliary_params()
         default_auxiliary_params.update(
@@ -87,7 +111,7 @@ class MitraModel(AbstractModel):
     @property
     def weights_path(self) -> str:
         return os.path.join(self.path, self.weights_file_name)
     def save(self, path: str = None, verbose=True) -> str:
         _model_weights_list = None
         if self.model is not None:
@@ -98,7 +122,7 @@ class MitraModel(AbstractModel):
                 self.model.trainers[i].model = None
                 self.model.trainers[i].optimizer = None
                 self.model.trainers[i].scheduler_warmup = None
-                self.model.trainers[i].scheduler_reduce_on_plateau = None
+                self.model.trainers[i].scheduler_reduce_on_plateau = None
             self._weights_saved = True
         path = super().save(path=path, verbose=verbose)
         if _model_weights_list is not None:
@@ -108,7 +132,7 @@ class MitraModel(AbstractModel):
             for i in range(len(self.model.trainers)):
                 self.model.trainers[i].model = _model_weights_list[i]
         return path
     @classmethod
     def load(cls, path: str, reset_paths=False, verbose=True):
         model: MitraModel = super().load(path=path, reset_paths=reset_paths, verbose=verbose)
@@ -136,14 +160,16 @@ class MitraModel(AbstractModel):
         return default_ag_args_ensemble
     def _get_default_resources(self) -> tuple[int, int]:
-        # logical=False is faster in training
-        num_cpus = ResourceManager.get_cpu_count_psutil(logical=False)
-        num_gpus = 1
+        # Use only physical cores for better performance based on benchmarks
+        num_cpus = ResourceManager.get_cpu_count(only_physical_cores=True)
+        num_gpus = min(1, ResourceManager.get_gpu_count_torch(cuda_only=True))
         return num_cpus, num_gpus
     def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
         return self.estimate_memory_usage_static(X=X, problem_type=self.problem_type, num_classes=self.num_classes, **kwargs)
     @classmethod
     def _estimate_memory_usage_static(
         cls,
@@ -157,7 +183,7 @@ class MitraModel(AbstractModel):
             cls._estimate_memory_usage_static_gpu_cpu(X=X, **kwargs),
             cls._estimate_memory_usage_static_gpu_gpu(X=X, **kwargs),
         )
     @classmethod
     def _estimate_memory_usage_static_cpu_icl(
         cls,
@@ -165,10 +191,18 @@ class MitraModel(AbstractModel):
         X: pd.DataFrame,
         **kwargs,
     ) -> int:
-        cpu_memory_kb = 1.3 * (0.001748 * (X.shape[0]**2) * X.shape[1] + \
-                        0.001206 * X.shape[0] * (X.shape[1]**2) + \
-                        10.3482 * X.shape[0] * X.shape[1] + \
-                        6409698)
+        rows, features = X.shape[0], X.shape[1]
+        # For very small datasets, use a more conservative estimate
+        if rows * features < 100:  # Small dataset threshold
+            # Use a simpler linear formula for small datasets
+            cpu_memory_kb = 1.3 * (100 * rows * features + 1000000)  # 1GB base + linear scaling
+        else:
+            # Original formula for larger datasets
+            cpu_memory_kb = 1.3 * (0.001748 * (rows**2) * features + \
+                            0.001206 * rows * (features**2) + \
+                            10.3482 * rows * features + \
+                            6409698)
         return int(cpu_memory_kb * 1e3)
     @classmethod
@@ -178,12 +212,20 @@ class MitraModel(AbstractModel):
         X: pd.DataFrame,
         **kwargs,
     ) -> int:
-        cpu_memory_kb = 1.3 * (0.001 * (X.shape[0]**2) * X.shape[1] + \
-                        0.004541 * X.shape[0] * (X.shape[1]**2) + \
-                        46.2974 * X.shape[0] * X.shape[1] + \
-                        5605681)
+        rows, features = X.shape[0], X.shape[1]
+        # For very small datasets, use a more conservative estimate
+        if rows * features < 100:  # Small dataset threshold
+            # Use a simpler linear formula for small datasets
+            cpu_memory_kb = 1.3 * (200 * rows * features + 2000000)  # 2GB base + linear scaling
+        else:
+            # Original formula for larger datasets
+            cpu_memory_kb = 1.3 * (0.001 * (rows**2) * features + \
+                            0.004541 * rows * (features**2) + \
+                            46.2974 * rows * features + \
+                            5605681)
         return int(cpu_memory_kb * 1e3)
     @classmethod
     def _estimate_memory_usage_static_gpu_cpu(
         cls,
@@ -191,7 +233,13 @@ class MitraModel(AbstractModel):
         X: pd.DataFrame,
         **kwargs,
     ) -> int:
-        return int(5 * 1e9)
+        rows, features = X.shape[0], X.shape[1]
+        # For very small datasets, use a more conservative estimate
+        if rows * features < 100:  # Small dataset threshold
+            return int(2.5 * 1e9)  # 2.5GB for small datasets
+        else:
+            return int(5 * 1e9)  # 5GB for larger datasets
     @classmethod
     def _estimate_memory_usage_static_gpu_gpu(
@@ -200,7 +248,15 @@ class MitraModel(AbstractModel):
         X: pd.DataFrame,
         **kwargs,
     ) -> int:
-        gpu_memory_mb = 1.3 * (0.05676 * X.shape[0] * X.shape[1] + 3901)
+        rows, features = X.shape[0], X.shape[1]
+        # For very small datasets, use a more conservative estimate
+        if rows * features < 100:  # Small dataset threshold
+            # Use a simpler linear formula for small datasets
+            gpu_memory_mb = 1.3 * (10 * rows * features + 2000)  # 2GB base + linear scaling
+        else:
+            # Original formula for larger datasets
+            gpu_memory_mb = 1.3 * (0.05676 * rows * features + 3901)
         return int(gpu_memory_mb * 1e6)
     @classmethod
@@ -208,7 +264,7 @@ class MitraModel(AbstractModel):
         return {
             "can_estimate_memory_usage_static": True,
         }
     def _more_tags(self) -> dict:
         tags = {"can_refit_full": True}
         return tags

autogluon.tabular 1.3.2b20250715__py3-none-any.whl → 1.3.2b20250717__py3-none-any.whl

autogluon.tabular 1.3.2b20250715__py3-none-any.whl → 1.3.2b20250717__py3-none-any.whl