autogluon.tabular 1.5.0b20251228__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.tabular might be problematic.
- autogluon/tabular/__init__.py +1 -0
- autogluon/tabular/configs/config_helper.py +18 -6
- autogluon/tabular/configs/feature_generator_presets.py +3 -1
- autogluon/tabular/configs/hyperparameter_configs.py +42 -9
- autogluon/tabular/configs/presets_configs.py +38 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
- autogluon/tabular/experimental/_scikit_mixin.py +6 -2
- autogluon/tabular/experimental/_tabular_classifier.py +3 -1
- autogluon/tabular/experimental/_tabular_regressor.py +3 -1
- autogluon/tabular/experimental/plot_leaderboard.py +73 -19
- autogluon/tabular/learner/abstract_learner.py +160 -42
- autogluon/tabular/learner/default_learner.py +78 -22
- autogluon/tabular/models/__init__.py +2 -2
- autogluon/tabular/models/_utils/rapids_utils.py +3 -1
- autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
- autogluon/tabular/models/automm/automm_model.py +12 -3
- autogluon/tabular/models/automm/ft_transformer.py +5 -1
- autogluon/tabular/models/catboost/callbacks.py +2 -2
- autogluon/tabular/models/catboost/catboost_model.py +93 -29
- autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
- autogluon/tabular/models/catboost/catboost_utils.py +3 -1
- autogluon/tabular/models/ebm/ebm_model.py +8 -13
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
- autogluon/tabular/models/fastainn/callbacks.py +20 -3
- autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
- autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
- autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
- autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
- autogluon/tabular/models/knn/knn_model.py +41 -8
- autogluon/tabular/models/lgb/callbacks.py +32 -9
- autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
- autogluon/tabular/models/lgb/lgb_model.py +150 -34
- autogluon/tabular/models/lgb/lgb_utils.py +12 -4
- autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
- autogluon/tabular/models/lr/lr_model.py +40 -10
- autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
- autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
- autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
- autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
- autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
- autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
- autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
- autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
- autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
- autogluon/tabular/models/mitra/mitra_model.py +16 -11
- autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
- autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
- autogluon/tabular/models/rf/compilers/onnx.py +1 -1
- autogluon/tabular/models/rf/rf_model.py +45 -12
- autogluon/tabular/models/rf/rf_quantile.py +4 -2
- autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
- autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
- autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
- autogluon/tabular/models/tabm/tabm_model.py +8 -4
- autogluon/tabular/models/tabm/tabm_reference.py +53 -85
- autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
- autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
- autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
- autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
- autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
- autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
- autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
- autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
- autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
- autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
- autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
- autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
- autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
- autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
- autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
- autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
- autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
- autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
- autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
- autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
- autogluon/tabular/models/xgboost/callbacks.py +9 -3
- autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
- autogluon/tabular/models/xt/xt_model.py +1 -0
- autogluon/tabular/predictor/interpretable_predictor.py +3 -1
- autogluon/tabular/predictor/predictor.py +409 -128
- autogluon/tabular/registry/__init__.py +1 -1
- autogluon/tabular/registry/_ag_model_registry.py +4 -5
- autogluon/tabular/registry/_model_registry.py +1 -0
- autogluon/tabular/testing/fit_helper.py +55 -15
- autogluon/tabular/testing/generate_datasets.py +1 -1
- autogluon/tabular/testing/model_fit_helper.py +10 -4
- autogluon/tabular/trainer/abstract_trainer.py +644 -230
- autogluon/tabular/trainer/auto_trainer.py +19 -8
- autogluon/tabular/trainer/model_presets/presets.py +33 -9
- autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
- autogluon/tabular/version.py +1 -1
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
- /autogluon.tabular-1.5.0b20251228-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
@@ -4,8 +4,8 @@ import numpy as np
 import torch
 
 from ..._internal.config.config_run import ConfigRun
-from ..._internal.data.dataset_split import make_dataset_split
 from ..._internal.config.enums import Task
+from ..._internal.data.dataset_split import make_dataset_split
 
 
 class DatasetFinetune(torch.utils.data.Dataset):
@@ -19,11 +19,11 @@ class DatasetFinetune(torch.utils.data.Dataset):
     """
 
     def __init__(
-        self,
+        self,
         cfg: ConfigRun,
-        x_support: np.ndarray,
-        y_support: np.ndarray,
-        x_query: np.ndarray,
+        x_support: np.ndarray,
+        y_support: np.ndarray,
+        x_query: np.ndarray,
         y_query: Optional[np.ndarray],
         max_samples_support: int,
         max_samples_query: int,
@@ -35,10 +35,10 @@ class DatasetFinetune(torch.utils.data.Dataset):
 
         self.cfg = cfg
         self.rng = rng
-
+
         self.x_support = x_support
         self.y_support = y_support
-        self.x_query = x_query
+        self.x_query = x_query
         self.y_query = y_query
 
         if self.y_query is None:
@@ -55,17 +55,11 @@ class DatasetFinetune(torch.utils.data.Dataset):
         # We push the whole training data through the model, unless it is too large
         self.support_size = min(self.max_samples_support, self.n_samples_support)
 
-
     def __len__(self):
         return len(self.x_queries)
 
     def __getitem__(self, idx):
-
-        support_indices = self.rng.choice(
-            self.n_samples_support,
-            size=self.support_size,
-            replace=False
-        )
+        support_indices = self.rng.choice(self.n_samples_support, size=self.support_size, replace=False)
 
         x_support = self.x_support[support_indices]
         y_support = self.y_support[support_indices]
@@ -76,13 +70,11 @@ class DatasetFinetune(torch.utils.data.Dataset):
         y_query_tensor = torch.as_tensor(self.y_queries[idx])
 
         return {
-
-
-
-
+            "x_support": x_support_tensor,
+            "y_support": y_support_tensor,
+            "x_query": x_query_tensor,
+            "y_query": y_query_tensor,
         }
-
-
 
     def split_in_chunks(self, x: np.ndarray, batch_size: int) -> list[np.ndarray]:
         """
@@ -93,14 +85,15 @@ class DatasetFinetune(torch.utils.data.Dataset):
         x_chunks = []
 
         for i in range(n_chunks):
-            x_chunks.append(x[i * batch_size: (i + 1) * batch_size])
+            x_chunks.append(x[i * batch_size : (i + 1) * batch_size])
 
         return x_chunks
 
+
 def DatasetFinetuneGenerator(
     cfg: ConfigRun,
-    x: np.ndarray,
-    y: np.ndarray,
+    x: np.ndarray,
+    y: np.ndarray,
     task: Task,
     max_samples_support: int,
     max_samples_query: int,
@@ -112,9 +105,8 @@ def DatasetFinetuneGenerator(
     Every single iteration, the generator yields a different support and query set split.
     The dataset made always has exactly one batch.
     """
-
-    while True:
 
+    while True:
         x_support, x_query, y_support, y_query = make_dataset_split(x=x, y=y, task=task, seed=rng)
         n_samples_support = x_support.shape[0]
         n_samples_query = x_query.shape[0]
@@ -133,4 +125,4 @@ def DatasetFinetuneGenerator(
             rng=rng,
         )
 
-        yield dataset_finetune
+        yield dataset_finetune
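These hunks are formatting-only: the behaviour kept by DatasetFinetune is that every __getitem__ call re-samples a support set without replacement and pairs it with one pre-chunked query batch. A minimal, self-contained sketch of that pattern on toy data; the class name SupportQueryDataset and its constructor are illustrative, not the package API.

import numpy as np
import torch


class SupportQueryDataset(torch.utils.data.Dataset):
    def __init__(self, x_support, y_support, x_queries, y_queries, support_size, rng):
        self.x_support = x_support
        self.y_support = y_support
        self.x_queries = x_queries  # list of pre-chunked query batches
        self.y_queries = y_queries
        self.support_size = support_size
        self.rng = rng

    def __len__(self):
        return len(self.x_queries)

    def __getitem__(self, idx):
        # a fresh support subset per item, drawn without replacement
        support_indices = self.rng.choice(len(self.x_support), size=self.support_size, replace=False)
        return {
            "x_support": torch.as_tensor(self.x_support[support_indices]),
            "y_support": torch.as_tensor(self.y_support[support_indices]),
            "x_query": torch.as_tensor(self.x_queries[idx]),
            "y_query": torch.as_tensor(self.y_queries[idx]),
        }


rng = np.random.default_rng(0)
x = rng.normal(size=(100, 5)).astype("float32")
y = rng.integers(0, 2, size=100)
ds = SupportQueryDataset(x[:80], y[:80], [x[80:]], [y[80:]], support_size=32, rng=rng)
batch = ds[0]  # "x_support" has shape (32, 5), "x_query" has shape (20, 5)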
@@ -5,6 +5,7 @@ from sklearn.model_selection import StratifiedKFold, train_test_split
 
 from ..._internal.config.enums import Task
 
+
 def make_dataset_split(x: np.ndarray, y: np.ndarray, task: Task, seed: int) -> tuple[np.ndarray, ...]:
     # Splits the dataset into train and validation sets with ratio 80/20
 
@@ -18,7 +19,7 @@ def make_dataset_split(x: np.ndarray, y: np.ndarray, task: Task, seed: int) -> t
         return make_stratified_dataset_split(x, y, seed=seed)
     else:
         return make_standard_dataset_split(x, y, seed=seed)
-
+
 
 def make_stratified_dataset_split(x, y, n_splits=5, seed=0):
     if isinstance(seed, int):
@@ -29,12 +30,12 @@ def make_stratified_dataset_split(x, y, n_splits=5, seed=0):
     x, y = x[permutation], y[permutation]
 
     min_samples_per_class = np.min(np.bincount(y))
-
+
     # Adjust n_splits based on both total samples and minimum samples per class
     n_samples = len(y)
     max_possible_splits = min(n_samples - 1, min_samples_per_class)
     n_splits = min(n_splits, max_possible_splits)
-
+
     # Ensure we have at least 2 splits if possible
     if n_samples >= 2 and min_samples_per_class >= 2:
         n_splits = max(2, n_splits)
@@ -44,14 +45,16 @@ def make_stratified_dataset_split(x, y, n_splits=5, seed=0):
 
     skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
     indices = next(skf.split(x, y))
-    x_t_train, x_t_valid = x[indices[0]], x[indices[1]]
+    x_t_train, x_t_valid = x[indices[0]], x[indices[1]]  # 80%, 20%
    y_t_train, y_t_valid = y[indices[0]], y[indices[1]]
 
     return x_t_train, x_t_valid, y_t_train, y_t_valid
 
 
 def make_standard_dataset_split(x, y, seed):
-
     return train_test_split(
-        x,
-
+        x,
+        y,
+        test_size=0.2,
+        random_state=seed,
+    )
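make_dataset_split keeps its 80/20 contract: classification goes through StratifiedKFold with n_splits clamped by the rarest class count, everything else falls back to train_test_split(test_size=0.2). A rough standalone sketch of that logic; the helper name split_80_20 is illustrative only.

import numpy as np
from sklearn.model_selection import StratifiedKFold, train_test_split


def split_80_20(x, y, stratify=True, seed=0):
    if stratify:
        # clamp n_splits by dataset size and the rarest class, roughly as in the hunk above
        min_per_class = np.min(np.bincount(y))
        n_splits = max(2, min(5, len(y) - 1, min_per_class))
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
        train_idx, valid_idx = next(skf.split(x, y))
        return x[train_idx], x[valid_idx], y[train_idx], y[valid_idx]
    return train_test_split(x, y, test_size=0.2, random_state=seed)


x = np.random.rand(50, 4)
y = np.random.randint(0, 3, size=50)
x_tr, x_va, y_tr, y_va = split_80_20(x, y)  # ~80% train / ~20% validation, stratified by class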
@@ -8,8 +8,7 @@ from sklearn.compose import ColumnTransformer
 from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_selection import SelectKBest
 from sklearn.pipeline import FeatureUnion, Pipeline
-from sklearn.preprocessing import
-    StandardScaler)
+from sklearn.preprocessing import OrdinalEncoder, QuantileTransformer, StandardScaler
 
 from ..._internal.config.enums import Task
 
@@ -17,33 +16,36 @@ from ..._internal.config.enums import Task
 class NoneTransformer(BaseEstimator, TransformerMixin):
     def fit(self, X, y=None):
         return self
+
     def transform(self, X):
         return X
 
-class Preprocessor():
+
+class Preprocessor:
     """
     This class is used to preprocess the data before it is pushed through the model.
     The preprocessor assures that the data has the right shape and is normalized,
-    This way the model always gets the same input distribution,
+    This way the model always gets the same input distribution,
     no matter whether the input data is synthetic or real.
 
     """
 
     def __init__(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        self,
+        dim_embedding: Optional[
+            int
+        ],  # Size of the feature embedding. For some models this is None, which means the embedding does not depend on the number of features
+        n_classes: int,  # Actual number of classes in the dataset, assumed to be numbered 0, ..., n_classes - 1
+        dim_output: int,  # Maximum number of classes the model has been trained on -> size of the output
+        use_quantile_transformer: bool,
+        use_feature_count_scaling: bool,
+        use_random_transforms: bool,
+        shuffle_classes: bool,
+        shuffle_features: bool,
+        random_mirror_regression: bool,
+        random_mirror_x: bool,
+        task: Task,
+    ):
         self.dim_embedding = dim_embedding
         self.n_classes = n_classes
         self.dim_output = dim_output
@@ -107,9 +109,7 @@ class Preprocessor():
 
         return self
 
-
     def transform_X(self, X: np.ndarray):
-
         X = self.impute_nan_features_with_mean(X)
         X = self.cutoff_singular_features(X, self.singular_features)
         X = self.select_features(X)
@@ -140,9 +140,7 @@ class Preprocessor():
 
         return X
 
-
     def transform_tabpfn(self, X: np.ndarray):
-
         n_samples = X.shape[0]
         n_features = X.shape[1]
 
@@ -150,37 +148,51 @@ class Preprocessor():
         random_state = random.randint(0, 1000000)
 
         if use_config1:
-            self.random_transforms = Pipeline(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            self.random_transforms = Pipeline(
+                [
+                    (
+                        "quantile",
+                        QuantileTransformer(
+                            output_distribution="normal",
+                            n_quantiles=max(n_samples // 10, 2),
+                            random_state=random_state,
+                        ),
+                    ),
+                    (
+                        "svd",
+                        FeatureUnion(
+                            [
+                                ("passthrough", NoneTransformer()),
+                                (
+                                    "svd",
+                                    Pipeline(
+                                        [
+                                            ("standard", StandardScaler(with_mean=False)),
+                                            (
+                                                "svd",
+                                                TruncatedSVD(
+                                                    algorithm="arpack",
+                                                    n_components=max(1, min(n_samples // 10 + 1, n_features // 2)),
+                                                    random_state=random_state,
+                                                ),
+                                            ),
+                                        ]
+                                    ),
+                                ),
+                            ]
+                        ),
+                    ),
+                ]
+            )
         else:
-            self.random_transforms = ColumnTransformer(
-                (
-
-
-                ), [])
-            ], remainder='passthrough')
+            self.random_transforms = ColumnTransformer(
+                [("ordinal", OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan), [])],
+                remainder="passthrough",
+            )
 
         return self.random_transforms.fit_transform(X)
 
-
     def transform_y(self, y: np.ndarray):
-
         if self.task == Task.CLASSIFICATION:
             # We assume that y properly presents classes [0, 1, 2, ...] before passing to the preprocessor
             # If the test set has a class that is not in the training set, we will throw an error
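The reformatted use_config1 branch is easier to follow now: a quantile transform to a normal distribution, then a FeatureUnion that concatenates the untouched features with an SVD-compressed copy. A runnable approximation of that pipeline on toy data; this is a standalone sketch, not the Mitra preprocessor itself, and FunctionTransformer() stands in for the package's NoneTransformer.

import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import FunctionTransformer, QuantileTransformer, StandardScaler

n_samples, n_features = 200, 12
X = np.random.rand(n_samples, n_features)

svd_branch = Pipeline(
    [
        ("standard", StandardScaler(with_mean=False)),
        ("svd", TruncatedSVD(algorithm="arpack", n_components=max(1, min(n_samples // 10 + 1, n_features // 2)))),
    ]
)

random_transforms = Pipeline(
    [
        ("quantile", QuantileTransformer(output_distribution="normal", n_quantiles=max(n_samples // 10, 2))),
        (
            "svd",
            FeatureUnion(
                [
                    ("passthrough", FunctionTransformer()),  # identity, like NoneTransformer above
                    ("svd", svd_branch),                     # compressed copy appended to the originals
                ]
            ),
        ),
    ]
)

X_out = random_transforms.fit_transform(X)  # shape (200, 12 + n_components)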
@@ -202,7 +214,6 @@ class Preprocessor():
 
         return y
 
-
     def inverse_transform_y(self, y: np.ndarray):
         # Function used during the prediction to transform the model output back to the original space
         # For classification, y is assumed to be logits of shape [n_samples, n_classes]
@@ -214,62 +225,49 @@ class Preprocessor():
             y = self.undo_randomize_class_order(y)
 
         elif self.task == Task.REGRESSION:
-
-            if self.random_mirror_regression:
+            if self.random_mirror_regression:
                 y = self.apply_random_mirror_regression(y)
 
             y = self.undo_normalize_y(y)
 
         return y
 
-
-
     def fit_transform_quantile_transformer(self, X: np.ndarray) -> np.ndarray:
-
         n_obs, n_features = X.shape
         n_quantiles = min(n_obs, 1000)
-        self.quantile_transformer = QuantileTransformer(n_quantiles=n_quantiles, output_distribution=
+        self.quantile_transformer = QuantileTransformer(n_quantiles=n_quantiles, output_distribution="normal")
         X = self.quantile_transformer.fit_transform(X)
 
         return X
 
-
-
     def determine_which_features_are_singular(self, x: np.ndarray) -> None:
-
-        self.singular_features = np.array([ len(np.unique(x_col)) for x_col in x.T ]) == 1
-
-
+        self.singular_features = np.array([len(np.unique(x_col)) for x_col in x.T]) == 1
 
     def determine_which_features_to_select(self, x: np.ndarray, y: np.ndarray) -> None:
-
         if self.dim_embedding is None:
             # All features are selected
             return
 
         if x.shape[1] > self.dim_embedding:
-            logger.info(
+            logger.info(
+                f"Number of features is capped at {self.dim_embedding}, but the dataset has {x.shape[1]} features. A subset of {self.dim_embedding} are selected using SelectKBest"
+            )
 
         self.select_k_best = SelectKBest(k=self.dim_embedding)
         self.select_k_best.fit(x, y)
 
-
     def compute_pre_nan_mean(self, x: np.ndarray) -> None:
         """
         Computes the mean of the data before the NaNs are imputed
         """
         self.pre_nan_mean = np.nanmean(x, axis=0)
 
-
     def impute_nan_features_with_mean(self, x: np.ndarray) -> np.ndarray:
-
         inds = np.where(np.isnan(x))
         x[inds] = np.take(self.pre_nan_mean, inds[1])
         return x
 
-
     def select_features(self, x: np.ndarray) -> np.ndarray:
-
         if self.dim_embedding is None:
             # All features are selected
             return x
@@ -279,15 +277,12 @@ class Preprocessor():
 
         return x
 
-
     def cutoff_singular_features(self, x: np.ndarray, singular_features: np.ndarray) -> np.ndarray:
-
         if singular_features.any():
             x = x[:, ~singular_features]
 
         return x
 
-
     def calc_mean_std(self, x: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         """
         Calculates the mean and std of the training data
@@ -296,7 +291,6 @@ class Preprocessor():
         std = x.std(axis=0) + 1e-6
         return mean, std
 
-
     def normalize_by_mean_std(self, x: np.ndarray, mean: np.ndarray, std: np.ndarray) -> np.ndarray:
         """
         Normalizes the data by the mean and std
@@ -305,7 +299,6 @@ class Preprocessor():
         x = (x - mean) / std
         return x
 
-
     def normalize_by_feature_count(self, x: np.ndarray) -> np.ndarray:
         """
         An interesting way of normalization by the tabPFN paper
@@ -317,8 +310,6 @@ class Preprocessor():
 
         return x
 
-
-
     def extend_feature_dim_to_dim_embedding(self, x: np.ndarray, dim_embedding) -> np.ndarray:
         """
         Increases the number of features to the number of features the model has been trained on
@@ -330,61 +321,47 @@ class Preprocessor():
         x = np.concatenate([x, added_zeros], axis=1)
         return x
 
-
     def determine_mix_max_scale(self, y: np.ndarray) -> None:
         self.y_min = y.min()
         self.y_max = y.max()
         assert self.y_min != self.y_max, "y_min and y_max are the same, cannot normalize, regression makes no sense"
 
-
     def normalize_y(self, y: np.ndarray) -> np.ndarray:
         y = (y - self.y_min) / (self.y_max - self.y_min)
         return y
 
-
     def undo_normalize_y(self, y: np.ndarray) -> np.ndarray:
         y = y * (self.y_max - self.y_min) + self.y_min
         return y
 
-
     def determine_regression_mirror(self) -> None:
         self.regression_mirror = np.random.choice([True, False], size=(1,)).item()
 
-
     def apply_random_mirror_regression(self, y: np.ndarray) -> np.ndarray:
         if self.regression_mirror:
             y = 1 - y
         return y
 
-
     def determine_mirror(self, x: np.ndarray) -> None:
-
         n_features = x.shape[1]
         self.mirror = np.random.choice([1, -1], size=(1, n_features))
 
-
     def apply_random_mirror_x(self, x: np.ndarray) -> np.ndarray:
-
         x = x * self.mirror
         return x
 
-
     def determine_shuffle_class_order(self) -> None:
-
         if self.shuffle_classes:
             self.new_shuffle_classes = np.random.permutation(self.n_classes)
         else:
             self.new_shuffle_classes = np.arange(self.n_classes)
 
-
     def randomize_class_order(self, y: np.ndarray) -> np.ndarray:
-
-        mapping = { i: self.new_shuffle_classes[i] for i in range(self.n_classes) }
+        mapping = {i: self.new_shuffle_classes[i] for i in range(self.n_classes)}
         y = np.array([mapping[i.item()] for i in y], dtype=np.int64)
 
         return y
 
-
     def undo_randomize_class_order(self, y_logits: np.ndarray) -> np.ndarray:
         """
         We assume y_logits has shape [n_samples, n_classes]
@@ -392,29 +369,22 @@ class Preprocessor():
 
         # mapping = {self.new_shuffle_classes[i]: i for i in range(self.n_classes)}
         mapping = {i: self.new_shuffle_classes[i] for i in range(self.n_classes)}
-        y = np.concatenate([y_logits[:, mapping[i]:mapping[i]+1] for i in range(self.n_classes)], axis=1)
+        y = np.concatenate([y_logits[:, mapping[i] : mapping[i] + 1] for i in range(self.n_classes)], axis=1)
 
         return y
 
-
     def extract_correct_classes(self, y_logits: np.ndarray) -> np.ndarray:
         # Even though our network might be able to support 10 classes,
         # If the problem only has three classes, we should give three classes as output.
         # We assume y_logits has shape [n_samples, n_classes]
-        y_logits = y_logits[:, :self.n_classes]
+        y_logits = y_logits[:, : self.n_classes]
         return y_logits
 
-
-
     def determine_feature_order(self, x: np.ndarray) -> None:
-
         n_features = x.shape[1]
         self.new_feature_order = np.random.permutation(n_features)
 
-
-
     def randomize_feature_order(self, x: np.ndarray) -> np.ndarray:
-
         x = x[:, self.new_feature_order]
 
-        return x
+        return x
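Most of these hunks only drop stray blank lines, but the class-shuffling helpers are worth a second look: labels are remapped through a random permutation before training, and undo_randomize_class_order re-orders the logit columns afterwards so that column i again scores original class i. A tiny sketch of that bookkeeping on toy arrays, not the package code:

import numpy as np

n_classes = 3
perm = np.random.permutation(n_classes)  # role of determine_shuffle_class_order

y = np.array([0, 1, 2, 1, 0])
y_shuffled = perm[y]  # randomize_class_order, written in vectorised form

logits = np.random.rand(len(y), n_classes)  # model output in the shuffled label space
logits_original = logits[:, perm]           # undo_randomize_class_order
# logits_original[:, i] is the score the model assigned to shuffled class perm[i],
# i.e. to original class i.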
@@ -1 +1 @@
-# Model architecture modules for MitraModel
+# Model architecture modules for MitraModel
@@ -1,21 +1,18 @@
+from abc import ABC, abstractmethod
+
 import torch
 import torch.nn as nn
-
+
 
 class BaseModel(nn.Module, ABC):
-
     def __init__(self):
         super().__init__()
-
+
     def init_weights(self):
         """Initialize model weights."""
         pass
-
+
     @abstractmethod
-    def forward(self,
-                x_support: torch.Tensor,
-                y_support: torch.Tensor,
-                x_query: torch.Tensor,
-                **kwargs):
+    def forward(self, x_support: torch.Tensor, y_support: torch.Tensor, x_query: torch.Tensor, **kwargs):
         """Forward pass for the model."""
-        pass
+        pass