autogluon.tabular 1.5.1b20260105__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.tabular might be problematic.
- autogluon/tabular/__init__.py +1 -0
- autogluon/tabular/configs/config_helper.py +18 -6
- autogluon/tabular/configs/feature_generator_presets.py +3 -1
- autogluon/tabular/configs/hyperparameter_configs.py +42 -9
- autogluon/tabular/configs/presets_configs.py +38 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
- autogluon/tabular/experimental/_scikit_mixin.py +6 -2
- autogluon/tabular/experimental/_tabular_classifier.py +3 -1
- autogluon/tabular/experimental/_tabular_regressor.py +3 -1
- autogluon/tabular/experimental/plot_leaderboard.py +73 -19
- autogluon/tabular/learner/abstract_learner.py +160 -42
- autogluon/tabular/learner/default_learner.py +78 -22
- autogluon/tabular/models/__init__.py +2 -2
- autogluon/tabular/models/_utils/rapids_utils.py +3 -1
- autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
- autogluon/tabular/models/automm/automm_model.py +12 -3
- autogluon/tabular/models/automm/ft_transformer.py +5 -1
- autogluon/tabular/models/catboost/callbacks.py +2 -2
- autogluon/tabular/models/catboost/catboost_model.py +93 -29
- autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
- autogluon/tabular/models/catboost/catboost_utils.py +3 -1
- autogluon/tabular/models/ebm/ebm_model.py +8 -13
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
- autogluon/tabular/models/fastainn/callbacks.py +20 -3
- autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
- autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
- autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
- autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
- autogluon/tabular/models/knn/knn_model.py +41 -8
- autogluon/tabular/models/lgb/callbacks.py +32 -9
- autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
- autogluon/tabular/models/lgb/lgb_model.py +150 -34
- autogluon/tabular/models/lgb/lgb_utils.py +12 -4
- autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
- autogluon/tabular/models/lr/lr_model.py +40 -10
- autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
- autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
- autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
- autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
- autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
- autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
- autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
- autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
- autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
- autogluon/tabular/models/mitra/mitra_model.py +16 -11
- autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
- autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
- autogluon/tabular/models/rf/compilers/onnx.py +1 -1
- autogluon/tabular/models/rf/rf_model.py +45 -12
- autogluon/tabular/models/rf/rf_quantile.py +4 -2
- autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
- autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
- autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
- autogluon/tabular/models/tabm/tabm_model.py +8 -4
- autogluon/tabular/models/tabm/tabm_reference.py +53 -85
- autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
- autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
- autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
- autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
- autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
- autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
- autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
- autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
- autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
- autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
- autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
- autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
- autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
- autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
- autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
- autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
- autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
- autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
- autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
- autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
- autogluon/tabular/models/xgboost/callbacks.py +9 -3
- autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
- autogluon/tabular/models/xt/xt_model.py +1 -0
- autogluon/tabular/predictor/interpretable_predictor.py +3 -1
- autogluon/tabular/predictor/predictor.py +409 -128
- autogluon/tabular/registry/__init__.py +1 -1
- autogluon/tabular/registry/_ag_model_registry.py +4 -5
- autogluon/tabular/registry/_model_registry.py +1 -0
- autogluon/tabular/testing/fit_helper.py +55 -15
- autogluon/tabular/testing/generate_datasets.py +1 -1
- autogluon/tabular/testing/model_fit_helper.py +10 -4
- autogluon/tabular/trainer/abstract_trainer.py +644 -230
- autogluon/tabular/trainer/auto_trainer.py +19 -8
- autogluon/tabular/trainer/model_presets/presets.py +33 -9
- autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
- autogluon/tabular/version.py +1 -1
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
- /autogluon.tabular-1.5.1b20260105-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
- {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py

@@ -4,13 +4,11 @@ import torch.nn as nn


 class FoundationEmbeddingX(torch.nn.Module):
-
     def __init__(
-
-
-
-
-
+        self,
+        dim: int,
+        n_features: int,
+    ) -> None:
         super().__init__()

         self.dim = dim
@@ -18,9 +16,7 @@ class FoundationEmbeddingX(torch.nn.Module):

         self.x_embedding = nn.Linear(n_features, dim)

-
     def forward(self, x_support: torch.Tensor, x_query__: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
-
         batch_size = x_support.shape[0]
         n_obs_support = x_support.shape[1]
         n_obs_query__ = x_query__.shape[1]
@@ -32,53 +28,44 @@ class FoundationEmbeddingX(torch.nn.Module):


 class FoundationEmbeddingYFloat(torch.nn.Module):
-
     def __init__(
-
-
-
-
+        self,
+        dim: int,
+    ) -> None:
         super().__init__()

         self.dim = dim

         self.y_embedding = nn.Linear(1, dim)

-
     def forward(self, y_support: torch.Tensor, n_obs_query: int) -> tuple[torch.Tensor, torch.Tensor]:
-
         batch_size = y_support.shape[0]

         y_support = y_support.type(torch.float32)
-        y_support = einops.rearrange(y_support,
+        y_support = einops.rearrange(y_support, "b n -> b n 1")

         y_support = self.y_embedding(y_support)
         y_query = torch.zeros((batch_size, n_obs_query, self.dim), device=y_support.device, dtype=torch.float32)

         return y_support, y_query
-


 class FoundationEmbeddingYInteger(torch.nn.Module):
-
     def __init__(
-
-
-
-
-
+        self,
+        n_classes: int,
+        dim: int,
+    ) -> None:
         super().__init__()

         self.n_classes = n_classes
         self.dim = dim

         self.y_embedding = nn.Embedding(n_classes, dim)
-        self.y_padding = nn.Embedding(1, dim, padding_idx=0)
-        self.y_mask = nn.Embedding(1, dim)
-
+        self.y_padding = nn.Embedding(1, dim, padding_idx=0)  # padding is modeled as a separate class
+        self.y_mask = nn.Embedding(1, dim)  # masking is also modeled as a separate class

     def forward(self, y_support: torch.Tensor, n_obs_query: int) -> tuple[torch.Tensor, torch.Tensor]:
-
         batch_size = y_support.shape[0]
         n_obs_support = y_support.shape[1]

@@ -88,33 +75,29 @@ class FoundationEmbeddingYInteger(torch.nn.Module):
         y_support_pad = y_support == -100

         y_sup = torch.zeros((batch_size, n_obs_support, self.dim), device=y_support.device, dtype=torch.float32)
-        y_sup[
-        y_sup[~y_support_pad] = self.y_embedding(
+        y_sup[y_support_pad] = self.y_padding(y_support[y_support_pad] + 100)
+        y_sup[~y_support_pad] = self.y_embedding(y_support[~y_support_pad])

         y_query = torch.zeros((batch_size, n_obs_query), device=y_support.device, dtype=torch.int64)
         y_query = self.y_mask(y_query)

         return y_sup, y_query
-

-class FoundationObservationEmbedding(torch.nn.Module):

+class FoundationObservationEmbedding(torch.nn.Module):
     def __init__(self, dim: int) -> None:
-
         super().__init__()

         self.dim = dim
         self.max_dim = 2**16
         self.embedding = nn.Embedding(self.max_dim, dim)

-
     def forward(self, batch_size: int, n_obs: int) -> torch.Tensor:
+        assert n_obs <= self.max_dim, f"Number of observations is too large. Max is {self.max_dim}, got {n_obs}"

-
-
-        # Take a random embedding from the pool of embeddings
+        # Take a random embedding from the pool of embeddings
         weights = torch.ones((batch_size, self.max_dim), dtype=torch.float32, device=self.embedding.weight.device)
         indices = torch.multinomial(weights, num_samples=n_obs, replacement=False)
         x = self.embedding(indices)
-
-        return x
+
+        return x
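
In the FoundationEmbeddingYInteger hunk above, support labels equal to -100 are routed to a dedicated padding embedding and all other labels to the class embedding, now written as single-line index assignments. A standalone sketch of that pattern in plain PyTorch (class and variable names here are illustrative, not part of the package):

import torch
import torch.nn as nn


class PaddedLabelEmbedding(nn.Module):
    """Embed integer labels where -100 marks padded positions (the usual ignore_index convention)."""

    def __init__(self, n_classes: int, dim: int) -> None:
        super().__init__()
        self.class_embedding = nn.Embedding(n_classes, dim)
        # padding_idx=0 keeps the padding vector at zero, mirroring the diff above
        self.pad_embedding = nn.Embedding(1, dim, padding_idx=0)

    def forward(self, y: torch.Tensor) -> torch.Tensor:
        is_pad = y == -100
        out = torch.zeros((*y.shape, self.class_embedding.embedding_dim), dtype=torch.float32, device=y.device)
        out[is_pad] = self.pad_embedding(y[is_pad] + 100)    # -100 -> index 0
        out[~is_pad] = self.class_embedding(y[~is_pad])      # real class ids
        return out


y = torch.tensor([[0, 2, -100], [1, -100, -100]])
emb = PaddedLabelEmbedding(n_classes=3, dim=4)
print(emb(y).shape)  # torch.Size([2, 3, 4])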
autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py

@@ -1,16 +1,14 @@
-
 import einops
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from
+from huggingface_hub import PyTorchModelHubMixin

+from ...core.enums import Task
 from .embedding import FoundationEmbeddingX, FoundationEmbeddingYFloat, FoundationEmbeddingYInteger
-from huggingface_hub import PyTorchModelHubMixin


 class FoundationTransformer(nn.Module, PyTorchModelHubMixin):
-
     def __init__(
         self,
         n_features: int,
@@ -22,7 +20,6 @@ class FoundationTransformer(nn.Module, PyTorchModelHubMixin):
         y_as_float_embedding: bool,
         task: str = Task.CLASSIFICATION,
     ) -> None:
-
         super().__init__()

         self.n_features = n_features
@@ -44,36 +41,34 @@ class FoundationTransformer(nn.Module, PyTorchModelHubMixin):
         self.layers = nn.ModuleList([])

         for _ in range(n_layers):
-
             att = MultiheadAttention(dim, n_heads)

-            self.layers.append(
-
-
-
-
-
-
-
-
+            self.layers.append(
+                nn.ModuleDict(
+                    {
+                        "layer_norm1": nn.LayerNorm(dim),
+                        "attention": att,
+                        "layer_norm2": nn.LayerNorm(dim),
+                        "linear1": nn.Linear(dim, dim * 4),
+                        "linear2": nn.Linear(dim * 4, dim),
+                    }
+                )
+            )
+
+        self.final_layer1 = nn.Linear(dim, dim * 4)
         if self.task == Task.CLASSIFICATION:
-            self.final_layer2 = nn.Linear(dim*4, n_classes)
+            self.final_layer2 = nn.Linear(dim * 4, n_classes)
         elif self.task == Task.REGRESSION:
-            self.final_layer2 = nn.Linear(dim*4, 1)
+            self.final_layer2 = nn.Linear(dim * 4, 1)
         self.init_weights()

-
     def init_weights(self):
-
         for module_dict in self.layers:
-
             # module_dict['attention'].init_weights()
-            nn.init.zeros_(module_dict[
-            nn.init.zeros_(module_dict[
-
+            nn.init.zeros_(module_dict["linear2"].weight)
+            nn.init.zeros_(module_dict["linear2"].bias)

     def forward(self, x_support: torch.Tensor, y_support: torch.Tensor, x_query: torch.Tensor):
-
         """
         x_support is (batch_size, n_observations_support, n_features)
         y_support is (batch_size, n_observations_support)
@@ -106,38 +101,34 @@ class FoundationTransformer(nn.Module, PyTorchModelHubMixin):
         support = x_support + y_support
         query__ = x_query__ + y_query__

-        x, pack = einops.pack((support, query__),
-
-        for module_dict in self.layers:
+        x, pack = einops.pack((support, query__), "b * d")

+        for module_dict in self.layers:
             x_residual = x
-            support, query__ = einops.unpack(x, pack,
-            att_support = module_dict[
-            att_query__ = module_dict[
-            x = einops.pack((att_support, att_query__),
+            support, query__ = einops.unpack(x, pack, "b * d")
+            att_support = module_dict["attention"](support, support, support, key_padding_mask=padding_mask)
+            att_query__ = module_dict["attention"](query__, support, support, key_padding_mask=padding_mask)
+            x = einops.pack((att_support, att_query__), "b * d")[0]
             x = x_residual + x
-            x = module_dict[
+            x = module_dict["layer_norm1"](x)
             x_residual = x
-            x = module_dict[
+            x = module_dict["linear1"](x)
             x = torch.nn.functional.gelu(x)
-            x = module_dict[
+            x = module_dict["linear2"](x)
             x = x_residual + x
-            x = module_dict[
+            x = module_dict["layer_norm2"](x)

         x = self.final_layer1(x)
         x = F.gelu(x)
         x = self.final_layer2(x)

-        support, query__ = einops.unpack(x, pack,
+        support, query__ = einops.unpack(x, pack, "b * c")

         return query__


-
 class MultiheadAttention(torch.nn.Module):
-
     def __init__(self, dim: int, n_heads: int) -> None:
-
         super().__init__()

         self.use_flash_attention = False
@@ -146,21 +137,14 @@ class MultiheadAttention(torch.nn.Module):

         self.att = nn.MultiheadAttention(dim, n_heads, dropout=0.0, batch_first=True)

-
-
     def init_weights(self):
         pass
         # nn.init.zeros_(self.att.out_proj.weight)
         # nn.init.zeros_(self.att.out_proj.bias)

-
     def forward(
-
-
-        key: torch.Tensor,
-        value: torch.Tensor,
-        key_padding_mask: torch.Tensor
-    ) -> torch.Tensor:
+        self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, key_padding_mask: torch.Tensor
+    ) -> torch.Tensor:
         """
         b = batch size
         n = number of samples (dataset size)
@@ -179,9 +163,7 @@ class MultiheadAttention(torch.nn.Module):
         return output


-
-
 class SwiGLU(nn.Module):
     def forward(self, x):
         x, gate = x.chunk(2, dim=-1)
-        return F.silu(gate) * x
+        return F.silu(gate) * x
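
Most of the rewritten forward lines above complete einops.pack / einops.unpack calls that were cut off in this view: support and query rows are packed along the sequence axis so they flow through the same attention and MLP blocks, then split apart again. A minimal round-trip sketch of that pattern (shapes chosen arbitrarily; assumes einops >= 0.6, which introduced pack/unpack):

import einops
import torch

# Support and query batches share the batch and feature dims but differ in length.
support = torch.randn(2, 5, 8)   # (batch, n_support, dim)
query = torch.randn(2, 3, 8)     # (batch, n_query, dim)

# Pack along the middle axis; `ps` records how to split the result later.
x, ps = einops.pack((support, query), "b * d")
print(x.shape)  # torch.Size([2, 8, 8])

# ... shared layers would process x here ...

support_out, query_out = einops.unpack(x, ps, "b * d")
print(support_out.shape, query_out.shape)  # torch.Size([2, 5, 8]) torch.Size([2, 3, 8])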
autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py

@@ -17,21 +17,21 @@ class PredictionMetrics:

     @classmethod
     def from_prediction(cls, y_pred: np.ndarray, y_true: np.ndarray, task: Task, metric: Scorer):
-
         loss, score, metrics = compute_metrics(y_pred, y_true, task, metric=metric)

         return PredictionMetrics(task=task, loss=loss, score=score, metrics=metrics)


 def compute_metrics(y_pred: np.ndarray, y_true: np.ndarray, task: Task, metric: Scorer) -> tuple[float, float, dict]:
-
     if task == Task.CLASSIFICATION:
         return compute_classification_metrics(y_pred, y_true, metric=metric)
     else:
         return compute_regression_metrics(y_pred, y_true, metric=metric)
-

-
+
+def compute_classification_metrics(
+    y_pred: np.ndarray, y_true: np.ndarray, metric: Scorer
+) -> tuple[float, float, dict]:
     # predictions are assumed to be log-probabilities

     if metric.needs_pred or metric.needs_class:
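
The prediction_metrics change is purely cosmetic: compute_classification_metrics is reflowed onto several lines, while the task-based dispatch stays the same. A rough sketch of that dispatch using sklearn metrics in place of AutoGluon's Scorer (the Task enum and compute_loss helper below are illustrative, not the package API):

from enum import Enum

import numpy as np
from sklearn.metrics import log_loss, mean_squared_error


class Task(str, Enum):
    CLASSIFICATION = "classification"
    REGRESSION = "regression"


def compute_loss(y_pred: np.ndarray, y_true: np.ndarray, task: Task) -> float:
    # Classification predictions are class probabilities; regression predictions are raw values.
    if task == Task.CLASSIFICATION:
        return log_loss(y_true, y_pred)
    return mean_squared_error(y_true, y_pred)


y_true = np.array([0, 1, 1])
y_proba = np.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])
print(compute_loss(y_proba, y_true, Task.CLASSIFICATION))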
autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py

@@ -3,8 +3,8 @@ from __future__ import annotations
 from pathlib import Path

 import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
 import torch
+from sklearn.base import BaseEstimator, ClassifierMixin

 from .core.dataset_split import make_stratified_dataset_split
 from .core.trainer_finetune import TrainerFinetune
@@ -16,32 +16,52 @@ from .models.foundation.foundation_transformer import FoundationTransformer
 # TODO: To mitigate val overfitting, can fit multiple random seeds at same time and pick same epoch for all of them, track average performance on epoch.
 # TODO: Test shuffling the data and see if it makes TabPFNv2 worse, same with TabForestPFN
 class TabPFNMixClassifier(BaseEstimator, ClassifierMixin):
-    def __init__(
+    def __init__(
+        self,
+        n_classes,
+        cfg,
+        split_val,
+        model_path: str = None,
+        weights_path: str | Path = None,
+        stopping_metric=None,
+        use_best_epoch: bool = True,
+    ):
         if weights_path is not None:
             weights_path = str(Path(weights_path))

         if model_path is not None:
             model = FoundationTransformer.from_pretrained(model_path)
-            assert model.task == cfg.task,
+            assert model.task == cfg.task, (
+                f"The pretrained model '{model_path}' is for task {model.task}, but the problem type is for task {cfg.task}..."
+            )
         else:
             model = FoundationTransformer(
-                n_features=cfg.hyperparams[
-                n_classes=cfg.hyperparams[
-                dim=cfg.hyperparams[
-                n_layers=cfg.hyperparams[
-                n_heads=cfg.hyperparams[
-                attn_dropout=cfg.hyperparams[
-                y_as_float_embedding=cfg.hyperparams[
+                n_features=cfg.hyperparams["n_features"],
+                n_classes=cfg.hyperparams["n_classes"],
+                dim=cfg.hyperparams["dim"],
+                n_layers=cfg.hyperparams["n_layers"],
+                n_heads=cfg.hyperparams["n_heads"],
+                attn_dropout=cfg.hyperparams["attn_dropout"],
+                y_as_float_embedding=cfg.hyperparams["y_as_float_embedding"],
                 task=cfg.task,
             )
         if weights_path is not None:
             model.load_state_dict(torch.load(weights_path, weights_only=True))  # nosec B614

         self.split_val = split_val
-        self.trainer = TrainerFinetune(
+        self.trainer = TrainerFinetune(
+            cfg, model, n_classes=n_classes, stopping_metric=stopping_metric, use_best_epoch=use_best_epoch
+        )
         super().__init__()

-    def fit(
+    def fit(
+        self,
+        X: np.ndarray,
+        y: np.ndarray,
+        X_val: np.ndarray = None,
+        y_val: np.ndarray = None,
+        time_limit: float = None,
+    ):
         # FIXME: Should X and y be preprocessed for inference efficiency? Yes.
         self.X_ = X  # FIXME: Optimize storage of X and y? Is this redundant? Is X and y saving done multiple times during pickle?
         self.y_ = y
autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py

@@ -3,8 +3,8 @@ from __future__ import annotations
 from pathlib import Path

 import numpy as np
-from sklearn.base import BaseEstimator, RegressorMixin
 import torch
+from sklearn.base import BaseEstimator, RegressorMixin

 from .core.dataset_split import make_stratified_dataset_split
 from .core.trainer_finetune import TrainerFinetune
@@ -16,8 +16,16 @@ from .models.foundation.foundation_transformer import FoundationTransformer
 # TODO: To mitigate val overfitting, can fit multiple random seeds at same time and pick same epoch for all of them, track average performance on epoch.
 # TODO: Test shuffling the data and see if it makes TabPFNv2 worse, same with TabForestPFN
 class TabPFNMixRegressor(BaseEstimator, RegressorMixin):
-    def __init__(
-
+    def __init__(
+        self,
+        n_classes,
+        cfg,
+        split_val,
+        model_path: str = None,
+        weights_path: str | Path = None,
+        stopping_metric=None,
+        use_best_epoch: bool = True,
+    ):
         self.cfg = cfg

         if weights_path is not None:
@@ -25,26 +33,37 @@ class TabPFNMixRegressor(BaseEstimator, RegressorMixin):

         if model_path is not None:
             model = FoundationTransformer.from_pretrained(model_path)
-            assert model.task == cfg.task,
+            assert model.task == cfg.task, (
+                f"The pretrained model '{model_path}' is for task {model.task}, but the problem type is for task {cfg.task}..."
+            )
         else:
             model = FoundationTransformer(
-                n_features=cfg.hyperparams[
-                n_classes=cfg.hyperparams[
-                dim=cfg.hyperparams[
-                n_layers=cfg.hyperparams[
-                n_heads=cfg.hyperparams[
-                attn_dropout=cfg.hyperparams[
-                y_as_float_embedding=cfg.hyperparams[
+                n_features=cfg.hyperparams["n_features"],
+                n_classes=cfg.hyperparams["n_classes"],
+                dim=cfg.hyperparams["dim"],
+                n_layers=cfg.hyperparams["n_layers"],
+                n_heads=cfg.hyperparams["n_heads"],
+                attn_dropout=cfg.hyperparams["attn_dropout"],
+                y_as_float_embedding=cfg.hyperparams["y_as_float_embedding"],
                 task=cfg.task,
             )
         if weights_path is not None:
             model.load_state_dict(torch.load(weights_path, weights_only=True))  # nosec B614

         self.split_val = split_val
-        self.trainer = TrainerFinetune(
+        self.trainer = TrainerFinetune(
+            cfg, model, n_classes=n_classes, stopping_metric=stopping_metric, use_best_epoch=use_best_epoch
+        )
         super().__init__()

-    def fit(
+    def fit(
+        self,
+        X: np.ndarray,
+        y: np.ndarray,
+        X_val: np.ndarray = None,
+        y_val: np.ndarray = None,
+        time_limit: float = None,
+    ):
         # FIXME: Should X and y be preprocessed for inference efficiency? Yes.
         self.X_ = X  # FIXME: Optimize storage of X and y? Is this redundant? Is X and y saving done multiple times during pickle?
         self.y_ = y