autogluon.tabular 1.3.2b20250713__py3-none-any.whl → 1.3.2b20250715__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (36)
  1. autogluon/tabular/models/__init__.py +1 -0
  2. autogluon/tabular/models/catboost/catboost_model.py +9 -6
  3. autogluon/tabular/models/catboost/catboost_utils.py +10 -0
  4. autogluon/tabular/models/lgb/lgb_model.py +2 -1
  5. autogluon/tabular/models/mitra/__init__.py +0 -0
  6. autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +190 -0
  7. autogluon/tabular/models/mitra/_internal/config/config_run.py +32 -0
  8. autogluon/tabular/models/mitra/_internal/config/enums.py +145 -0
  9. autogluon/tabular/models/mitra/_internal/core/callbacks.py +94 -0
  10. autogluon/tabular/models/mitra/_internal/core/get_loss.py +55 -0
  11. autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +108 -0
  12. autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +67 -0
  13. autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +134 -0
  14. autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +367 -0
  15. autogluon/tabular/models/mitra/_internal/data/collator.py +46 -0
  16. autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +132 -0
  17. autogluon/tabular/models/mitra/_internal/data/dataset_split.py +53 -0
  18. autogluon/tabular/models/mitra/_internal/data/preprocessor.py +420 -0
  19. autogluon/tabular/models/mitra/_internal/models/base.py +21 -0
  20. autogluon/tabular/models/mitra/_internal/models/embedding.py +182 -0
  21. autogluon/tabular/models/mitra/_internal/models/tab2d.py +667 -0
  22. autogluon/tabular/models/mitra/_internal/utils/set_seed.py +15 -0
  23. autogluon/tabular/models/mitra/mitra_model.py +214 -0
  24. autogluon/tabular/models/mitra/sklearn_interface.py +462 -0
  25. autogluon/tabular/registry/_ag_model_registry.py +2 -0
  26. autogluon/tabular/testing/fit_helper.py +2 -2
  27. autogluon/tabular/version.py +1 -1
  28. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/METADATA +21 -12
  29. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/RECORD +36 -16
  30. /autogluon.tabular-1.3.2b20250713-py3.9-nspkg.pth → /autogluon.tabular-1.3.2b20250715-py3.9-nspkg.pth +0 -0
  31. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/LICENSE +0 -0
  32. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/NOTICE +0 -0
  33. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/WHEEL +0 -0
  34. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/namespace_packages.txt +0 -0
  35. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/top_level.txt +0 -0
  36. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/zip-safe +0 -0
autogluon/tabular/models/mitra/_internal/models/embedding.py (added)
@@ -0,0 +1,182 @@
+
+import einops
+import einx
+import torch
+import torch.nn as nn
+
+
+class Tab2DEmbeddingX(torch.nn.Module):
+
+    def __init__(self, dim: int) -> None:
+        super().__init__()
+
+        self.dim = dim
+        self.x_embedding = nn.Linear(1, dim)
+
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+
+        x = einx.rearrange('b s f -> b s f 1', x)
+        x = self.x_embedding(x)
+
+        return x
+
+
+
+class Tab2DQuantileEmbeddingX(torch.nn.Module):
+
+    def __init__(
+        self,
+        dim: int,
+    ) -> None:
+
+        super().__init__()
+
+        self.dim = dim
+
+
+    def forward(
+        self,
+        x_support: torch.Tensor,
+        x_query__: torch.Tensor,
+        padding_mask: torch.Tensor,
+        feature_mask: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+
+        """
+        Syntax:
+        b = batch size
+        s = number of observations
+        f = number of features
+        q = number of quantiles
+        """
+
+        batch_size = padding_mask.shape[0]
+        seq_len = einx.sum('b [s]', ~padding_mask)
+        feature_count = einx.sum('b [f]', ~feature_mask)
+
+        # By setting the padded tokens to 9999 we ensure they don't participate in the quantile calculation
+        x_support[padding_mask] = 9999
+
+        q = torch.arange(1, 1000, dtype=torch.float, device=x_support.device) / 1000
+        quantiles = torch.quantile(x_support, q=q, dim=1)
+        quantiles = einx.rearrange('q b f -> (b f) q', quantiles)
+        x_support = einx.rearrange('b s f -> (b f) s', x_support).contiguous()
+        x_query__ = einx.rearrange('b s f -> (b f) s', x_query__).contiguous()
+
+        bucketize = torch.vmap(torch.bucketize, in_dims=(0, 0), out_dims=0)
+        x_support = bucketize(x_support, quantiles).float()
+        x_query__ = bucketize(x_query__, quantiles).float()
+        x_support = einx.rearrange('(b f) s -> b s f', x_support, b=batch_size).contiguous()
+        x_query__ = einx.rearrange('(b f) s -> b s f', x_query__, b=batch_size).contiguous()
+
+        # If 30% is padded, the minimum will have quantile 0.0 and the maximum will have quantile 0.7 times max_length.
+        # Here we correct the quantiles so that the minimum has quantile 0.0 and the maximum has quantile 1.0.
+        x_support = x_support / seq_len[:, None, None]
+        x_query__ = x_query__ / seq_len[:, None, None]
+
+        # Make sure that the padding is not used in the calculation of the mean
+        x_support[padding_mask] = 0
+        x_support_mean = einx.sum('b [s] f', x_support, keepdims=True) / seq_len[:, None, None]
+
+        x_support = x_support - x_support_mean
+        x_query__ = x_query__ - x_support_mean
+
+        # Make sure that the padding is not used in the calculation of the variance
+        x_support[padding_mask] = 0
+        x_support_var = einx.sum('b [s] f', x_support**2, keepdims=True) / seq_len[:, None, None]
+
+        x_support = x_support / x_support_var.sqrt()
+        x_query__ = x_query__ / x_support_var.sqrt()
+
+        # In case an x_support feature column contains one unique feature, set the feature to zero
+        x_support = torch.where(x_support_var == 0, 0, x_support)
+        x_query__ = torch.where(x_support_var == 0, 0, x_query__)
+
+        return x_support, x_query__
+
+
+class Tab2DEmbeddingY(torch.nn.Module):
+
+    def __init__(self, dim: int, n_classes: int) -> None:
+        super().__init__()
+
+        self.dim = dim
+        self.n_classes = n_classes
+        self.y_embedding_support = nn.Linear(1, dim)
+        self.y_embedding_query = nn.Embedding(1, dim)
+
+
+    def forward(self, y_support: torch.Tensor, padding_obs_support: torch.Tensor, n_obs_query: int) -> tuple[torch.Tensor, torch.Tensor]:
+
+        batch_size = y_support.shape[0]
+
+        y_support = y_support.type(torch.float32)
+        y_support = y_support / self.n_classes - 0.5
+        y_support = einops.rearrange(y_support, 'b n -> b n 1')
+
+        y_support = self.y_embedding_support(y_support)
+        y_support[padding_obs_support] = 0
+
+        y_query = torch.zeros((batch_size, n_obs_query, 1), device=y_support.device, dtype=torch.int64)
+        y_query = self.y_embedding_query(y_query)
+
+        return y_support, y_query
+
+
+class Tab2DEmbeddingYClasses(torch.nn.Module):
+
+    def __init__(
+        self,
+        dim: int,
+        n_classes: int,
+    ) -> None:
+
+        super().__init__()
+
+        self.n_classes = n_classes
+        self.dim = dim
+
+        self.y_embedding = nn.Embedding(n_classes, dim,)
+        self.y_mask = nn.Embedding(1, dim) # masking is also modeled as a separate class
+
+
+    def forward(self, y_support: torch.Tensor, padding_obs_support: torch.Tensor, n_obs_query: int) -> tuple[torch.Tensor, torch.Tensor]:
+
+        batch_size = y_support.shape[0]
+        n_obs_support = y_support.shape[1]
+
+        y_support = y_support.type(torch.int64)
+        y_support = einops.rearrange(y_support, 'b n -> b n 1')
+        y_support[padding_obs_support] = 0 # padded tokens are -100 -> set it to zero so nn.Embedding can handle it
+        y_support = self.y_embedding(y_support)
+        y_support[padding_obs_support] = 0 # just to make sure, set it to zero again
+
+        y_query = torch.zeros((batch_size, n_obs_query, 1), device=y_support.device, dtype=torch.int64)
+        y_query = self.y_mask(y_query)
+
+        return y_support, y_query
+
+
+class Tab2DEmbeddingYRegression(torch.nn.Module):
+
+    def __init__(self, dim: int) -> None:
+        super().__init__()
+
+        self.dim = dim
+        self.y_embedding = nn.Linear(1, dim)
+        self.y_mask = nn.Embedding(1, dim)
+
+
+    def forward(self, y_support: torch.Tensor, padding_obs_support: torch.Tensor, n_obs_query: int) -> tuple[torch.Tensor, torch.Tensor]:
+
+        batch_size = y_support.shape[0]
+        y_support = y_support.type(torch.float32)
+        y_support = einops.rearrange(y_support, 'b n -> b n 1')
+        y_support = self.y_embedding(y_support)
+        y_support[padding_obs_support] = 0
+
+        y_query = torch.zeros((batch_size, n_obs_query, 1), device=y_support.device, dtype=torch.int64)
+        y_query = self.y_mask(y_query)
+
+        return y_support, y_query
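
For readers skimming the diff: the modules above operate on batched support/query tensors of shape (batch, observations, features) with boolean padding masks. The snippet below is a minimal, hypothetical usage sketch and is not part of the wheel; the tensor shapes and values are illustrative assumptions, while the import path follows the file listed above.

# Hypothetical usage sketch (not part of the package); assumes torch, einops, and einx are installed.
import torch

from autogluon.tabular.models.mitra._internal.models.embedding import (
    Tab2DEmbeddingYClasses,
    Tab2DQuantileEmbeddingX,
)

b, s_support, s_query, f, dim, n_classes = 2, 8, 4, 3, 16, 2

x_support = torch.randn(b, s_support, f)
x_query = torch.randn(b, s_query, f)
padding_mask = torch.zeros(b, s_support, dtype=torch.bool)   # no padded observations in this toy batch
feature_mask = torch.zeros(b, f, dtype=torch.bool)           # no padded features
y_support = torch.randint(0, n_classes, (b, s_support))

# Quantile-bucketize and standardize features using support-set statistics.
x_emb = Tab2DQuantileEmbeddingX(dim=dim)
x_support_q, x_query_q = x_emb(x_support, x_query, padding_mask, feature_mask)
print(x_support_q.shape, x_query_q.shape)    # (b, s_support, f), (b, s_query, f)

# Embed support labels; query positions get a learned mask embedding instead of a label.
y_emb = Tab2DEmbeddingYClasses(dim=dim, n_classes=n_classes)
y_support_e, y_query_e = y_emb(y_support, padding_obs_support=padding_mask, n_obs_query=s_query)
print(y_support_e.shape, y_query_e.shape)    # (b, s_support, 1, dim), (b, s_query, 1, dim)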