autogluon.tabular 1.3.2b20250713__py3-none-any.whl → 1.3.2b20250715__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (36)
  1. autogluon/tabular/models/__init__.py +1 -0
  2. autogluon/tabular/models/catboost/catboost_model.py +9 -6
  3. autogluon/tabular/models/catboost/catboost_utils.py +10 -0
  4. autogluon/tabular/models/lgb/lgb_model.py +2 -1
  5. autogluon/tabular/models/mitra/__init__.py +0 -0
  6. autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +190 -0
  7. autogluon/tabular/models/mitra/_internal/config/config_run.py +32 -0
  8. autogluon/tabular/models/mitra/_internal/config/enums.py +145 -0
  9. autogluon/tabular/models/mitra/_internal/core/callbacks.py +94 -0
  10. autogluon/tabular/models/mitra/_internal/core/get_loss.py +55 -0
  11. autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +108 -0
  12. autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +67 -0
  13. autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +134 -0
  14. autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +367 -0
  15. autogluon/tabular/models/mitra/_internal/data/collator.py +46 -0
  16. autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +132 -0
  17. autogluon/tabular/models/mitra/_internal/data/dataset_split.py +53 -0
  18. autogluon/tabular/models/mitra/_internal/data/preprocessor.py +420 -0
  19. autogluon/tabular/models/mitra/_internal/models/base.py +21 -0
  20. autogluon/tabular/models/mitra/_internal/models/embedding.py +182 -0
  21. autogluon/tabular/models/mitra/_internal/models/tab2d.py +667 -0
  22. autogluon/tabular/models/mitra/_internal/utils/set_seed.py +15 -0
  23. autogluon/tabular/models/mitra/mitra_model.py +214 -0
  24. autogluon/tabular/models/mitra/sklearn_interface.py +462 -0
  25. autogluon/tabular/registry/_ag_model_registry.py +2 -0
  26. autogluon/tabular/testing/fit_helper.py +2 -2
  27. autogluon/tabular/version.py +1 -1
  28. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/METADATA +21 -12
  29. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/RECORD +36 -16
  30. /autogluon.tabular-1.3.2b20250713-py3.9-nspkg.pth → /autogluon.tabular-1.3.2b20250715-py3.9-nspkg.pth +0 -0
  31. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/LICENSE +0 -0
  32. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/NOTICE +0 -0
  33. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/WHEEL +0 -0
  34. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/namespace_packages.txt +0 -0
  35. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/top_level.txt +0 -0
  36. {autogluon.tabular-1.3.2b20250713.dist-info → autogluon.tabular-1.3.2b20250715.dist-info}/zip-safe +0 -0
autogluon/tabular/models/mitra/_internal/data/collator.py (new file)
@@ -0,0 +1,46 @@
+ import torch
+
+
+ class CollatorWithPadding():
+
+     def __init__(
+         self,
+         max_features: int,
+         pad_to_max_features: bool,
+     ) -> None:
+
+         self.max_features = max_features
+         self.pad_to_max_features = pad_to_max_features
+
+
+     def __call__(self, batch: list[dict[str, torch.Tensor]]) -> dict[str, torch.Tensor]:
+
+         max_support_samples = max(dataset['x_support'].shape[0] for dataset in batch)
+         max_query_samples = max(dataset['x_query'].shape[0] for dataset in batch)
+         max_features = max(dataset['x_support'].shape[1] for dataset in batch)
+
+         if self.pad_to_max_features:
+             max_features = self.max_features
+
+         batch_size = len(batch)
+
+         tensor_dict = {
+             'x_support': torch.zeros((batch_size, max_support_samples, max_features), dtype=batch[0]['x_support'].dtype),
+             'y_support': torch.full((batch_size, max_support_samples), fill_value=-100, dtype=batch[0]['y_support'].dtype),
+             'x_query': torch.zeros((batch_size, max_query_samples, max_features), dtype=batch[0]['x_query'].dtype),
+             'y_query': torch.full((batch_size, max_query_samples), fill_value=-100, dtype=batch[0]['y_query'].dtype),
+             'padding_features': torch.ones((batch_size, max_features), dtype=torch.bool),
+             'padding_obs_support': torch.ones((batch_size, max_support_samples), dtype=torch.bool),
+             'padding_obs_query': torch.ones((batch_size, max_query_samples), dtype=torch.bool),
+         }
+
+         for i, dataset in enumerate(batch):
+             tensor_dict['x_support'][i, :dataset['x_support'].shape[0], :dataset['x_support'].shape[1]] = dataset['x_support']
+             tensor_dict['y_support'][i, :dataset['y_support'].shape[0]] = dataset['y_support']
+             tensor_dict['x_query'][i, :dataset['x_query'].shape[0], :dataset['x_support'].shape[1]] = dataset['x_query']
+             tensor_dict['y_query'][i, :dataset['y_query'].shape[0]] = dataset['y_query']
+             tensor_dict['padding_features'][i, :dataset['x_support'].shape[1]] = False
+             tensor_dict['padding_obs_support'][i, :dataset['x_support'].shape[0]] = False
+             tensor_dict['padding_obs_query'][i, :dataset['x_query'].shape[0]] = False
+
+         return tensor_dict
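Not part of the diff: a minimal usage sketch of the collator above. It is shaped like a collate_fn for a torch.utils.data.DataLoader, padding a list of variably sized support/query episodes to common shapes and flagging padded positions with boolean masks (padded labels are filled with -100). The import path matches the file added in this diff; the episode sizes below are made up.

import torch
from autogluon.tabular.models.mitra._internal.data.collator import CollatorWithPadding

# Two hypothetical episodes with different numbers of rows and features.
batch = [
    {'x_support': torch.randn(8, 4), 'y_support': torch.zeros(8, dtype=torch.int64),
     'x_query': torch.randn(3, 4), 'y_query': torch.zeros(3, dtype=torch.int64)},
    {'x_support': torch.randn(5, 3), 'y_support': torch.zeros(5, dtype=torch.int64),
     'x_query': torch.randn(6, 3), 'y_query': torch.zeros(6, dtype=torch.int64)},
]

collate = CollatorWithPadding(max_features=10, pad_to_max_features=False)
out = collate(batch)                  # also usable as DataLoader(..., collate_fn=collate)

print(out['x_support'].shape)         # torch.Size([2, 8, 4]); with pad_to_max_features=True the last dim would be 10
print(out['padding_obs_support'][1])  # True marks the 3 padded support rows of the second episode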
autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py (new file)
@@ -0,0 +1,132 @@
+ from typing import Optional
+
+ import numpy as np
+ import torch
+
+ from ..._internal.config.config_run import ConfigRun
+ from ..._internal.data.dataset_split import make_dataset_split
+ from ..._internal.config.enums import Task
+
+
+ class DatasetFinetune(torch.utils.data.Dataset):
+     """
+     The main goal of this class is to generate a dataset for fine-tuning.
+     The input data are the full (x_support, y_support, x_query, y_query),
+     but these arrays can be too large to be pushed through the model at once.
+     So here we split the query data into chunks if it is too large.
+     If the support data is too large, we randomly sample from it.
+     Furthermore, we transition from numpy to tensors.
+     """
+
+     def __init__(
+         self,
+         cfg: ConfigRun,
+         x_support: np.ndarray,
+         y_support: np.ndarray,
+         x_query: np.ndarray,
+         y_query: Optional[np.ndarray],
+         max_samples_support: int,
+         max_samples_query: int
+     ):
+         """
+         :param max_features: number of features the tab pfn model has been trained on
+         """
+
+         self.cfg = cfg
+
+         self.x_support = x_support
+         self.y_support = y_support
+         self.x_query = x_query
+         self.y_query = y_query
+
+         if self.y_query is None:
+             self.y_query = np.zeros((self.x_query.shape[0],)) - 1
+
+         self.max_samples_support = max_samples_support
+         self.max_samples_query = max_samples_query
+
+         self.x_queries = self.split_in_chunks(self.x_query, max_samples_query)
+         self.y_queries = self.split_in_chunks(self.y_query, max_samples_query)
+
+         self.n_samples_support = self.x_support.shape[0]
+
+         # We push the whole training data through the model, unless it is too large
+         self.support_size = min(self.max_samples_support, self.n_samples_support)
+
+
+     def __len__(self):
+         return len(self.x_queries)
+
+     def __getitem__(self, idx):
+
+         support_indices = np.random.choice(
+             self.n_samples_support,
+             size=self.support_size,
+             replace=False
+         )
+
+         x_support = self.x_support[support_indices]
+         y_support = self.y_support[support_indices]
+
+         x_support_tensor = torch.as_tensor(x_support)
+         y_support_tensor = torch.as_tensor(y_support)
+         x_query_tensor = torch.as_tensor(self.x_queries[idx])
+         y_query_tensor = torch.as_tensor(self.y_queries[idx])
+
+         return {
+             'x_support': x_support_tensor,
+             'y_support': y_support_tensor,
+             'x_query': x_query_tensor,
+             'y_query': y_query_tensor,
+         }
+
+
+     def split_in_chunks(self, x: np.ndarray, batch_size: int) -> list[np.ndarray]:
+         """
+         Splits the data into chunks of size batch_size
+         """
+
+         n_chunks = int(np.ceil(x.shape[0] / batch_size))
+         x_chunks = []
+
+         for i in range(n_chunks):
+             x_chunks.append(x[i * batch_size: (i + 1) * batch_size])
+
+         return x_chunks
+
+
+ def DatasetFinetuneGenerator(
+     cfg: ConfigRun,
+     x: np.ndarray,
+     y: np.ndarray,
+     task: Task,
+     max_samples_support: int,
+     max_samples_query: int
+ ):
+     """
+     The dataset fine-tune generator is a generator that yields a dataset for fine-tuning.
+     The idea is to split the training dataset into a support and query set.
+     Every single iteration, the generator yields a different support and query set split.
+     The resulting dataset always has exactly one batch.
+     """
+
+     while True:
+
+         x_support, x_query, y_support, y_query = make_dataset_split(x=x, y=y, task=task, seed=cfg.seed)
+         n_samples_support = x_support.shape[0]
+         n_samples_query = x_query.shape[0]
+
+         support_size = min(max_samples_support, n_samples_support)
+         query_size = min(max_samples_query, n_samples_query)
+
+         dataset_finetune = DatasetFinetune(
+             cfg=cfg,
+             x_support=x_support[:support_size],
+             y_support=y_support[:support_size],
+             x_query=x_query[:query_size],
+             y_query=y_query[:query_size],
+             max_samples_support=max_samples_support,
+             max_samples_query=max_samples_query,
+         )
+
+         yield dataset_finetune
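Not part of the diff: a small sketch of how DatasetFinetune behaves, chunking the query set and subsampling the support set. In the code above, cfg is only stored, so None is passed here purely for illustration; the array sizes are made up and the import path matches the file added in this diff.

import numpy as np
from autogluon.tabular.models.mitra._internal.data.dataset_finetune import DatasetFinetune

# Hypothetical data: 1000 support rows, 250 query rows, 8 features.
x_support = np.random.randn(1000, 8)
y_support = np.random.randint(0, 2, size=1000)
x_query = np.random.randn(250, 8)

dataset = DatasetFinetune(
    cfg=None,                  # only stored by the code shown above
    x_support=x_support,
    y_support=y_support,
    x_query=x_query,
    y_query=None,              # replaced internally by a -1 placeholder
    max_samples_support=256,   # support is subsampled to at most 256 rows per item
    max_samples_query=100,     # query is split into ceil(250 / 100) = 3 chunks
)

print(len(dataset))             # 3 (one item per query chunk)
item = dataset[0]
print(item['x_support'].shape)  # torch.Size([256, 8])
print(item['x_query'].shape)    # torch.Size([100, 8])

DatasetFinetuneGenerator wraps this with a fresh support/query split on every iteration, so repeated fine-tuning passes see different splits.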
autogluon/tabular/models/mitra/_internal/data/dataset_split.py (new file)
@@ -0,0 +1,53 @@
+ import numpy as np
+ from sklearn.model_selection import StratifiedKFold, train_test_split
+
+ from ..._internal.config.enums import Task
+
+ def make_dataset_split(x: np.ndarray, y: np.ndarray, task: Task, seed: int) -> tuple[np.ndarray, ...]:
+     # Splits the dataset into train and validation sets with ratio 80/20
+
+     if task == Task.REGRESSION:
+         return make_standard_dataset_split(x, y, seed=seed)
+
+     size_of_smallest_class = np.min(np.bincount(y))
+
+     if size_of_smallest_class >= 5:
+         # Stratification needs at least 5 samples in each class if the split is 80/20
+         return make_stratified_dataset_split(x, y, seed=seed)
+     else:
+         return make_standard_dataset_split(x, y, seed=seed)
+
+
+ def make_stratified_dataset_split(x, y, n_splits=5, seed=0):
+
+     # Stratify doesn't shuffle the data, so we shuffle it first
+     permutation = np.random.permutation(len(y))
+     x, y = x[permutation], y[permutation]
+
+     min_samples_per_class = np.min(np.bincount(y))
+
+     # Adjust n_splits based on both total samples and minimum samples per class
+     n_samples = len(y)
+     max_possible_splits = min(n_samples - 1, min_samples_per_class)
+     n_splits = min(n_splits, max_possible_splits)
+
+     # Ensure we have at least 2 splits if possible
+     if n_samples >= 2 and min_samples_per_class >= 2:
+         n_splits = max(2, n_splits)
+     else:
+         # If we can't do stratified splitting, fall back to standard split
+         return make_standard_dataset_split(x, y, seed)
+
+     skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
+     indices = next(skf.split(x, y))
+     x_t_train, x_t_valid = x[indices[0]], x[indices[1]]  # 80%, 20%
+     y_t_train, y_t_valid = y[indices[0]], y[indices[1]]
+
+     return x_t_train, x_t_valid, y_t_train, y_t_valid
+
+
+ def make_standard_dataset_split(x, y, seed):
+
+     return train_test_split(
+         x, y, test_size=0.2, random_state=seed,
+     )
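Not part of the diff: a toy check of the stratified 80/20 behavior. make_stratified_dataset_split shuffles, caps n_splits by the smallest class, and returns the first StratifiedKFold fold, so with n_splits=5 the validation side is roughly 20%. The data below is made up; the import path matches the file added in this diff.

import numpy as np
from autogluon.tabular.models.mitra._internal.data.dataset_split import make_stratified_dataset_split

# Hypothetical toy data: 20 rows, two classes (12 vs. 8 samples).
x = np.random.randn(20, 3)
y = np.array([0] * 12 + [1] * 8)

x_train, x_valid, y_train, y_valid = make_stratified_dataset_split(x, y, seed=0)
print(x_train.shape, x_valid.shape)                 # (16, 3) (4, 3), i.e. roughly an 80/20 split
print(np.bincount(y_train), np.bincount(y_valid))   # class ratios approximately preserved in both parts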
autogluon/tabular/models/mitra/_internal/data/preprocessor.py (new file)
@@ -0,0 +1,420 @@
+ from typing import Optional, Self
+
+ import random
+ import numpy as np
+ from loguru import logger
+ from sklearn.feature_selection import SelectKBest
+ from sklearn.preprocessing import QuantileTransformer, StandardScaler, OrdinalEncoder
+ from sklearn.compose import ColumnTransformer
+ from sklearn.decomposition import TruncatedSVD
+ from sklearn.pipeline import Pipeline, FeatureUnion
+ from sklearn.base import BaseEstimator, TransformerMixin
+
+ from ..._internal.config.enums import Task
+
+ class NoneTransformer(BaseEstimator, TransformerMixin):
+     def fit(self, X, y=None):
+         return self
+     def transform(self, X):
+         return X
+
+ class Preprocessor():
+     """
+     This class is used to preprocess the data before it is pushed through the model.
+     The preprocessor ensures that the data has the right shape and is normalized.
+     This way the model always gets the same input distribution,
+     no matter whether the input data is synthetic or real.
+     """
+
+     def __init__(
+         self,
+         dim_embedding: Optional[int],   # Size of the feature embedding. For some models this is None, which means the embedding does not depend on the number of features
+         n_classes: int,                 # Actual number of classes in the dataset, assumed to be numbered 0, ..., n_classes - 1
+         dim_output: int,                # Maximum number of classes the model has been trained on -> size of the output
+         use_quantile_transformer: bool,
+         use_feature_count_scaling: bool,
+         use_random_transforms: bool,
+         shuffle_classes: bool,
+         shuffle_features: bool,
+         random_mirror_regression: bool,
+         random_mirror_x: bool,
+         task: Task
+     ):
+
+         self.dim_embedding = dim_embedding
+         self.n_classes = n_classes
+         self.dim_output = dim_output
+         self.use_quantile_transformer = use_quantile_transformer
+         self.use_feature_count_scaling = use_feature_count_scaling
+         self.use_random_transforms = use_random_transforms
+         self.shuffle_classes = shuffle_classes
+         self.shuffle_features = shuffle_features
+         self.random_mirror_regression = random_mirror_regression
+         self.random_mirror_x = random_mirror_x
+         self.task = task
+
+     def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
+         """
+         X: np.ndarray [n_samples, n_features]
+         y: np.ndarray [n_samples]
+         """
+
+         if self.task == Task.CLASSIFICATION:
+             # We assume that y properly presents classes [0, 1, 2, ...] before passing to the preprocessor
+             # If the test set has a class that is not in the training set, we will throw an error
+
+             assert np.all(y < self.n_classes), "y contains class values that are not in the range of n_classes"
+
+         self.compute_pre_nan_mean(X)
+         X = self.impute_nan_features_with_mean(X)
+
+         self.determine_which_features_are_singular(X)
+         X = self.cutoff_singular_features(X, self.singular_features)
+
+         self.determine_which_features_to_select(X, y)
+         X = self.select_features(X)
+
+         if self.use_quantile_transformer:
+             # If use quantile transform is off, it means that the preprocessing will happen on the GPU.
+             X = self.fit_transform_quantile_transformer(X)
+
+             self.mean, self.std = self.calc_mean_std(X)
+             X = self.normalize_by_mean_std(X, self.mean, self.std)
+
+         if self.use_random_transforms:
+             X = self.transform_tabpfn(X)
+
+         if self.task == Task.CLASSIFICATION and self.shuffle_classes:
+             self.determine_shuffle_class_order()
+
+         if self.shuffle_features:
+             self.determine_feature_order(X)
+
+         if self.task == Task.REGRESSION:
+             self.determine_mix_max_scale(y)
+
+         if self.task == Task.REGRESSION and self.random_mirror_regression:
+             self.determine_regression_mirror()
+
+         if self.random_mirror_x:
+             self.determine_mirror(X)
+
+         X[np.isnan(X)] = 0
+         X[np.isinf(X)] = 0
+
+         return self
+
+
+     def transform_X(self, X: np.ndarray):
+
+         X = self.impute_nan_features_with_mean(X)
+         X = self.cutoff_singular_features(X, self.singular_features)
+         X = self.select_features(X)
+
+         if self.use_quantile_transformer:
+             # If use quantile transform is off, it means that the preprocessing will happen on the GPU.
+
+             X = self.quantile_transformer.transform(X)
+
+             X = self.normalize_by_mean_std(X, self.mean, self.std)
+
+             if self.use_feature_count_scaling:
+                 X = self.normalize_by_feature_count(X)
+
+         if self.use_random_transforms:
+             X = self.random_transforms.transform(X)
+
+         if self.shuffle_features:
+             X = self.randomize_feature_order(X)
+
+         if self.random_mirror_x:
+             X = self.apply_random_mirror_x(X)
+
+         X = X.astype(np.float32)
+
+         X[np.isnan(X)] = 0
+         X[np.isinf(X)] = 0
+
+         return X
+
+
+     def transform_tabpfn(self, X: np.ndarray):
+
+         n_samples = X.shape[0]
+         n_features = X.shape[1]
+
+         use_config1 = random.random() < 0.5
+         random_state = random.randint(0, 1000000)
+
+         if use_config1:
+             self.random_transforms = Pipeline([
+                 ('quantile', QuantileTransformer(
+                     output_distribution="normal",
+                     n_quantiles=max(n_samples // 10, 2),
+                     random_state=random_state
+                 )),
+                 ('svd', FeatureUnion([
+                     ('passthrough', NoneTransformer()),
+                     ('svd', Pipeline([
+                         ('standard', StandardScaler(with_mean=False)),
+                         ('svd', TruncatedSVD(
+                             algorithm="arpack",
+                             n_components=max(1, min(n_samples // 10 + 1, n_features // 2)),
+                             random_state=random_state
+                         ))
+                     ]))
+                 ]))
+             ])
+         else:
+             self.random_transforms = ColumnTransformer([
+                 ('ordinal', OrdinalEncoder(
+                     handle_unknown="use_encoded_value",
+                     unknown_value=np.nan
+                 ), [])
+             ], remainder='passthrough')
+
+         return self.random_transforms.fit_transform(X)
+
+
+     def transform_y(self, y: np.ndarray):
+
+         if self.task == Task.CLASSIFICATION:
+             # We assume that y properly presents classes [0, 1, 2, ...] before passing to the preprocessor
+             # If the test set has a class that is not in the training set, we will throw an error
+             assert np.all(y < self.n_classes), "y contains class values that are not in the range of n_classes"
+
+         if self.task == Task.CLASSIFICATION and self.shuffle_classes:
+             y = self.randomize_class_order(y)
+
+         if self.task == Task.REGRESSION:
+             y = self.normalize_y(y)
+
+         if self.task == Task.REGRESSION and self.random_mirror_regression:
+             y = self.apply_random_mirror_regression(y)
+
+         match self.task:
+             case Task.CLASSIFICATION:
+                 y = y.astype(np.int64)
+             case Task.REGRESSION:
+                 y = y.astype(np.float32)
+
+         return y
+
+
+     def inverse_transform_y(self, y: np.ndarray):
+         # Function used during the prediction to transform the model output back to the original space
+         # For classification, y is assumed to be logits of shape [n_samples, n_classes]
+
+         match self.task:
+             case Task.CLASSIFICATION:
+                 y = self.extract_correct_classes(y)
+
+                 if self.shuffle_classes:
+                     y = self.undo_randomize_class_order(y)
+
+             case Task.REGRESSION:
+
+                 if self.random_mirror_regression:
+                     y = self.apply_random_mirror_regression(y)
+
+                 y = self.undo_normalize_y(y)
+
+         return y
+
+
+     def fit_transform_quantile_transformer(self, X: np.ndarray) -> np.ndarray:
+
+         n_obs, n_features = X.shape
+         n_quantiles = min(n_obs, 1000)
+         self.quantile_transformer = QuantileTransformer(n_quantiles=n_quantiles, output_distribution='normal')
+         X = self.quantile_transformer.fit_transform(X)
+
+         return X
+
+
+     def determine_which_features_are_singular(self, x: np.ndarray) -> None:
+
+         self.singular_features = np.array([ len(np.unique(x_col)) for x_col in x.T ]) == 1
+
+
+     def determine_which_features_to_select(self, x: np.ndarray, y: np.ndarray) -> None:
+
+         if self.dim_embedding is None:
+             # All features are selected
+             return
+
+         if x.shape[1] > self.dim_embedding:
+             logger.info(f"Number of features is capped at {self.dim_embedding}, but the dataset has {x.shape[1]} features. A subset of {self.dim_embedding} features is selected using SelectKBest")
+
+             self.select_k_best = SelectKBest(k=self.dim_embedding)
+             self.select_k_best.fit(x, y)
+
+
+     def compute_pre_nan_mean(self, x: np.ndarray) -> None:
+         """
+         Computes the mean of the data before the NaNs are imputed
+         """
+         self.pre_nan_mean = np.nanmean(x, axis=0)
+
+
+     def impute_nan_features_with_mean(self, x: np.ndarray) -> np.ndarray:
+
+         inds = np.where(np.isnan(x))
+         x[inds] = np.take(self.pre_nan_mean, inds[1])
+         return x
+
+
+     def select_features(self, x: np.ndarray) -> np.ndarray:
+
+         if self.dim_embedding is None:
+             # All features are selected
+             return x
+
+         if x.shape[1] > self.dim_embedding:
+             x = self.select_k_best.transform(x)
+
+         return x
+
+
+     def cutoff_singular_features(self, x: np.ndarray, singular_features: np.ndarray) -> np.ndarray:
+
+         if singular_features.any():
+             x = x[:, ~singular_features]
+
+         return x
+
+
+     def calc_mean_std(self, x: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+         """
+         Calculates the mean and std of the training data
+         """
+         mean = x.mean(axis=0)
+         std = x.std(axis=0) + 1e-6
+         return mean, std
+
+
+     def normalize_by_mean_std(self, x: np.ndarray, mean: np.ndarray, std: np.ndarray) -> np.ndarray:
+         """
+         Normalizes the data by the mean and std
+         """
+
+         x = (x - mean) / std
+         return x
+
+
+     def normalize_by_feature_count(self, x: np.ndarray) -> np.ndarray:
+         """
+         An interesting way of normalization by the tabPFN paper
+         """
+
+         assert self.dim_embedding is not None, "dim_embedding must be set to use this feature count scaling"
+
+         x = x * self.dim_embedding / x.shape[1]
+
+         return x
+
+
+     def extend_feature_dim_to_dim_embedding(self, x: np.ndarray, dim_embedding) -> np.ndarray:
+         """
+         Increases the number of features to the number of features the model has been trained on
+         """
+
+         assert self.dim_embedding is not None, "dim_embedding must be set to extend the feature dimension"
+
+         added_zeros = np.zeros((x.shape[0], dim_embedding - x.shape[1]), dtype=np.float32)
+         x = np.concatenate([x, added_zeros], axis=1)
+         return x
+
+
+     def determine_mix_max_scale(self, y: np.ndarray) -> None:
+         self.y_min = y.min()
+         self.y_max = y.max()
+         assert self.y_min != self.y_max, "y_min and y_max are the same, cannot normalize, regression makes no sense"
+
+
+     def normalize_y(self, y: np.ndarray) -> np.ndarray:
+         y = (y - self.y_min) / (self.y_max - self.y_min)
+         return y
+
+
+     def undo_normalize_y(self, y: np.ndarray) -> np.ndarray:
+         y = y * (self.y_max - self.y_min) + self.y_min
+         return y
+
+
+     def determine_regression_mirror(self) -> None:
+         self.regression_mirror = np.random.choice([True, False], size=(1,)).item()
+
+
+     def apply_random_mirror_regression(self, y: np.ndarray) -> np.ndarray:
+         if self.regression_mirror:
+             y = 1 - y
+         return y
+
+
+     def determine_mirror(self, x: np.ndarray) -> None:
+
+         n_features = x.shape[1]
+         self.mirror = np.random.choice([1, -1], size=(1, n_features))
+
+
+     def apply_random_mirror_x(self, x: np.ndarray) -> np.ndarray:
+
+         x = x * self.mirror
+         return x
+
+
+     def determine_shuffle_class_order(self) -> None:
+
+         if self.shuffle_classes:
+             self.new_shuffle_classes = np.random.permutation(self.n_classes)
+         else:
+             self.new_shuffle_classes = np.arange(self.n_classes)
+
+
+     def randomize_class_order(self, y: np.ndarray) -> np.ndarray:
+
+         mapping = { i: self.new_shuffle_classes[i] for i in range(self.n_classes) }
+         y = np.array([mapping[i.item()] for i in y], dtype=np.int64)
+
+         return y
+
+
+     def undo_randomize_class_order(self, y_logits: np.ndarray) -> np.ndarray:
+         """
+         We assume y_logits has shape [n_samples, n_classes]
+         """
+
+         # mapping = {self.new_shuffle_classes[i]: i for i in range(self.n_classes)}
+         mapping = {i: self.new_shuffle_classes[i] for i in range(self.n_classes)}
+         y = np.concatenate([y_logits[:, mapping[i]:mapping[i]+1] for i in range(self.n_classes)], axis=1)
+
+         return y
+
+
+     def extract_correct_classes(self, y_logits: np.ndarray) -> np.ndarray:
+         # Even though our network might be able to support 10 classes,
+         # if the problem only has three classes, we should give three classes as output.
+         # We assume y_logits has shape [n_samples, n_classes]
+         y_logits = y_logits[:, :self.n_classes]
+         return y_logits
+
+
+     def determine_feature_order(self, x: np.ndarray) -> None:
+
+         n_features = x.shape[1]
+         self.new_feature_order = np.random.permutation(n_features)
+
+
+     def randomize_feature_order(self, x: np.ndarray) -> np.ndarray:
+
+         x = x[:, self.new_feature_order]
+
+         return x
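Not part of the diff: a round-trip sketch of the Preprocessor on a small synthetic classification problem. Flag values are illustrative only, and the import paths match files added in this diff (enums.py provides Task). With dim_embedding=None the SelectKBest cap is skipped, and inverse_transform_y maps model logits back to the original class order.

import numpy as np
from autogluon.tabular.models.mitra._internal.config.enums import Task
from autogluon.tabular.models.mitra._internal.data.preprocessor import Preprocessor

# Hypothetical 3-class problem: 60 rows, 5 features.
X = np.random.randn(60, 5)
y = np.random.randint(0, 3, size=60)

prep = Preprocessor(
    dim_embedding=None,            # no feature cap, so feature selection is skipped
    n_classes=3,
    dim_output=10,                 # width of the model output head (>= n_classes)
    use_quantile_transformer=True,
    use_feature_count_scaling=False,
    use_random_transforms=False,
    shuffle_classes=True,
    shuffle_features=True,
    random_mirror_regression=False,
    random_mirror_x=True,
    task=Task.CLASSIFICATION,
)

prep.fit(X, y)
X_t = prep.transform_X(X.copy())   # copy: NaN imputation can modify the input in place
y_t = prep.transform_y(y)          # classes are re-labelled because shuffle_classes=True

logits = np.random.randn(60, 10).astype(np.float32)  # stand-in for model output
y_back = prep.inverse_transform_y(logits)             # trimmed to 3 columns, class order undone

print(X_t.shape, y_t.dtype, y_back.shape)             # (60, 5) int64 (60, 3)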
autogluon/tabular/models/mitra/_internal/models/base.py (new file)
@@ -0,0 +1,21 @@
+ import torch
+ import torch.nn as nn
+ from abc import ABC, abstractmethod
+
+ class BaseModel(nn.Module, ABC):
+
+     def __init__(self):
+         super().__init__()
+
+     def init_weights(self):
+         """Initialize model weights."""
+         pass
+
+     @abstractmethod
+     def forward(self,
+                 x_support: torch.Tensor,
+                 y_support: torch.Tensor,
+                 x_query: torch.Tensor,
+                 **kwargs):
+         """Forward pass for the model."""
+         pass
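Not part of the diff: a minimal, hypothetical subclass showing the contract BaseModel defines. Concrete implementations must provide forward(x_support, y_support, x_query, ...); the toy model below simply scores each query row with a linear head and ignores the support set. The import path matches the file added in this diff.

import torch
import torch.nn as nn
from autogluon.tabular.models.mitra._internal.models.base import BaseModel

class TinyModel(BaseModel):
    # Toy subclass: ignores the support set and scores each query row with a linear layer.

    def __init__(self, n_features: int, n_classes: int):
        super().__init__()
        self.head = nn.Linear(n_features, n_classes)

    def forward(self, x_support: torch.Tensor, y_support: torch.Tensor, x_query: torch.Tensor, **kwargs):
        return self.head(x_query)

model = TinyModel(n_features=4, n_classes=3)
logits = model(torch.randn(2, 8, 4), torch.zeros(2, 8), torch.randn(2, 5, 4))
print(logits.shape)  # torch.Size([2, 5, 3])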