autogluon.tabular 1.5.0b20251228__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.tabular might be problematic.
- autogluon/tabular/__init__.py +1 -0
- autogluon/tabular/configs/config_helper.py +18 -6
- autogluon/tabular/configs/feature_generator_presets.py +3 -1
- autogluon/tabular/configs/hyperparameter_configs.py +42 -9
- autogluon/tabular/configs/presets_configs.py +38 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
- autogluon/tabular/experimental/_scikit_mixin.py +6 -2
- autogluon/tabular/experimental/_tabular_classifier.py +3 -1
- autogluon/tabular/experimental/_tabular_regressor.py +3 -1
- autogluon/tabular/experimental/plot_leaderboard.py +73 -19
- autogluon/tabular/learner/abstract_learner.py +160 -42
- autogluon/tabular/learner/default_learner.py +78 -22
- autogluon/tabular/models/__init__.py +2 -2
- autogluon/tabular/models/_utils/rapids_utils.py +3 -1
- autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
- autogluon/tabular/models/automm/automm_model.py +12 -3
- autogluon/tabular/models/automm/ft_transformer.py +5 -1
- autogluon/tabular/models/catboost/callbacks.py +2 -2
- autogluon/tabular/models/catboost/catboost_model.py +93 -29
- autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
- autogluon/tabular/models/catboost/catboost_utils.py +3 -1
- autogluon/tabular/models/ebm/ebm_model.py +8 -13
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
- autogluon/tabular/models/fastainn/callbacks.py +20 -3
- autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
- autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
- autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
- autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
- autogluon/tabular/models/knn/knn_model.py +41 -8
- autogluon/tabular/models/lgb/callbacks.py +32 -9
- autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
- autogluon/tabular/models/lgb/lgb_model.py +150 -34
- autogluon/tabular/models/lgb/lgb_utils.py +12 -4
- autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
- autogluon/tabular/models/lr/lr_model.py +40 -10
- autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
- autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
- autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
- autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
- autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
- autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
- autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
- autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
- autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
- autogluon/tabular/models/mitra/mitra_model.py +16 -11
- autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
- autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
- autogluon/tabular/models/rf/compilers/onnx.py +1 -1
- autogluon/tabular/models/rf/rf_model.py +45 -12
- autogluon/tabular/models/rf/rf_quantile.py +4 -2
- autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
- autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
- autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
- autogluon/tabular/models/tabm/tabm_model.py +8 -4
- autogluon/tabular/models/tabm/tabm_reference.py +53 -85
- autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
- autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
- autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
- autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
- autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
- autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
- autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
- autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
- autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
- autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
- autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
- autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
- autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
- autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
- autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
- autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
- autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
- autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
- autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
- autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
- autogluon/tabular/models/xgboost/callbacks.py +9 -3
- autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
- autogluon/tabular/models/xt/xt_model.py +1 -0
- autogluon/tabular/predictor/interpretable_predictor.py +3 -1
- autogluon/tabular/predictor/predictor.py +409 -128
- autogluon/tabular/registry/__init__.py +1 -1
- autogluon/tabular/registry/_ag_model_registry.py +4 -5
- autogluon/tabular/registry/_model_registry.py +1 -0
- autogluon/tabular/testing/fit_helper.py +55 -15
- autogluon/tabular/testing/generate_datasets.py +1 -1
- autogluon/tabular/testing/model_fit_helper.py +10 -4
- autogluon/tabular/trainer/abstract_trainer.py +644 -230
- autogluon/tabular/trainer/auto_trainer.py +19 -8
- autogluon/tabular/trainer/model_presets/presets.py +33 -9
- autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
- autogluon/tabular/version.py +1 -1
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
- /autogluon.tabular-1.5.0b20251228-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
@@ -4,8 +4,8 @@ import numpy as np
 import torch
 
 from ..._internal.config.config_run import ConfigRun
-from ..._internal.data.dataset_split import make_dataset_split
 from ..._internal.config.enums import Task
+from ..._internal.data.dataset_split import make_dataset_split
 
 
 class DatasetFinetune(torch.utils.data.Dataset):
@@ -19,11 +19,11 @@ class DatasetFinetune(torch.utils.data.Dataset):
     """
 
     def __init__(
-        self,
+        self,
         cfg: ConfigRun,
-        x_support: np.ndarray,
-        y_support: np.ndarray,
-        x_query: np.ndarray,
+        x_support: np.ndarray,
+        y_support: np.ndarray,
+        x_query: np.ndarray,
         y_query: Optional[np.ndarray],
         max_samples_support: int,
         max_samples_query: int,
@@ -35,10 +35,10 @@ class DatasetFinetune(torch.utils.data.Dataset):
 
         self.cfg = cfg
         self.rng = rng
-
+
         self.x_support = x_support
         self.y_support = y_support
-        self.x_query = x_query
+        self.x_query = x_query
         self.y_query = y_query
 
         if self.y_query is None:
@@ -55,17 +55,11 @@ class DatasetFinetune(torch.utils.data.Dataset):
         # We push the whole training data through the model, unless it is too large
         self.support_size = min(self.max_samples_support, self.n_samples_support)
 
-
     def __len__(self):
         return len(self.x_queries)
 
     def __getitem__(self, idx):
-
-        support_indices = self.rng.choice(
-            self.n_samples_support,
-            size=self.support_size,
-            replace=False
-        )
+        support_indices = self.rng.choice(self.n_samples_support, size=self.support_size, replace=False)
 
         x_support = self.x_support[support_indices]
         y_support = self.y_support[support_indices]
@@ -76,13 +70,11 @@ class DatasetFinetune(torch.utils.data.Dataset):
         y_query_tensor = torch.as_tensor(self.y_queries[idx])
 
         return {
-
-
-
-
+            "x_support": x_support_tensor,
+            "y_support": y_support_tensor,
+            "x_query": x_query_tensor,
+            "y_query": y_query_tensor,
         }
-
-
 
     def split_in_chunks(self, x: np.ndarray, batch_size: int) -> list[np.ndarray]:
         """
@@ -93,14 +85,15 @@ class DatasetFinetune(torch.utils.data.Dataset):
         x_chunks = []
 
         for i in range(n_chunks):
-            x_chunks.append(x[i * batch_size: (i + 1) * batch_size])
+            x_chunks.append(x[i * batch_size : (i + 1) * batch_size])
 
         return x_chunks
 
+
 def DatasetFinetuneGenerator(
     cfg: ConfigRun,
-    x: np.ndarray,
-    y: np.ndarray,
+    x: np.ndarray,
+    y: np.ndarray,
     task: Task,
     max_samples_support: int,
     max_samples_query: int,
@@ -112,9 +105,8 @@ def DatasetFinetuneGenerator(
     Every single iteration, the generator yields a different support and query set split.
     The dataset made always has exactly one batch.
     """
-
-    while True:
 
+    while True:
         x_support, x_query, y_support, y_query = make_dataset_split(x=x, y=y, task=task, seed=rng)
         n_samples_support = x_support.shape[0]
         n_samples_query = x_query.shape[0]
@@ -133,4 +125,4 @@ def DatasetFinetuneGenerator(
             rng=rng,
         )
 
-        yield dataset_finetune
+        yield dataset_finetune
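These hunks are formatting-only: the behaviour kept by DatasetFinetune is that every __getitem__ call re-samples a support set without replacement and pairs it with one pre-chunked query batch. A minimal, self-contained sketch of that pattern on toy data; the class name SupportQueryDataset and its constructor are illustrative, not the package API.

import numpy as np
import torch


class SupportQueryDataset(torch.utils.data.Dataset):
    def __init__(self, x_support, y_support, x_queries, y_queries, support_size, rng):
        self.x_support = x_support
        self.y_support = y_support
        self.x_queries = x_queries  # list of pre-chunked query batches
        self.y_queries = y_queries
        self.support_size = support_size
        self.rng = rng

    def __len__(self):
        return len(self.x_queries)

    def __getitem__(self, idx):
        # a fresh support subset per item, drawn without replacement
        support_indices = self.rng.choice(len(self.x_support), size=self.support_size, replace=False)
        return {
            "x_support": torch.as_tensor(self.x_support[support_indices]),
            "y_support": torch.as_tensor(self.y_support[support_indices]),
            "x_query": torch.as_tensor(self.x_queries[idx]),
            "y_query": torch.as_tensor(self.y_queries[idx]),
        }


rng = np.random.default_rng(0)
x = rng.normal(size=(100, 5)).astype("float32")
y = rng.integers(0, 2, size=100)
ds = SupportQueryDataset(x[:80], y[:80], [x[80:]], [y[80:]], support_size=32, rng=rng)
batch = ds[0]  # "x_support" has shape (32, 5), "x_query" has shape (20, 5)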
@@ -5,6 +5,7 @@ from sklearn.model_selection import StratifiedKFold, train_test_split
 
 from ..._internal.config.enums import Task
 
+
 def make_dataset_split(x: np.ndarray, y: np.ndarray, task: Task, seed: int) -> tuple[np.ndarray, ...]:
     # Splits the dataset into train and validation sets with ratio 80/20
 
@@ -18,7 +19,7 @@ def make_dataset_split(x: np.ndarray, y: np.ndarray, task: Task, seed: int) -> t
         return make_stratified_dataset_split(x, y, seed=seed)
     else:
         return make_standard_dataset_split(x, y, seed=seed)
-
+
 
 def make_stratified_dataset_split(x, y, n_splits=5, seed=0):
     if isinstance(seed, int):
@@ -29,12 +30,12 @@ def make_stratified_dataset_split(x, y, n_splits=5, seed=0):
     x, y = x[permutation], y[permutation]
 
     min_samples_per_class = np.min(np.bincount(y))
-
+
     # Adjust n_splits based on both total samples and minimum samples per class
     n_samples = len(y)
     max_possible_splits = min(n_samples - 1, min_samples_per_class)
     n_splits = min(n_splits, max_possible_splits)
-
+
     # Ensure we have at least 2 splits if possible
     if n_samples >= 2 and min_samples_per_class >= 2:
         n_splits = max(2, n_splits)
@@ -44,14 +45,16 @@ def make_stratified_dataset_split(x, y, n_splits=5, seed=0):
 
     skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
     indices = next(skf.split(x, y))
-    x_t_train, x_t_valid = x[indices[0]], x[indices[1]]
+    x_t_train, x_t_valid = x[indices[0]], x[indices[1]]  # 80%, 20%
    y_t_train, y_t_valid = y[indices[0]], y[indices[1]]
 
     return x_t_train, x_t_valid, y_t_train, y_t_valid
 
 
 def make_standard_dataset_split(x, y, seed):
-
     return train_test_split(
-        x,
-
+        x,
+        y,
+        test_size=0.2,
+        random_state=seed,
+    )
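make_dataset_split keeps its 80/20 contract: classification goes through StratifiedKFold with n_splits clamped by the rarest class count, everything else falls back to train_test_split(test_size=0.2). A rough standalone sketch of that logic; the helper name split_80_20 is illustrative only.

import numpy as np
from sklearn.model_selection import StratifiedKFold, train_test_split


def split_80_20(x, y, stratify=True, seed=0):
    if stratify:
        # clamp n_splits by dataset size and the rarest class, roughly as in the hunk above
        min_per_class = np.min(np.bincount(y))
        n_splits = max(2, min(5, len(y) - 1, min_per_class))
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
        train_idx, valid_idx = next(skf.split(x, y))
        return x[train_idx], x[valid_idx], y[train_idx], y[valid_idx]
    return train_test_split(x, y, test_size=0.2, random_state=seed)


x = np.random.rand(50, 4)
y = np.random.randint(0, 3, size=50)
x_tr, x_va, y_tr, y_va = split_80_20(x, y)  # ~80% train / ~20% validation, stratified by class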
@@ -8,8 +8,7 @@ from sklearn.compose import ColumnTransformer
 from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_selection import SelectKBest
 from sklearn.pipeline import FeatureUnion, Pipeline
-from sklearn.preprocessing import
-    StandardScaler)
+from sklearn.preprocessing import OrdinalEncoder, QuantileTransformer, StandardScaler
 
 from ..._internal.config.enums import Task
 
@@ -17,33 +16,36 @@ from ..._internal.config.enums import Task
 class NoneTransformer(BaseEstimator, TransformerMixin):
     def fit(self, X, y=None):
         return self
+
     def transform(self, X):
         return X
 
-class Preprocessor():
+
+class Preprocessor:
     """
     This class is used to preprocess the data before it is pushed through the model.
     The preprocessor assures that the data has the right shape and is normalized,
-    This way the model always gets the same input distribution,
+    This way the model always gets the same input distribution,
     no matter whether the input data is synthetic or real.
 
     """
 
     def __init__(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        self,
+        dim_embedding: Optional[
+            int
+        ],  # Size of the feature embedding. For some models this is None, which means the embedding does not depend on the number of features
+        n_classes: int,  # Actual number of classes in the dataset, assumed to be numbered 0, ..., n_classes - 1
+        dim_output: int,  # Maximum number of classes the model has been trained on -> size of the output
+        use_quantile_transformer: bool,
+        use_feature_count_scaling: bool,
+        use_random_transforms: bool,
+        shuffle_classes: bool,
+        shuffle_features: bool,
+        random_mirror_regression: bool,
+        random_mirror_x: bool,
+        task: Task,
+    ):
         self.dim_embedding = dim_embedding
         self.n_classes = n_classes
         self.dim_output = dim_output
@@ -107,9 +109,7 @@ class Preprocessor():
 
         return self
 
-
     def transform_X(self, X: np.ndarray):
-
         X = self.impute_nan_features_with_mean(X)
         X = self.cutoff_singular_features(X, self.singular_features)
         X = self.select_features(X)
@@ -140,9 +140,7 @@ class Preprocessor():
 
         return X
 
-
     def transform_tabpfn(self, X: np.ndarray):
-
         n_samples = X.shape[0]
         n_features = X.shape[1]
 
@@ -150,37 +148,51 @@ class Preprocessor():
         random_state = random.randint(0, 1000000)
 
         if use_config1:
-            self.random_transforms = Pipeline(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            self.random_transforms = Pipeline(
+                [
+                    (
+                        "quantile",
+                        QuantileTransformer(
+                            output_distribution="normal",
+                            n_quantiles=max(n_samples // 10, 2),
+                            random_state=random_state,
+                        ),
+                    ),
+                    (
+                        "svd",
+                        FeatureUnion(
+                            [
+                                ("passthrough", NoneTransformer()),
+                                (
+                                    "svd",
+                                    Pipeline(
+                                        [
+                                            ("standard", StandardScaler(with_mean=False)),
+                                            (
+                                                "svd",
+                                                TruncatedSVD(
+                                                    algorithm="arpack",
+                                                    n_components=max(1, min(n_samples // 10 + 1, n_features // 2)),
+                                                    random_state=random_state,
+                                                ),
+                                            ),
+                                        ]
+                                    ),
+                                ),
+                            ]
+                        ),
+                    ),
+                ]
+            )
         else:
-            self.random_transforms = ColumnTransformer(
-                (
-
-
-                ), [])
-            ], remainder='passthrough')
+            self.random_transforms = ColumnTransformer(
+                [("ordinal", OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan), [])],
+                remainder="passthrough",
+            )
 
         return self.random_transforms.fit_transform(X)
 
-
     def transform_y(self, y: np.ndarray):
-
         if self.task == Task.CLASSIFICATION:
             # We assume that y properly presents classes [0, 1, 2, ...] before passing to the preprocessor
             # If the test set has a class that is not in the training set, we will throw an error
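The reformatted use_config1 branch is easier to follow now: a quantile transform to a normal distribution, then a FeatureUnion that concatenates the untouched features with an SVD-compressed copy. A runnable approximation of that pipeline on toy data; this is a standalone sketch, not the Mitra preprocessor itself, and FunctionTransformer() stands in for the package's NoneTransformer.

import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import FunctionTransformer, QuantileTransformer, StandardScaler

n_samples, n_features = 200, 12
X = np.random.rand(n_samples, n_features)

svd_branch = Pipeline(
    [
        ("standard", StandardScaler(with_mean=False)),
        ("svd", TruncatedSVD(algorithm="arpack", n_components=max(1, min(n_samples // 10 + 1, n_features // 2)))),
    ]
)

random_transforms = Pipeline(
    [
        ("quantile", QuantileTransformer(output_distribution="normal", n_quantiles=max(n_samples // 10, 2))),
        (
            "svd",
            FeatureUnion(
                [
                    ("passthrough", FunctionTransformer()),  # identity, like NoneTransformer above
                    ("svd", svd_branch),                     # compressed copy appended to the originals
                ]
            ),
        ),
    ]
)

X_out = random_transforms.fit_transform(X)  # shape (200, 12 + n_components)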
@@ -202,7 +214,6 @@ class Preprocessor():
 
         return y
 
-
     def inverse_transform_y(self, y: np.ndarray):
         # Function used during the prediction to transform the model output back to the original space
         # For classification, y is assumed to be logits of shape [n_samples, n_classes]
@@ -214,62 +225,49 @@ class Preprocessor():
             y = self.undo_randomize_class_order(y)
 
         elif self.task == Task.REGRESSION:
-
-            if self.random_mirror_regression:
+            if self.random_mirror_regression:
                 y = self.apply_random_mirror_regression(y)
 
             y = self.undo_normalize_y(y)
 
         return y
 
-
-
     def fit_transform_quantile_transformer(self, X: np.ndarray) -> np.ndarray:
-
         n_obs, n_features = X.shape
         n_quantiles = min(n_obs, 1000)
-        self.quantile_transformer = QuantileTransformer(n_quantiles=n_quantiles, output_distribution=
+        self.quantile_transformer = QuantileTransformer(n_quantiles=n_quantiles, output_distribution="normal")
         X = self.quantile_transformer.fit_transform(X)
 
         return X
 
-
-
     def determine_which_features_are_singular(self, x: np.ndarray) -> None:
-
-        self.singular_features = np.array([ len(np.unique(x_col)) for x_col in x.T ]) == 1
-
-
+        self.singular_features = np.array([len(np.unique(x_col)) for x_col in x.T]) == 1
 
     def determine_which_features_to_select(self, x: np.ndarray, y: np.ndarray) -> None:
-
         if self.dim_embedding is None:
             # All features are selected
             return
 
         if x.shape[1] > self.dim_embedding:
-            logger.info(
+            logger.info(
+                f"Number of features is capped at {self.dim_embedding}, but the dataset has {x.shape[1]} features. A subset of {self.dim_embedding} are selected using SelectKBest"
+            )
 
         self.select_k_best = SelectKBest(k=self.dim_embedding)
         self.select_k_best.fit(x, y)
 
-
     def compute_pre_nan_mean(self, x: np.ndarray) -> None:
         """
         Computes the mean of the data before the NaNs are imputed
         """
         self.pre_nan_mean = np.nanmean(x, axis=0)
 
-
     def impute_nan_features_with_mean(self, x: np.ndarray) -> np.ndarray:
-
         inds = np.where(np.isnan(x))
         x[inds] = np.take(self.pre_nan_mean, inds[1])
         return x
 
-
     def select_features(self, x: np.ndarray) -> np.ndarray:
-
         if self.dim_embedding is None:
             # All features are selected
             return x
@@ -279,15 +277,12 @@ class Preprocessor():
 
         return x
 
-
     def cutoff_singular_features(self, x: np.ndarray, singular_features: np.ndarray) -> np.ndarray:
-
         if singular_features.any():
             x = x[:, ~singular_features]
 
         return x
 
-
     def calc_mean_std(self, x: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         """
         Calculates the mean and std of the training data
@@ -296,7 +291,6 @@ class Preprocessor():
         std = x.std(axis=0) + 1e-6
         return mean, std
 
-
     def normalize_by_mean_std(self, x: np.ndarray, mean: np.ndarray, std: np.ndarray) -> np.ndarray:
         """
         Normalizes the data by the mean and std
@@ -305,7 +299,6 @@ class Preprocessor():
         x = (x - mean) / std
         return x
 
-
     def normalize_by_feature_count(self, x: np.ndarray) -> np.ndarray:
         """
         An interesting way of normalization by the tabPFN paper
@@ -317,8 +310,6 @@ class Preprocessor():
 
         return x
 
-
-
     def extend_feature_dim_to_dim_embedding(self, x: np.ndarray, dim_embedding) -> np.ndarray:
         """
         Increases the number of features to the number of features the model has been trained on
@@ -330,61 +321,47 @@ class Preprocessor():
         x = np.concatenate([x, added_zeros], axis=1)
         return x
 
-
     def determine_mix_max_scale(self, y: np.ndarray) -> None:
         self.y_min = y.min()
         self.y_max = y.max()
         assert self.y_min != self.y_max, "y_min and y_max are the same, cannot normalize, regression makes no sense"
 
-
     def normalize_y(self, y: np.ndarray) -> np.ndarray:
         y = (y - self.y_min) / (self.y_max - self.y_min)
         return y
 
-
     def undo_normalize_y(self, y: np.ndarray) -> np.ndarray:
         y = y * (self.y_max - self.y_min) + self.y_min
         return y
 
-
     def determine_regression_mirror(self) -> None:
         self.regression_mirror = np.random.choice([True, False], size=(1,)).item()
 
-
     def apply_random_mirror_regression(self, y: np.ndarray) -> np.ndarray:
         if self.regression_mirror:
             y = 1 - y
         return y
 
-
     def determine_mirror(self, x: np.ndarray) -> None:
-
         n_features = x.shape[1]
         self.mirror = np.random.choice([1, -1], size=(1, n_features))
 
-
     def apply_random_mirror_x(self, x: np.ndarray) -> np.ndarray:
-
         x = x * self.mirror
         return x
 
-
     def determine_shuffle_class_order(self) -> None:
-
         if self.shuffle_classes:
             self.new_shuffle_classes = np.random.permutation(self.n_classes)
         else:
             self.new_shuffle_classes = np.arange(self.n_classes)
 
-
     def randomize_class_order(self, y: np.ndarray) -> np.ndarray:
-
-        mapping = { i: self.new_shuffle_classes[i] for i in range(self.n_classes) }
+        mapping = {i: self.new_shuffle_classes[i] for i in range(self.n_classes)}
         y = np.array([mapping[i.item()] for i in y], dtype=np.int64)
 
         return y
 
-
     def undo_randomize_class_order(self, y_logits: np.ndarray) -> np.ndarray:
         """
         We assume y_logits has shape [n_samples, n_classes]
@@ -392,29 +369,22 @@ class Preprocessor():
 
         # mapping = {self.new_shuffle_classes[i]: i for i in range(self.n_classes)}
         mapping = {i: self.new_shuffle_classes[i] for i in range(self.n_classes)}
-        y = np.concatenate([y_logits[:, mapping[i]:mapping[i]+1] for i in range(self.n_classes)], axis=1)
+        y = np.concatenate([y_logits[:, mapping[i] : mapping[i] + 1] for i in range(self.n_classes)], axis=1)
 
         return y
 
-
     def extract_correct_classes(self, y_logits: np.ndarray) -> np.ndarray:
         # Even though our network might be able to support 10 classes,
         # If the problem only has three classes, we should give three classes as output.
         # We assume y_logits has shape [n_samples, n_classes]
-        y_logits = y_logits[:, :self.n_classes]
+        y_logits = y_logits[:, : self.n_classes]
         return y_logits
 
-
-
     def determine_feature_order(self, x: np.ndarray) -> None:
-
         n_features = x.shape[1]
         self.new_feature_order = np.random.permutation(n_features)
 
-
-
     def randomize_feature_order(self, x: np.ndarray) -> np.ndarray:
-
         x = x[:, self.new_feature_order]
 
-        return x
+        return x
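Most of these hunks only drop stray blank lines, but the class-shuffling helpers are worth a second look: labels are remapped through a random permutation before training, and undo_randomize_class_order re-orders the logit columns afterwards so that column i again scores original class i. A tiny sketch of that bookkeeping on toy arrays, not the package code:

import numpy as np

n_classes = 3
perm = np.random.permutation(n_classes)  # role of determine_shuffle_class_order

y = np.array([0, 1, 2, 1, 0])
y_shuffled = perm[y]  # randomize_class_order, written in vectorised form

logits = np.random.rand(len(y), n_classes)  # model output in the shuffled label space
logits_original = logits[:, perm]           # undo_randomize_class_order
# logits_original[:, i] is the score the model assigned to shuffled class perm[i],
# i.e. to original class i.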
@@ -1 +1 @@
-# Model architecture modules for MitraModel
+# Model architecture modules for MitraModel
@@ -1,21 +1,18 @@
+from abc import ABC, abstractmethod
+
 import torch
 import torch.nn as nn
-
+
 
 class BaseModel(nn.Module, ABC):
-
     def __init__(self):
         super().__init__()
-
+
     def init_weights(self):
         """Initialize model weights."""
         pass
-
+
     @abstractmethod
-    def forward(self,
-                x_support: torch.Tensor,
-                y_support: torch.Tensor,
-                x_query: torch.Tensor,
-                **kwargs):
+    def forward(self, x_support: torch.Tensor, y_support: torch.Tensor, x_query: torch.Tensor, **kwargs):
         """Forward pass for the model."""
-        pass
+        pass