autogluon.tabular 1.3.2b20250610__py3-none-any.whl → 1.4.1b20251214__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/tabular/configs/config_helper.py +1 -1
- autogluon/tabular/configs/hyperparameter_configs.py +2 -265
- autogluon/tabular/configs/pipeline_presets.py +130 -0
- autogluon/tabular/configs/presets_configs.py +51 -26
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +0 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +310 -0
- autogluon/tabular/models/__init__.py +6 -1
- autogluon/tabular/models/_utils/rapids_utils.py +1 -1
- autogluon/tabular/models/automm/automm_model.py +2 -0
- autogluon/tabular/models/automm/ft_transformer.py +4 -1
- autogluon/tabular/models/catboost/callbacks.py +3 -2
- autogluon/tabular/models/catboost/catboost_model.py +15 -9
- autogluon/tabular/models/catboost/catboost_utils.py +17 -3
- autogluon/tabular/models/ebm/__init__.py +0 -0
- autogluon/tabular/models/ebm/ebm_model.py +259 -0
- autogluon/tabular/models/ebm/hyperparameters/__init__.py +0 -0
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +39 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +72 -0
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +7 -5
- autogluon/tabular/models/knn/knn_model.py +7 -3
- autogluon/tabular/models/lgb/lgb_model.py +60 -21
- autogluon/tabular/models/lr/lr_model.py +6 -1
- autogluon/tabular/models/lr/lr_preprocessing_utils.py +6 -7
- autogluon/tabular/models/lr/lr_rapids_model.py +45 -5
- autogluon/tabular/models/mitra/__init__.py +0 -0
- autogluon/tabular/models/mitra/_internal/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +190 -0
- autogluon/tabular/models/mitra/_internal/config/config_run.py +32 -0
- autogluon/tabular/models/mitra/_internal/config/enums.py +162 -0
- autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +94 -0
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +54 -0
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +108 -0
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +67 -0
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +132 -0
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +373 -0
- autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/data/collator.py +46 -0
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +136 -0
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +57 -0
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +420 -0
- autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/models/base.py +21 -0
- autogluon/tabular/models/mitra/_internal/models/embedding.py +182 -0
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +667 -0
- autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -0
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +15 -0
- autogluon/tabular/models/mitra/mitra_model.py +380 -0
- autogluon/tabular/models/mitra/sklearn_interface.py +494 -0
- autogluon/tabular/models/realmlp/__init__.py +0 -0
- autogluon/tabular/models/realmlp/realmlp_model.py +360 -0
- autogluon/tabular/models/rf/rf_model.py +11 -6
- autogluon/tabular/models/tabicl/__init__.py +0 -0
- autogluon/tabular/models/tabicl/tabicl_model.py +179 -0
- autogluon/tabular/models/tabm/__init__.py +0 -0
- autogluon/tabular/models/tabm/_tabm_internal.py +545 -0
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +810 -0
- autogluon/tabular/models/tabm/tabm_model.py +356 -0
- autogluon/tabular/models/tabm/tabm_reference.py +631 -0
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +13 -7
- autogluon/tabular/models/tabpfnv2/__init__.py +0 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +20 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +40 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +201 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +1464 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +747 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +863 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +106 -0
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +388 -0
- autogluon/tabular/models/tabular_nn/hyperparameters/parameters.py +1 -3
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +5 -5
- autogluon/tabular/models/xgboost/xgboost_model.py +10 -3
- autogluon/tabular/predictor/predictor.py +147 -84
- autogluon/tabular/registry/_ag_model_registry.py +12 -2
- autogluon/tabular/testing/fit_helper.py +57 -27
- autogluon/tabular/testing/generate_datasets.py +7 -0
- autogluon/tabular/trainer/abstract_trainer.py +3 -1
- autogluon/tabular/trainer/model_presets/presets.py +10 -1
- autogluon/tabular/version.py +1 -1
- autogluon.tabular-1.4.1b20251214-py3.11-nspkg.pth +1 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/METADATA +112 -57
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/RECORD +89 -40
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/WHEEL +1 -1
- autogluon/tabular/models/tabpfn/__init__.py +0 -1
- autogluon/tabular/models/tabpfn/tabpfn_model.py +0 -153
- autogluon.tabular-1.3.2b20250610-py3.9-nspkg.pth +0 -1
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info/licenses}/LICENSE +0 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info/licenses}/NOTICE +0 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/namespace_packages.txt +0 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/top_level.txt +0 -0
- {autogluon.tabular-1.3.2b20250610.dist-info → autogluon_tabular-1.4.1b20251214.dist-info}/zip-safe +0 -0
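Beyond the version bump, the listing above adds several new model families (EBM, Mitra, RealMLP, TabICL, TabM, TabPFNv2) and removes the old TabPFN v1 model. As an illustrative sketch only (not taken from the diff): assuming the new registry entries in _ag_model_registry.py expose these models under their ag_key values, a specific new model could be requested through the usual TabularPredictor hyperparameters dict. The label name "target" and the DataFrame train_df are placeholders.

    from autogluon.tabular import TabularPredictor

    # Hypothetical usage sketch: "TABPFNV2" is the ag_key added by tabpfnv2_model.py below;
    # whether it is accepted here depends on the registry changes in _ag_model_registry.py.
    predictor = TabularPredictor(label="target").fit(
        train_data=train_df,               # a pandas DataFrame with a "target" column (assumed)
        hyperparameters={"TABPFNV2": {}},  # fit only the TabPFNv2 model with its default params
    )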
autogluon/tabular/models/tabpfnv2/rfpfn/utils.py
@@ -0,0 +1,106 @@
+"""Copyright 2023.
+
+Author: Lukas Schweizer <schweizer.lukas@web.de>
+"""
+
+# Copyright (c) Prior Labs GmbH 2025.
+# Licensed under the Apache License, Version 2.0
+
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import torch
+# Type checking imports
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from numpy.typing import NDArray
+
+
+def preprocess_data(
+    data,
+    nan_values=True,
+    one_hot_encoding=False,
+    normalization=True,
+    categorical_indices=None,
+):
+    """This method preprocesses data regarding missing values, categorical features
+    and data normalization (for the kNN Model)
+    :param data: Data to preprocess
+    :param nan_values: Preprocesses nan values if True
+    :param one_hot_encoding: Whether use OHE for categoricals
+    :param normalization: Normalizes data if True
+    :param categorical_indices: Categorical columns of data
+    :return: Preprocessed version of the data.
+    """
+    data = data.numpy() if torch.is_tensor(data) else data
+    data = data.astype(np.float32)
+    data = pd.DataFrame(data).reset_index().drop("index", axis=1)
+
+    if categorical_indices is None:
+        categorical_indices = []
+    preprocessed_data = data
+    # NaN values (replace NaN with zeros)
+    if nan_values:
+        preprocessed_data = preprocessed_data.fillna(0)
+    # Categorical Features (One Hot Encoding)
+    if one_hot_encoding:
+        # Setting dtypes of categorical data to 'category'
+        for idx in categorical_indices:
+            preprocessed_data[preprocessed_data.columns[idx]] = preprocessed_data[
+                preprocessed_data.columns[idx]
+            ].astype("category")
+        categorical_columns = list(
+            preprocessed_data.select_dtypes(include=["category"]).columns,
+        )
+        preprocessed_data = pd.get_dummies(
+            preprocessed_data,
+            columns=categorical_columns,
+        )
+    # Data normalization from R -> [0, 1]
+    if normalization:
+        if one_hot_encoding:
+            numerical_columns = list(
+                preprocessed_data.select_dtypes(exclude=["category"]).columns,
+            )
+            preprocessed_data[numerical_columns] = preprocessed_data[
+                numerical_columns
+            ].apply(
+                lambda x: (x - x.min()) / (x.max() - x.min())
+                if x.max() != x.min()
+                else x,
+            )
+        else:
+            preprocessed_data = preprocessed_data.apply(
+                lambda x: (x - x.min()) / (x.max() - x.min())
+                if x.max() != x.min()
+                else x,
+            )
+    return preprocessed_data
+
+def softmax(logits: NDArray) -> NDArray:
+    """Apply softmax function to convert logits to probabilities.
+
+    Args:
+        logits: Input logits array of shape (n_samples, n_classes) or (n_classes,)
+
+    Returns:
+        Probabilities where values sum to 1 across the last dimension
+    """
+    # Handle both 2D and 1D inputs
+    if logits.ndim == 1:
+        logits = logits.reshape(1, -1)
+
+    # Apply exponential to each logit with numerical stability
+    logits_max = np.max(logits, axis=1, keepdims=True)
+    exp_logits = np.exp(logits - logits_max)  # Subtract max for numerical stability
+
+    # Sum across classes and normalize
+    sum_exp_logits = np.sum(exp_logits, axis=1, keepdims=True)
+    probs = exp_logits / sum_exp_logits
+
+    # Return in the same shape as input
+    if logits.ndim == 1:
+        return probs.reshape(-1)
+    return probs
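For orientation (not part of the diff): the softmax helper above avoids overflow by subtracting the per-row maximum before exponentiating, which leaves the resulting probabilities unchanged. A minimal standalone sketch of that trick:

    import numpy as np

    logits = np.array([[1000.0, 1001.0, 1002.0]])          # naive np.exp(logits) would overflow
    shifted = logits - logits.max(axis=1, keepdims=True)   # largest value becomes 0
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    print(probs)  # ~[[0.090, 0.245, 0.665]], each row sums to 1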
autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py
@@ -0,0 +1,388 @@
+"""
+Code Adapted from TabArena: https://github.com/autogluon/tabrepo/blob/main/tabrepo/benchmark/models/ag/tabpfnv2/tabpfnv2_model.py
+"""
+
+from __future__ import annotations
+
+import logging
+import warnings
+from typing import TYPE_CHECKING, Any
+
+import numpy as np
+import scipy
+from sklearn.preprocessing import PowerTransformer
+
+from autogluon.common.utils.resource_utils import ResourceManager
+from autogluon.core.models import AbstractModel
+from autogluon.features.generators import LabelEncoderFeatureGenerator
+from autogluon.tabular import __version__
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+_HAS_LOGGED_TABPFN_LICENSE: bool = False
+
+
+# TODO: merge into TabPFnv2 codebase
+class FixedSafePowerTransformer(PowerTransformer):
+    """Fixed version of safe power."""
+
+    def __init__(
+        self,
+        variance_threshold: float = 1e-3,
+        large_value_threshold: float = 100,
+        method="yeo-johnson",
+        standardize=True,
+        copy=True,
+    ):
+        super().__init__(method=method, standardize=standardize, copy=copy)
+        self.variance_threshold = variance_threshold
+        self.large_value_threshold = large_value_threshold
+
+        self.revert_indices_ = None
+
+    def _find_features_to_revert_because_of_failure(
+        self,
+        transformed_X: np.ndarray,
+    ) -> None:
+        # Calculate the variance for each feature in the transformed data
+        variances = np.nanvar(transformed_X, axis=0)
+
+        # Identify features where the variance is not close to 1
+        mask = np.abs(variances - 1) > self.variance_threshold
+        non_unit_variance_indices = np.where(mask)[0]
+
+        # Identify features with values greater than the large_value_threshold
+        large_value_indices = np.any(transformed_X > self.large_value_threshold, axis=0)
+        large_value_indices = np.nonzero(large_value_indices)[0]
+
+        # Identify features to revert based on either condition
+        self.revert_indices_ = np.unique(
+            np.concatenate([non_unit_variance_indices, large_value_indices]),
+        )
+
+    def _yeo_johnson_optimize(self, x: np.ndarray) -> float:
+        try:
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore",
+                    message=r"overflow encountered",
+                    category=RuntimeWarning,
+                )
+                return super()._yeo_johnson_optimize(x)  # type: ignore
+        except scipy.optimize._optimize.BracketError:
+            return np.nan
+
+    def _yeo_johnson_transform(self, x: np.ndarray, lmbda: float) -> np.ndarray:
+        if np.isnan(lmbda):
+            return x
+
+        return super()._yeo_johnson_transform(x, lmbda)  # type: ignore
+
+    def _revert_failed_features(
+        self,
+        transformed_X: np.ndarray,
+        original_X: np.ndarray,
+    ) -> np.ndarray:
+        # Replace these features with the original features
+        if self.revert_indices_ is not None and len(self.revert_indices_) > 0:
+            transformed_X[:, self.revert_indices_] = original_X[:, self.revert_indices_]
+
+        return transformed_X
+
+    def fit(self, X: np.ndarray, y: Any | None = None) -> FixedSafePowerTransformer:
+        super().fit(X, y)
+
+        # Check and revert features as necessary
+        self._find_features_to_revert_because_of_failure(super().transform(X))  # type: ignore
+        return self
+
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        transformed_X = super().transform(X)
+        return self._revert_failed_features(transformed_X, X)  # type: ignore
+
+
+class TabPFNV2Model(AbstractModel):
+    """
+    TabPFNv2 is a tabular foundation model pre-trained purely on synthetic data that achieves
+    state-of-the-art results with in-context learning on small datasets with <=10000 samples and <=500 features.
+    TabPFNv2 is developed and maintained by PriorLabs: https://priorlabs.ai/
+
+    TabPFNv2 is the top performing method for small datasets on TabArena-v0.1: https://tabarena.ai
+
+    Paper: Accurate predictions on small data with a tabular foundation model
+    Authors: Noah Hollmann, Samuel Müller, Lennart Purucker, Arjun Krishnakumar, Max Körfer, Shi Bin Hoo, Robin Tibor Schirrmeister & Frank Hutter
+    Codebase: https://github.com/PriorLabs/TabPFN
+    License: https://github.com/PriorLabs/TabPFN/blob/main/LICENSE
+
+    .. versionadded:: 1.4.0
+    """
+    ag_key = "TABPFNV2"
+    ag_name = "TabPFNv2"
+    ag_priority = 105
+    seed_name = "random_state"
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._feature_generator = None
+        self._cat_features = None
+        self._cat_indices = None
+
+    def _preprocess(self, X: pd.DataFrame, is_train=False, **kwargs) -> pd.DataFrame:
+        X = super()._preprocess(X, **kwargs)
+
+        if is_train:
+            self._cat_indices = []
+
+            # X will be the training data.
+            self._feature_generator = LabelEncoderFeatureGenerator(verbosity=0)
+            self._feature_generator.fit(X=X)
+
+        # This converts categorical features to numeric via stateful label encoding.
+        if self._feature_generator.features_in:
+            X = X.copy()
+            X[self._feature_generator.features_in] = self._feature_generator.transform(
+                X=X
+            )
+
+        if is_train:
+            # Detect/set cat features and indices
+            if self._cat_features is None:
+                self._cat_features = self._feature_generator.features_in[:]
+            self._cat_indices = [X.columns.get_loc(col) for col in self._cat_features]
+
+        return X
+
+    # FIXME: Crashes during model download if bagging with parallel fit.
+    # Consider adopting same download logic as TabPFNMix which doesn't crash during model download.
+    # FIXME: Maybe support child_oof somehow with using only one model and being smart about inference time?
+    def _fit(
+        self,
+        X: pd.DataFrame,
+        y: pd.Series,
+        num_cpus: int = 1,
+        num_gpus: int = 0,
+        verbosity: int = 2,
+        **kwargs,
+    ):
+        try:
+            from tabpfn.model import preprocessing
+        except ImportError as err:
+            logger.log(
+                40,
+                f"\tFailed to import tabpfn! To use the TabPFNv2 model, "
+                f"do: `pip install autogluon.tabular[tabpfn]=={__version__}`.",
+            )
+            raise err
+
+        preprocessing.SafePowerTransformer = FixedSafePowerTransformer
+
+        from tabpfn import TabPFNClassifier, TabPFNRegressor
+        from tabpfn.model.loading import resolve_model_path
+        from torch.cuda import is_available
+
+        is_classification = self.problem_type in ["binary", "multiclass"]
+
+        model_base = TabPFNClassifier if is_classification else TabPFNRegressor
+
+        device = "cuda" if num_gpus != 0 else "cpu"
+        if (device == "cuda") and (not is_available()):
+            # FIXME: warn instead and switch to CPU.
+            raise AssertionError(
+                "Fit specified to use GPU, but CUDA is not available on this machine. "
+                "Please switch to CPU usage instead.",
+            )
+
+        if verbosity >= 2:
+            # logs "Built with PriorLabs-TabPFN"
+            self._log_license(device=device)
+
+        X = self.preprocess(X, is_train=True)
+
+        hps = self._get_model_params()
+        hps["device"] = device
+        hps["n_jobs"] = num_cpus
+        hps["categorical_features_indices"] = self._cat_indices
+
+        _, model_dir, _, _ = resolve_model_path(
+            model_path=None,
+            which="classifier" if is_classification else "regressor",
+        )
+        if is_classification:
+            if "classification_model_path" in hps:
+                hps["model_path"] = model_dir / hps.pop("classification_model_path")
+            if "regression_model_path" in hps:
+                del hps["regression_model_path"]
+        else:
+            if "regression_model_path" in hps:
+                hps["model_path"] = model_dir / hps.pop("regression_model_path")
+            if "classification_model_path" in hps:
+                del hps["classification_model_path"]
+
+        # Resolve inference_config
+        inference_config = {
+            _k: v
+            for k, v in hps.items()
+            if k.startswith("inference_config/") and (_k := k.split("/")[-1])
+        }
+        if inference_config:
+            hps["inference_config"] = inference_config
+        for k in list(hps.keys()):
+            if k.startswith("inference_config/"):
+                del hps[k]
+
+        # TODO: remove power from search space and TabPFNv2 codebase
+        # Power transform can fail. To avoid this, make all power be safepower instead.
+        if "PREPROCESS_TRANSFORMS" in inference_config:
+            safe_config = []
+            for preprocessing_dict in inference_config["PREPROCESS_TRANSFORMS"]:
+                if preprocessing_dict["name"] == "power":
+                    preprocessing_dict["name"] = "safepower"
+                safe_config.append(preprocessing_dict)
+            inference_config["PREPROCESS_TRANSFORMS"] = safe_config
+        if "REGRESSION_Y_PREPROCESS_TRANSFORMS" in inference_config:
+            safe_config = []
+            for preprocessing_name in inference_config[
+                "REGRESSION_Y_PREPROCESS_TRANSFORMS"
+            ]:
+                if preprocessing_name == "power":
+                    preprocessing_name = "safepower"
+                safe_config.append(preprocessing_name)
+            inference_config["REGRESSION_Y_PREPROCESS_TRANSFORMS"] = safe_config
+
+        # Resolve model_type
+        n_ensemble_repeats = hps.pop("n_ensemble_repeats", None)
+        model_is_rf_pfn = hps.pop("model_type", "no") == "dt_pfn"
+        if model_is_rf_pfn:
+            from .rfpfn import RandomForestTabPFNClassifier, RandomForestTabPFNRegressor
+
+            hps["n_estimators"] = 1
+            rf_model_base = (
+                RandomForestTabPFNClassifier
+                if is_classification
+                else RandomForestTabPFNRegressor
+            )
+            self.model = rf_model_base(
+                tabpfn=model_base(**hps),
+                categorical_features=self._cat_indices,
+                n_estimators=n_ensemble_repeats,
+            )
+        else:
+            if n_ensemble_repeats is not None:
+                hps["n_estimators"] = n_ensemble_repeats
+            self.model = model_base(**hps)
+
+        self.model = self.model.fit(
+            X=X,
+            y=y,
+        )
+
+    def _log_license(self, device: str):
+        global _HAS_LOGGED_TABPFN_LICENSE
+        if not _HAS_LOGGED_TABPFN_LICENSE:
+            logger.log(20, "\tBuilt with PriorLabs-TabPFN")  # Aligning with TabPFNv2 license requirements
+            if device == "cpu":
+                logger.log(
+                    20,
+                    "\tRunning TabPFNv2 on CPU. This can be very slow. "
+                    "It is recommended to run TabPFNv2 on a GPU."
+                )
+            _HAS_LOGGED_TABPFN_LICENSE = True  # Avoid repeated logging
+
+    def _get_default_resources(self) -> tuple[int, int]:
+        # Use only physical cores for better performance based on benchmarks
+        num_cpus = ResourceManager.get_cpu_count(only_physical_cores=True)
+
+        num_gpus = min(1, ResourceManager.get_gpu_count_torch(cuda_only=True))
+
+        return num_cpus, num_gpus
+
+    def _set_default_params(self):
+        default_params = {
+            "ignore_pretraining_limits": True,  # to ignore warnings and size limits
+        }
+        for param, val in default_params.items():
+            self._set_default_param_value(param, val)
+
+    @classmethod
+    def supported_problem_types(cls) -> list[str] | None:
+        return ["binary", "multiclass", "regression"]
+
+    def _get_default_auxiliary_params(self) -> dict:
+        default_auxiliary_params = super()._get_default_auxiliary_params()
+        default_auxiliary_params.update(
+            {
+                "max_rows": 10000,
+                "max_features": 500,
+                "max_classes": 10,
+            }
+        )
+        return default_auxiliary_params
+
+    @classmethod
+    def _get_default_ag_args_ensemble(cls, **kwargs) -> dict:
+        """Set fold_fitting_strategy to sequential_local,
+        as parallel folding crashes if model weights aren't pre-downloaded.
+        """
+        default_ag_args_ensemble = super()._get_default_ag_args_ensemble(**kwargs)
+        extra_ag_args_ensemble = {
+            # FIXME: Find a work-around to avoid crash if parallel and weights are not downloaded
+            "fold_fitting_strategy": "sequential_local",
+            "refit_folds": True,  # Better to refit the model for faster inference and similar quality as the bag.
+        }
+        default_ag_args_ensemble.update(extra_ag_args_ensemble)
+        return default_ag_args_ensemble
+
+    def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
+        hyperparameters = self._get_model_params()
+        return self.estimate_memory_usage_static(
+            X=X,
+            problem_type=self.problem_type,
+            num_classes=self.num_classes,
+            hyperparameters=hyperparameters,
+            **kwargs,
+        )
+
+    @classmethod
+    def _estimate_memory_usage_static(
+        cls,
+        *,
+        X: pd.DataFrame,
+        hyperparameters: dict | None = None,
+        **kwargs,
+    ) -> int:
+        """Heuristic memory estimate based on TabPFN's memory estimate logic in:
+        https://github.com/PriorLabs/TabPFN/blob/57a2efd3ebdb3886245e4d097cefa73a5261a969/src/tabpfn/model/memory.py#L147.
+
+        This is based on GPU memory usage, but hopefully with overheads it also approximates CPU memory usage.
+        """
+        # features_per_group = 2  # Based on TabPFNv2 default (unused)
+        n_layers = 12  # Based on TabPFNv2 default
+        embedding_size = 192  # Based on TabPFNv2 default
+        dtype_byte_size = 2  # Based on TabPFNv2 default
+
+        model_mem = 14489108  # Based on TabPFNv2 default
+
+        n_samples, n_features = X.shape[0], X.shape[1]
+        n_feature_groups = n_features + 1  # TODO: Unsure how to calculate this
+
+        X_mem = n_samples * n_feature_groups * dtype_byte_size
+        activation_mem = (
+            n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size
+        )
+
+        baseline_overhead_mem_est = 1e9  # 1 GB generic overhead
+
+        # Add some buffer to each term + 1 GB overhead to be safe
+        return int(
+            model_mem + 4 * X_mem + 2 * activation_mem + baseline_overhead_mem_est
+        )
+
+    @classmethod
+    def _class_tags(cls):
+        return {"can_estimate_memory_usage_static": True}
+
+    def _more_tags(self) -> dict:
+        return {"can_refit_full": True}
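To make the memory heuristic at the end of the TabPFNv2 model above concrete (illustrative arithmetic only, not part of the diff), a hypothetical dataset of 10,000 rows and 100 features works out to roughly 10.3 GB:

    n_samples, n_features = 10_000, 100                      # hypothetical dataset size
    n_feature_groups = n_features + 1
    dtype_byte_size, embedding_size, n_layers = 2, 192, 12   # TabPFNv2 defaults from the code above
    model_mem = 14489108

    X_mem = n_samples * n_feature_groups * dtype_byte_size                                        # 2,020,000 bytes
    activation_mem = n_samples * n_feature_groups * embedding_size * n_layers * dtype_byte_size   # 4,654,080,000 bytes
    estimate = int(model_mem + 4 * X_mem + 2 * activation_mem + 1e9)                              # 10,330,729,108 bytes (~10.3 GB)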
autogluon/tabular/models/tabular_nn/hyperparameters/parameters.py
@@ -7,9 +7,7 @@ from autogluon.core.constants import BINARY, MULTICLASS, QUANTILE, REGRESSION
 
 def get_fixed_params(framework):
     """Parameters that currently cannot be searched during HPO"""
-    fixed_params = {
-        # 'seed_value': 0,  # random seed for reproducibility (set = None to ignore)
-    }
+    fixed_params = {}
     # TODO: v1.2 Change default epochs_wo_improve to "auto", so that None can mean no early stopping.
     pytorch_fixed_params = {
         "num_epochs": 1000,  # maximum number of epochs (passes over full dataset) for training NN

autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py
@@ -50,6 +50,7 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
     ag_key = "NN_TORCH"
     ag_name = "NeuralNetTorch"
     ag_priority = 25
+    seed_name = "seed_value"
 
     # Constants used throughout this class:
     unique_category_str = np.nan  # string used to represent missing values and unknown categories for categorical features.
@@ -191,7 +192,7 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
 
         processor_kwargs, optimizer_kwargs, fit_kwargs, loss_kwargs, params = self._prepare_params(params=params)
 
-        seed_value = params.pop(
+        seed_value = params.pop(self.seed_name, self.default_random_seed)
 
         self._num_cpus_infer = params.pop("_num_cpus_infer", 1)
         if seed_value is not None:  # Set seeds
@@ -370,7 +371,6 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
         best_epoch = 0
         best_val_metric = -np.inf  # higher = better
         best_val_update = 0
-        val_improve_epoch = 0  # most recent epoch where validation-score strictly improved
         start_fit_time = time.time()
         if time_limit is not None:
            time_limit = time_limit - (start_fit_time - start_time)
@@ -814,11 +814,11 @@ class TabularNeuralNetTorchModel(AbstractNeuralNetworkModel):
 
     def _get_maximum_resources(self) -> Dict[str, Union[int, float]]:
         # torch model trains slower when utilizing virtual cores and this issue scale up when the number of cpu cores increases
-        return {"num_cpus": ResourceManager.
+        return {"num_cpus": ResourceManager.get_cpu_count(only_physical_cores=True)}
 
     def _get_default_resources(self):
-        #
-        num_cpus = ResourceManager.
+        # only_physical_cores=True is faster in training
+        num_cpus = ResourceManager.get_cpu_count(only_physical_cores=True)
         num_gpus = 0
         return num_cpus, num_gpus
 
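The hunks above and below both introduce a seed_name class attribute, so each model maps the shared seed handling onto its own native parameter name ("seed_value" for NN_TORCH, "seed" for XGBoost). A minimal sketch of that pattern, using a hypothetical stand-in class rather than AutoGluon's AbstractModel:

    class SimpleModel:  # hypothetical stand-in, not AutoGluon's AbstractModel
        seed_name = "seed_value"     # an XGBoost-style subclass would instead use "seed"
        default_random_seed = 0      # assumed default for this sketch

        def fit(self, **params):
            # Pop the seed under the model-specific key, falling back to the shared default.
            seed_value = params.pop(self.seed_name, self.default_random_seed)
            if seed_value is not None:
                print(f"setting seeds to {seed_value}")
            # ... remaining params would be passed on to the underlying library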
autogluon/tabular/models/xgboost/xgboost_model.py
@@ -32,6 +32,7 @@ class XGBoostModel(AbstractModel):
     ag_key = "XGB"
     ag_name = "XGBoost"
     ag_priority = 40
+    seed_name = "seed"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -182,12 +183,18 @@
         from xgboost import XGBClassifier, XGBRegressor
 
         model_type = XGBClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else XGBRegressor
-        self.model = model_type(**params)
+
         import warnings
 
         with warnings.catch_warnings():
             # FIXME: v1.1: Upgrade XGBoost to 2.0.1+ to avoid deprecation warnings from Pandas 2.1+ during XGBoost fit.
             warnings.simplefilter(action="ignore", category=FutureWarning)
+            if params.get("device", "cpu") == "cuda:0":
+                # verbosity=0 to hide UserWarning: Falling back to prediction using DMatrix due to mismatched devices.
+                # TODO: Find a way to hide this warning without setting verbosity=0
+                # ref: https://github.com/dmlc/xgboost/issues/9791
+                params["verbosity"] = 0
+            self.model = model_type(**params)
             self.model.fit(X=X, y=y, eval_set=eval_set, verbose=False, sample_weight=sample_weight)
 
         if generate_curves:
@@ -310,8 +317,8 @@
 
     @disable_if_lite_mode(ret=(1, 0))
     def _get_default_resources(self):
-        #
-        num_cpus = ResourceManager.
+        # only_physical_cores=True is faster in training
+        num_cpus = ResourceManager.get_cpu_count(only_physical_cores=True)
         num_gpus = 0
         return num_cpus, num_gpus
 