autogluon.tabular 1.3.2b20250710__py3-none-any.whl → 1.3.2b20250712__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- autogluon/tabular/models/__init__.py +1 -1
- autogluon/tabular/models/tabpfnv2/__init__.py +0 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +20 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +40 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +201 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +1464 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +747 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +863 -0
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +106 -0
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +376 -0
- autogluon/tabular/registry/_ag_model_registry.py +2 -2
- autogluon/tabular/version.py +1 -1
- {autogluon.tabular-1.3.2b20250710.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/METADATA +12 -14
- {autogluon.tabular-1.3.2b20250710.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/RECORD +21 -14
- autogluon/tabular/models/tabpfn/__init__.py +0 -1
- autogluon/tabular/models/tabpfn/tabpfn_model.py +0 -153
- /autogluon.tabular-1.3.2b20250710-py3.9-nspkg.pth → /autogluon.tabular-1.3.2b20250712-py3.9-nspkg.pth +0 -0
- {autogluon.tabular-1.3.2b20250710.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/LICENSE +0 -0
- {autogluon.tabular-1.3.2b20250710.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/NOTICE +0 -0
- {autogluon.tabular-1.3.2b20250710.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/WHEEL +0 -0
- {autogluon.tabular-1.3.2b20250710.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/namespace_packages.txt +0 -0
- {autogluon.tabular-1.3.2b20250710.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/top_level.txt +0 -0
- {autogluon.tabular-1.3.2b20250710.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/zip-safe +0 -0
autogluon/tabular/models/__init__.py
@@ -21,7 +21,7 @@ from .realmlp.realmlp_model import RealMLPModel
 from .rf.rf_model import RFModel
 from .tabicl.tabicl_model import TabICLModel
 from .tabm.tabm_model import TabMModel
-from .
+from .tabpfnv2.tabpfnv2_model import TabPFNV2Model
 from .tabpfnmix.tabpfnmix_model import TabPFNMixModel
 from .tabular_nn.torch.tabular_nn_torch import TabularNeuralNetTorchModel
 from .text_prediction.text_prediction_v1_model import TextPredictorModel
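The removed import line is truncated in this view; per the file list above, the legacy autogluon/tabular/models/tabpfn wrapper is deleted outright and replaced by the new TabPFNV2Model. A hedged sketch of driving the new model end to end follows — the "TABPFNV2" hyperparameters key is an assumption inferred from the _ag_model_registry.py edit (+2 -2), and the optional tabpfn dependency must be installed; verify both against the installed build.

# Hedged sketch, not part of the diff. Assumes the hyperparameter key
# "TABPFNV2" (inferred from the registry change) and an installed `tabpfn`.
import pandas as pd
from sklearn.datasets import make_classification
from autogluon.tabular import TabularPredictor

X, y = make_classification(n_samples=200, n_features=8, random_state=0)
train = pd.DataFrame(X, columns=[f"f{i}" for i in range(8)]).assign(target=y)

predictor = TabularPredictor(label="target").fit(
    train,
    hyperparameters={"TABPFNV2": {}},  # assumed registry key
)
print(predictor.leaderboard())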
autogluon/tabular/models/tabpfnv2/__init__.py (file without changes)
autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py
@@ -0,0 +1,20 @@
+from .configs import TabPFNRFConfig
+from .sklearn_based_decision_tree_tabpfn import (
+    DecisionTreeTabPFNClassifier,
+    DecisionTreeTabPFNRegressor,
+)
+from .sklearn_based_random_forest_tabpfn import (
+    RandomForestTabPFNClassifier,
+    RandomForestTabPFNRegressor,
+)
+
+# Backward compatibility for imports
+# These classes were previously in CamelCase files but are now imported from snake_case files
+
+__all__ = [
+    "DecisionTreeTabPFNClassifier",
+    "DecisionTreeTabPFNRegressor",
+    "RandomForestTabPFNClassifier",
+    "RandomForestTabPFNRegressor",
+    "TabPFNRFConfig",
+]
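Everything this __init__.py re-exports is visible above, so a quick import check is fully determined by the diff; only the 1.3.2b20250712 build being installed is assumed.

# Smoke test for the new vendored subpackage; names are taken verbatim
# from the __init__.py added above.
import autogluon.tabular.models.tabpfnv2.rfpfn as rfpfn

assert set(rfpfn.__all__) == {
    "DecisionTreeTabPFNClassifier",
    "DecisionTreeTabPFNRegressor",
    "RandomForestTabPFNClassifier",
    "RandomForestTabPFNRegressor",
    "TabPFNRFConfig",
}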
autogluon/tabular/models/tabpfnv2/rfpfn/configs.py
@@ -0,0 +1,40 @@
+# Copyright (c) Prior Labs GmbH 2025.
+# Licensed under the Apache License, Version 2.0
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+
+@dataclass
+class TabPFNRFConfig:
+    min_samples_split: int = 1000
+    min_samples_leaf: int = 5
+    max_depth: int = 5
+    splitter: Literal["best", "random"] = "best"
+    n_estimators: int = 16
+    max_features: Literal["sqrt", "auto"] = "sqrt"
+    criterion: Literal[
+        "gini",
+        "entropy",
+        "log_loss",
+        "squared_error",
+        "friedman_mse",
+        "poisson",
+    ] = "gini"
+    preprocess_X: bool = False
+    preprocess_X_once: bool = False
+    adaptive_tree: bool = True
+    fit_nodes: bool = True
+    adaptive_tree_overwrite_metric: Literal["logloss", "roc"] = None
+    adaptive_tree_test_size: float = 0.2
+    adaptive_tree_min_train_samples: int = 100
+    adaptive_tree_min_valid_samples_fraction_of_train: int = 0.2
+    adaptive_tree_max_train_samples: int = 5000
+    adaptive_tree_skip_class_missing: bool = True
+    max_predict_time: float = -1
+
+    bootstrap: bool = True
+    rf_average_logits: bool = False
+    dt_average_logits: bool = True
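TabPFNRFConfig is a plain dataclass, so standard dataclass tooling applies. A short sketch — field names and defaults come straight from the file above:

from dataclasses import asdict, replace

from autogluon.tabular.models.tabpfnv2.rfpfn import TabPFNRFConfig

cfg = TabPFNRFConfig()  # all defaults, e.g. n_estimators=16, max_depth=5
deeper = replace(cfg, max_depth=8, n_estimators=32)  # non-destructive override
print(asdict(deeper)["criterion"])  # "gini"

Note that dataclasses do not enforce type hints at runtime, so defaults such as adaptive_tree_overwrite_metric = None load fine despite not matching their Literal annotations.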
autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py
@@ -0,0 +1,201 @@
+# Copyright (c) Prior Labs GmbH 2025.
+# Licensed under the Apache License, Version 2.0
+from __future__ import annotations
+
+import warnings
+from typing import Literal
+
+import numpy as np
+from sklearn.metrics import (
+    accuracy_score,
+    f1_score,
+    log_loss,
+    mean_absolute_error,
+    mean_squared_error,
+    roc_auc_score,
+)
+
+CLF_LABEL_METRICS = ["accuracy", "f1"]
+
+
+def safe_roc_auc_score(y_true, y_score, **kwargs):
+    """Compute the Area Under the Receiver Operating Characteristic Curve (ROC AUC) score.
+
+    This function is a safe wrapper around `sklearn.metrics.roc_auc_score` that handles
+    cases where the input data may have missing classes or binary classification problems.
+
+    Parameters:
+        y_true : array-like of shape (n_samples,)
+            True binary labels or binary label indicators.
+
+        y_score : array-like of shape (n_samples,) or (n_samples, n_classes)
+            Target scores, can either be probability estimates of the positive class,
+            confidence values, or non-thresholded measure of decisions.
+
+        **kwargs : dict
+            Additional keyword arguments to pass to `sklearn.metrics.roc_auc_score`.
+
+    Returns:
+        float: The ROC AUC score.
+
+    Raises:
+        ValueError: If there are missing classes in `y_true` that cannot be handled.
+    """
+    # First check for single-class data - handle it gracefully with perfect score
+    unique_classes = np.unique(y_true)
+    if len(unique_classes) < 2:
+        # For single-class data, return perfect score (1.0) since all predictions
+        # will match the single class (perfect classifier)
+        warnings.warn(
+            "Only one class present in y_true. Returning perfect score (1.0).",
+            stacklevel=2,
+        )
+        return 1.0
+
+    try:
+        # would be much safer to check count of unique values in y_true... but inefficient.
+        if (len(y_score.shape) > 1) and (y_score.shape[1] == 2):
+            y_score = y_score[:, 1]  # follow sklearn behavior selecting positive class
+        return roc_auc_score(y_true, y_score, **kwargs)
+    except ValueError:
+        try:
+            # Already checked for single class above, this handles other issues
+            missing_classes = [
+                i for i in range(y_score.shape[1]) if i not in unique_classes
+            ]
+
+            # Modify y_score to exclude columns corresponding to missing classes
+            y_score_adjusted = np.delete(y_score, missing_classes, axis=1)
+            y_score_adjusted = y_score_adjusted / y_score_adjusted.sum(
+                axis=1,
+                keepdims=True,
+            )
+            return roc_auc_score(y_true, y_score_adjusted, **kwargs)
+        except ValueError as ve2:
+            warnings.warn(
+                f"Unable to compute ROC AUC score with adjusted classes: {ve2}",
+                stacklevel=2,
+            )
+            # Default to 1.0 for errors instead of raising exception
+            return 1.0
+        except IndexError as ie:
+            warnings.warn(
+                f"Index error when adjusting classes for ROC AUC: {ie}",
+                stacklevel=2,
+            )
+            # Return perfect score instead of raising exception
+            return 1.0
+        except TypeError as te:
+            warnings.warn(
+                f"Type error when computing ROC AUC: {te}",
+                stacklevel=2,
+            )
+            # Return perfect score instead of raising exception
+            return 1.0
+
+
+def score_classification(
+    optimize_metric: Literal["roc", "auroc", "accuracy", "f1", "log_loss"],
+    y_true,
+    y_pred,
+    sample_weight=None,
+    *,
+    y_pred_is_labels: bool = False,
+):
+    """General function to score classification predictions.
+
+    Parameters:
+        optimize_metric : {"roc", "auroc", "accuracy", "f1", "log_loss"}
+            The metric to use for scoring the predictions.
+
+        y_true : array-like of shape (n_samples,)
+            True labels or binary label indicators.
+
+        y_pred : array-like of shape (n_samples,) or (n_samples, n_classes)
+            Predicted labels, probabilities, or confidence values.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+
+    Returns:
+        float: The score for the specified metric.
+
+    Raises:
+        ValueError: If an unknown metric is specified.
+    """
+    if optimize_metric is None:
+        optimize_metric = "roc"
+
+    if (optimize_metric == "roc") and len(np.unique(y_true)) == 2:
+        y_pred = y_pred[:, 1]
+
+    if (not y_pred_is_labels) and (optimize_metric not in ["roc", "log_loss"]):
+        y_pred = np.argmax(y_pred, axis=1)
+
+    if optimize_metric in ("roc", "auroc"):
+        return safe_roc_auc_score(
+            y_true,
+            y_pred,
+            sample_weight=sample_weight,
+            multi_class="ovr",
+        )
+    if optimize_metric == "accuracy":
+        return accuracy_score(y_true, y_pred, sample_weight=sample_weight)
+    if optimize_metric == "f1":
+        return f1_score(
+            y_true,
+            y_pred,
+            sample_weight=sample_weight,
+            average="macro",
+        )
+    if optimize_metric == "log_loss":
+        return -log_loss(y_true, y_pred, sample_weight=sample_weight)
+    raise ValueError(f"Unknown metric {optimize_metric}")
+
+
+def score_regression(
+    optimize_metric: Literal["rmse", "mse", "mae"],
+    y_true,
+    y_pred,
+    sample_weight=None,
+):
+    """General function to score regression predictions.
+
+    Parameters:
+        optimize_metric : {"rmse", "mse", "mae"}
+            The metric to use for scoring the predictions.
+
+        y_true : array-like of shape (n_samples,)
+            True target values.
+
+        y_pred : array-like of shape (n_samples,)
+            Predicted target values.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+
+    Returns:
+        float: The score for the specified metric.
+
+    Raises:
+        ValueError: If an unknown metric is specified.
+    """
+    if optimize_metric == "rmse":
+        try:
+            return -mean_squared_error(
+                y_true,
+                y_pred,
+                sample_weight=sample_weight,
+                squared=False,
+            )
+        except TypeError:
+            # Newer sklearn version
+            from sklearn.metrics import root_mean_squared_error
+
+            return -root_mean_squared_error(y_true, y_pred, sample_weight=sample_weight)
+    elif optimize_metric == "mse":
+        return -mean_squared_error(y_true, y_pred, sample_weight=sample_weight)
+    elif optimize_metric == "mae":
+        return -mean_absolute_error(y_true, y_pred, sample_weight=sample_weight)
+    else:
+        raise ValueError(f"Unknown metric {optimize_metric}")
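The behavior of these helpers follows directly from the code above; a small demonstration, assuming the 1.3.2b20250712 build is installed:

import numpy as np

from autogluon.tabular.models.tabpfnv2.rfpfn.scoring_utils import (
    safe_roc_auc_score,
    score_classification,
    score_regression,
)

y_true = np.array([0, 1, 1, 0])
proba = np.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6], [0.7, 0.3]])

# Two-column scores collapse to the positive class, mirroring sklearn.
print(safe_roc_auc_score(y_true, proba))           # 1.0 for this toy input
# Degenerate single-class y_true warns and returns 1.0 instead of raising.
print(safe_roc_auc_score(np.zeros(3), proba[:3]))  # 1.0

# Error-style metrics come back negated so that higher is always better.
print(score_classification("log_loss", y_true, proba))                      # -log_loss
print(score_regression("rmse", np.array([1.0, 2.0]), np.array([1.5, 2.5])))  # -0.5

Note the sign convention: every metric is returned so that larger is better (log_loss, rmse, mse, and mae are negated), which lets downstream selection code maximize uniformly across metrics.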