autogluon.tabular 1.3.2b20250711__py3-none-any.whl → 1.3.2b20250712__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in a supported public registry. It is provided for informational purposes only.
Files changed (23)
  1. autogluon/tabular/models/__init__.py +1 -1
  2. autogluon/tabular/models/tabpfnv2/__init__.py +0 -0
  3. autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +20 -0
  4. autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +40 -0
  5. autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +201 -0
  6. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +1464 -0
  7. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +747 -0
  8. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +863 -0
  9. autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +106 -0
  10. autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +376 -0
  11. autogluon/tabular/registry/_ag_model_registry.py +2 -2
  12. autogluon/tabular/version.py +1 -1
  13. {autogluon.tabular-1.3.2b20250711.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/METADATA +13 -15
  14. {autogluon.tabular-1.3.2b20250711.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/RECORD +21 -14
  15. autogluon/tabular/models/tabpfn/__init__.py +0 -1
  16. autogluon/tabular/models/tabpfn/tabpfn_model.py +0 -153
  17. /autogluon.tabular-1.3.2b20250711-py3.9-nspkg.pth → /autogluon.tabular-1.3.2b20250712-py3.9-nspkg.pth +0 -0
  18. {autogluon.tabular-1.3.2b20250711.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/LICENSE +0 -0
  19. {autogluon.tabular-1.3.2b20250711.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/NOTICE +0 -0
  20. {autogluon.tabular-1.3.2b20250711.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/WHEEL +0 -0
  21. {autogluon.tabular-1.3.2b20250711.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/namespace_packages.txt +0 -0
  22. {autogluon.tabular-1.3.2b20250711.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/top_level.txt +0 -0
  23. {autogluon.tabular-1.3.2b20250711.dist-info → autogluon.tabular-1.3.2b20250712.dist-info}/zip-safe +0 -0
autogluon/tabular/models/__init__.py
@@ -21,7 +21,7 @@ from .realmlp.realmlp_model import RealMLPModel
 from .rf.rf_model import RFModel
 from .tabicl.tabicl_model import TabICLModel
 from .tabm.tabm_model import TabMModel
-from .tabpfn.tabpfn_model import TabPFNModel
+from .tabpfnv2.tabpfnv2_model import TabPFNV2Model
 from .tabpfnmix.tabpfnmix_model import TabPFNMixModel
 from .tabular_nn.torch.tabular_nn_torch import TabularNeuralNetTorchModel
 from .text_prediction.text_prediction_v1_model import TextPredictorModel
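The replacement class is re-exported from autogluon.tabular.models, so it can be referenced directly. Below is a minimal, hypothetical sketch of wiring it into a predictor; it assumes TabPFNV2Model honors AutoGluon's AbstractModel contract (as the TabPFNModel it replaces did) and that the optional tabpfn dependency is installed. The string registry key updated in _ag_model_registry.py is not visible in this section, so it is not used here.

# Hypothetical usage sketch, not part of the diff.
import pandas as pd

from autogluon.tabular import TabularPredictor
from autogluon.tabular.models import TabPFNV2Model  # re-export added by this hunk

train_data = pd.DataFrame(
    {
        "x1": list(range(100)),
        "x2": [v % 7 for v in range(100)],
        "target": [v % 2 for v in range(100)],
    }
)
# AutoGluon accepts model classes as hyperparameter keys, bypassing the string registry.
predictor = TabularPredictor(label="target").fit(
    train_data,
    hyperparameters={TabPFNV2Model: {}},
)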
autogluon/tabular/models/tabpfnv2/__init__.py: new empty file (no content)
autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py
@@ -0,0 +1,20 @@
+from .configs import TabPFNRFConfig
+from .sklearn_based_decision_tree_tabpfn import (
+    DecisionTreeTabPFNClassifier,
+    DecisionTreeTabPFNRegressor,
+)
+from .sklearn_based_random_forest_tabpfn import (
+    RandomForestTabPFNClassifier,
+    RandomForestTabPFNRegressor,
+)
+
+# Backward compatibility for imports
+# These classes were previously in CamelCase files but are now imported from snake_case files
+
+__all__ = [
+    "DecisionTreeTabPFNClassifier",
+    "DecisionTreeTabPFNRegressor",
+    "RandomForestTabPFNClassifier",
+    "RandomForestTabPFNRegressor",
+    "TabPFNRFConfig",
+]
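For orientation, a minimal sketch of consuming these re-exports. The estimator constructors live in files shown later in this diff, so only the import surface and the config dataclass (defined in configs.py below) are exercised here.

# Hypothetical usage sketch, not part of the diff.
from autogluon.tabular.models.tabpfnv2.rfpfn import (
    RandomForestTabPFNClassifier,  # re-exported above
    TabPFNRFConfig,
)

# TabPFNRFConfig is a plain dataclass, so keyword construction is safe to show.
config = TabPFNRFConfig(n_estimators=8, max_depth=3)
print(RandomForestTabPFNClassifier.__name__, config.n_estimators)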
autogluon/tabular/models/tabpfnv2/rfpfn/configs.py
@@ -0,0 +1,40 @@
+# Copyright (c) Prior Labs GmbH 2025.
+# Licensed under the Apache License, Version 2.0
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+
+@dataclass
+class TabPFNRFConfig:
+    min_samples_split: int = 1000
+    min_samples_leaf: int = 5
+    max_depth: int = 5
+    splitter: Literal["best", "random"] = "best"
+    n_estimators: int = 16
+    max_features: Literal["sqrt", "auto"] = "sqrt"
+    criterion: Literal[
+        "gini",
+        "entropy",
+        "log_loss",
+        "squared_error",
+        "friedman_mse",
+        "poisson",
+    ] = "gini"
+    preprocess_X: bool = False
+    preprocess_X_once: bool = False
+    adaptive_tree: bool = True
+    fit_nodes: bool = True
+    adaptive_tree_overwrite_metric: Literal["logloss", "roc"] | None = None
+    adaptive_tree_test_size: float = 0.2
+    adaptive_tree_min_train_samples: int = 100
+    adaptive_tree_min_valid_samples_fraction_of_train: float = 0.2
+    adaptive_tree_max_train_samples: int = 5000
+    adaptive_tree_skip_class_missing: bool = True
+    max_predict_time: float = -1
+
+    bootstrap: bool = True
+    rf_average_logits: bool = False
+    dt_average_logits: bool = True
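Since TabPFNRFConfig is a standard dataclass, variants can be derived with dataclasses.replace. A brief sketch:

# Illustrative only, not part of the diff.
from dataclasses import replace

from autogluon.tabular.models.tabpfnv2.rfpfn.configs import TabPFNRFConfig

base = TabPFNRFConfig()  # defaults: n_estimators=16, criterion="gini", ...
fast = replace(base, n_estimators=4, adaptive_tree=False)  # cheaper variant
assert fast.max_depth == base.max_depth == 5  # untouched fields carry over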
autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py
@@ -0,0 +1,201 @@
+# Copyright (c) Prior Labs GmbH 2025.
+# Licensed under the Apache License, Version 2.0
+from __future__ import annotations
+
+import warnings
+from typing import Literal
+
+import numpy as np
+from sklearn.metrics import (
+    accuracy_score,
+    f1_score,
+    log_loss,
+    mean_absolute_error,
+    mean_squared_error,
+    roc_auc_score,
+)
+
+CLF_LABEL_METRICS = ["accuracy", "f1"]
+
+
+def safe_roc_auc_score(y_true, y_score, **kwargs):
+    """Compute the Area Under the Receiver Operating Characteristic Curve (ROC AUC) score.
+
+    This function is a safe wrapper around `sklearn.metrics.roc_auc_score` that handles
+    cases where the input data may have missing classes or binary classification problems.
+
+    Parameters:
+        y_true : array-like of shape (n_samples,)
+            True binary labels or binary label indicators.
+
+        y_score : array-like of shape (n_samples,) or (n_samples, n_classes)
+            Target scores, either probability estimates of the positive class,
+            confidence values, or non-thresholded measures of decisions.
+
+        **kwargs : dict
+            Additional keyword arguments to pass to `sklearn.metrics.roc_auc_score`.
+
+    Returns:
+        float: The ROC AUC score.
+
+    Raises:
+        ValueError: If there are missing classes in `y_true` that cannot be handled.
+    """
+    # First check for single-class data - handle it gracefully with perfect score
+    unique_classes = np.unique(y_true)
+    if len(unique_classes) < 2:
+        # For single-class data, return perfect score (1.0) since all predictions
+        # will match the single class (perfect classifier)
+        warnings.warn(
+            "Only one class present in y_true. Returning perfect score (1.0).",
+            stacklevel=2,
+        )
+        return 1.0
+
+    try:
+        # would be much safer to check count of unique values in y_true... but inefficient.
+        if (len(y_score.shape) > 1) and (y_score.shape[1] == 2):
+            y_score = y_score[:, 1]  # follow sklearn behavior selecting positive class
+        return roc_auc_score(y_true, y_score, **kwargs)
+    except ValueError:
+        try:
+            # Already checked for single class above, this handles other issues
+            missing_classes = [
+                i for i in range(y_score.shape[1]) if i not in unique_classes
+            ]
+
+            # Modify y_score to exclude columns corresponding to missing classes
+            y_score_adjusted = np.delete(y_score, missing_classes, axis=1)
+            y_score_adjusted = y_score_adjusted / y_score_adjusted.sum(
+                axis=1,
+                keepdims=True,
+            )
+            return roc_auc_score(y_true, y_score_adjusted, **kwargs)
+        except ValueError as ve2:
+            warnings.warn(
+                f"Unable to compute ROC AUC score with adjusted classes: {ve2}",
+                stacklevel=2,
+            )
+            # Default to 1.0 for errors instead of raising exception
+            return 1.0
+        except IndexError as ie:
+            warnings.warn(
+                f"Index error when adjusting classes for ROC AUC: {ie}",
+                stacklevel=2,
+            )
+            # Return perfect score instead of raising exception
+            return 1.0
+        except TypeError as te:
+            warnings.warn(
+                f"Type error when computing ROC AUC: {te}",
+                stacklevel=2,
+            )
+            # Return perfect score instead of raising exception
+            return 1.0
+
+
+def score_classification(
+    optimize_metric: Literal["roc", "auroc", "accuracy", "f1", "log_loss"],
+    y_true,
+    y_pred,
+    sample_weight=None,
+    *,
+    y_pred_is_labels: bool = False,
+):
+    """General function to score classification predictions.
+
+    Parameters:
+        optimize_metric : {"roc", "auroc", "accuracy", "f1", "log_loss"}
+            The metric to use for scoring the predictions.
+
+        y_true : array-like of shape (n_samples,)
+            True labels or binary label indicators.
+
+        y_pred : array-like of shape (n_samples,) or (n_samples, n_classes)
+            Predicted labels, probabilities, or confidence values.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+
+    Returns:
+        float: The score for the specified metric.
+
+    Raises:
+        ValueError: If an unknown metric is specified.
+    """
+    if optimize_metric is None:
+        optimize_metric = "roc"
+
+    if (optimize_metric == "roc") and len(np.unique(y_true)) == 2:
+        y_pred = y_pred[:, 1]
+
+    if (not y_pred_is_labels) and (optimize_metric not in ["roc", "log_loss"]):
+        y_pred = np.argmax(y_pred, axis=1)
+
+    if optimize_metric in ("roc", "auroc"):
+        return safe_roc_auc_score(
+            y_true,
+            y_pred,
+            sample_weight=sample_weight,
+            multi_class="ovr",
+        )
+    if optimize_metric == "accuracy":
+        return accuracy_score(y_true, y_pred, sample_weight=sample_weight)
+    if optimize_metric == "f1":
+        return f1_score(
+            y_true,
+            y_pred,
+            sample_weight=sample_weight,
+            average="macro",
+        )
+    if optimize_metric == "log_loss":
+        return -log_loss(y_true, y_pred, sample_weight=sample_weight)
+    raise ValueError(f"Unknown metric {optimize_metric}")
+
+
+def score_regression(
+    optimize_metric: Literal["rmse", "mse", "mae"],
+    y_true,
+    y_pred,
+    sample_weight=None,
+):
+    """General function to score regression predictions.
+
+    Parameters:
+        optimize_metric : {"rmse", "mse", "mae"}
+            The metric to use for scoring the predictions.
+
+        y_true : array-like of shape (n_samples,)
+            True target values.
+
+        y_pred : array-like of shape (n_samples,)
+            Predicted target values.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+
+    Returns:
+        float: The score for the specified metric.
+
+    Raises:
+        ValueError: If an unknown metric is specified.
+    """
+    if optimize_metric == "rmse":
+        try:
+            return -mean_squared_error(
+                y_true,
+                y_pred,
+                sample_weight=sample_weight,
+                squared=False,
+            )
+        except TypeError:
+            # Newer scikit-learn removed the `squared` argument; use root_mean_squared_error
+            from sklearn.metrics import root_mean_squared_error
+
+            return -root_mean_squared_error(y_true, y_pred, sample_weight=sample_weight)
+    elif optimize_metric == "mse":
+        return -mean_squared_error(y_true, y_pred, sample_weight=sample_weight)
+    elif optimize_metric == "mae":
+        return -mean_absolute_error(y_true, y_pred, sample_weight=sample_weight)
+    else:
+        raise ValueError(f"Unknown metric {optimize_metric}")
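All three scorers follow a higher-is-better convention, negating the error metrics. A small self-contained check, assuming only numpy and scikit-learn alongside the module above:

# Illustrative only, not part of the diff.
import numpy as np

from autogluon.tabular.models.tabpfnv2.rfpfn.scoring_utils import (
    score_classification,
    score_regression,
)

y_true = np.array([0, 1, 1, 0])
proba = np.array([[0.9, 0.1], [0.2, 0.8], [0.3, 0.7], [0.6, 0.4]])

print(score_classification("roc", y_true, proba))       # 1.0: all positives outrank negatives
print(score_classification("accuracy", y_true, proba))  # 1.0 after the internal argmax
print(score_regression("rmse", np.array([1.0, 2.0]), np.array([1.0, 3.0])))  # approx -0.7071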