autogluon.tabular 1.5.0b20251228__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- autogluon/tabular/__init__.py +1 -0
- autogluon/tabular/configs/config_helper.py +18 -6
- autogluon/tabular/configs/feature_generator_presets.py +3 -1
- autogluon/tabular/configs/hyperparameter_configs.py +42 -9
- autogluon/tabular/configs/presets_configs.py +38 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
- autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
- autogluon/tabular/experimental/_scikit_mixin.py +6 -2
- autogluon/tabular/experimental/_tabular_classifier.py +3 -1
- autogluon/tabular/experimental/_tabular_regressor.py +3 -1
- autogluon/tabular/experimental/plot_leaderboard.py +73 -19
- autogluon/tabular/learner/abstract_learner.py +160 -42
- autogluon/tabular/learner/default_learner.py +78 -22
- autogluon/tabular/models/__init__.py +2 -2
- autogluon/tabular/models/_utils/rapids_utils.py +3 -1
- autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
- autogluon/tabular/models/automm/automm_model.py +12 -3
- autogluon/tabular/models/automm/ft_transformer.py +5 -1
- autogluon/tabular/models/catboost/callbacks.py +2 -2
- autogluon/tabular/models/catboost/catboost_model.py +93 -29
- autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
- autogluon/tabular/models/catboost/catboost_utils.py +3 -1
- autogluon/tabular/models/ebm/ebm_model.py +8 -13
- autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
- autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
- autogluon/tabular/models/fastainn/callbacks.py +20 -3
- autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
- autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
- autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
- autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
- autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
- autogluon/tabular/models/knn/knn_model.py +41 -8
- autogluon/tabular/models/lgb/callbacks.py +32 -9
- autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
- autogluon/tabular/models/lgb/lgb_model.py +150 -34
- autogluon/tabular/models/lgb/lgb_utils.py +12 -4
- autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
- autogluon/tabular/models/lr/lr_model.py +40 -10
- autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
- autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
- autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
- autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
- autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
- autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
- autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
- autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
- autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
- autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
- autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
- autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
- autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
- autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
- autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
- autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
- autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
- autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
- autogluon/tabular/models/mitra/mitra_model.py +16 -11
- autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
- autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
- autogluon/tabular/models/rf/compilers/onnx.py +1 -1
- autogluon/tabular/models/rf/rf_model.py +45 -12
- autogluon/tabular/models/rf/rf_quantile.py +4 -2
- autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
- autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
- autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
- autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
- autogluon/tabular/models/tabm/tabm_model.py +8 -4
- autogluon/tabular/models/tabm/tabm_reference.py +53 -85
- autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
- autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
- autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
- autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
- autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
- autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
- autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
- autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
- autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
- autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
- autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
- autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
- autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
- autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
- autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
- autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
- autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
- autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
- autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
- autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
- autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
- autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
- autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
- autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
- autogluon/tabular/models/xgboost/callbacks.py +9 -3
- autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
- autogluon/tabular/models/xt/xt_model.py +1 -0
- autogluon/tabular/predictor/interpretable_predictor.py +3 -1
- autogluon/tabular/predictor/predictor.py +409 -128
- autogluon/tabular/registry/__init__.py +1 -1
- autogluon/tabular/registry/_ag_model_registry.py +4 -5
- autogluon/tabular/registry/_model_registry.py +1 -0
- autogluon/tabular/testing/fit_helper.py +55 -15
- autogluon/tabular/testing/generate_datasets.py +1 -1
- autogluon/tabular/testing/model_fit_helper.py +10 -4
- autogluon/tabular/trainer/abstract_trainer.py +644 -230
- autogluon/tabular/trainer/auto_trainer.py +19 -8
- autogluon/tabular/trainer/model_presets/presets.py +33 -9
- autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
- autogluon/tabular/version.py +1 -1
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
- autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
- autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
- autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
- autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
- autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
- autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
- /autogluon.tabular-1.5.0b20251228-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
- {autogluon_tabular-1.5.0b20251228.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
autogluon/tabular/models/automm/automm_model.py

@@ -1,4 +1,5 @@
 """Wrapper of the MultiModalPredictor."""
+
 from __future__ import annotations
 
 import logging
@@ -162,7 +163,9 @@ class MultiModalPredictorModel(AbstractModel):
 
         X, y, X_val, y_val = self.preprocess_fit(X=X, y=y, X_val=X_val, y_val=y_val)
         params = self._get_model_params()
-        max_features = params.pop("_max_features", None)  # FIXME: `_max_features` is a hack. Instead use ag_args_fit and make generic
+        max_features = params.pop(
+            "_max_features", None
+        )  # FIXME: `_max_features` is a hack. Instead use ag_args_fit and make generic
         num_features = len(X.columns)
         if max_features is not None and num_features > max_features:
             raise AssertionError(
@@ -180,7 +183,11 @@ class MultiModalPredictorModel(AbstractModel):
         enable_progress_bar = True
         num_gpus = kwargs.get("num_gpus", None)
         if sample_weight is not None:  # TODO: support
-            logger.log(15, "sample_weight not yet supported for MultiModalPredictorModel, this model will ignore them in training.")
+            logger.log(
+                15,
+                "sample_weight not yet supported for MultiModalPredictorModel, "
+                "this model will ignore them in training.",
+            )
 
         # Need to deep copy to avoid altering outer context
         X = X.copy()
@@ -273,7 +280,9 @@ class MultiModalPredictorModel(AbstractModel):
 
     def _get_default_resources(self):
         num_cpus = ResourceManager.get_cpu_count()
-        num_gpus = min(ResourceManager.get_gpu_count_torch(), 1)  # Use single gpu training by default. Consider to revise it later.
+        num_gpus = min(
+            ResourceManager.get_gpu_count_torch(), 1
+        )  # Use single gpu training by default. Consider to revise it later.
         return num_cpus, num_gpus
 
     def get_minimum_resources(self, is_gpu_available=False) -> Dict[str, int]:
autogluon/tabular/models/automm/ft_transformer.py

@@ -1,4 +1,5 @@
 """Wrapper of the MultiModalPredictor."""
+
 from __future__ import annotations
 
 import logging
@@ -57,7 +58,10 @@ class FTTransformerModel(MultiModalPredictorModel):
     def _fit(self, X, num_gpus="auto", **kwargs):
         if not isinstance(num_gpus, str):
             if num_gpus == 0:
-                logger.log(30, f"WARNING: Training {self.name} on CPU (no GPU specified). This could take a long time. Use GPU to speed up training.")
+                logger.log(
+                    30,
+                    f"WARNING: Training {self.name} on CPU (no GPU specified). This could take a long time. Use GPU to speed up training.",
+                )
         super()._fit(X, num_gpus=num_gpus, **kwargs)
 
     def _get_default_auxiliary_params(self) -> dict:
autogluon/tabular/models/catboost/callbacks.py

@@ -60,8 +60,8 @@ class MemoryCheckCallback:
         self.init_mem_rss = cur_rss
 
         # Convert memory values to MB
-        estimated_model_size_mb = (cur_rss - self.init_mem_rss) / (1024 ** 2)
-        available_mb = available_bytes / (1024 ** 2)
+        estimated_model_size_mb = (cur_rss - self.init_mem_rss) / (1024**2)
+        available_mb = available_bytes / (1024**2)
 
         model_size_memory_ratio = estimated_model_size_mb / available_mb
         early_stop = False
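The MemoryCheckCallback hunk above only reflows the bytes-to-MB conversion (dividing by 1024**2), but the pattern it belongs to is worth spelling out: the callback estimates the model's footprint from the growth in process RSS and stops boosting early when that footprint gets too large relative to free memory. A minimal sketch of the same idea, assuming psutil is available; the threshold values and function name are illustrative, not AutoGluon's actual API:

    import psutil

    def should_stop_for_memory(init_mem_rss: int, warn_ratio: float = 0.15, stop_ratio: float = 0.3) -> bool:
        """Estimate the model's memory footprint and decide whether to stop training early."""
        cur_rss = psutil.Process().memory_info().rss           # bytes currently held by this process
        available_bytes = psutil.virtual_memory().available    # bytes of free system memory
        estimated_model_size_mb = (cur_rss - init_mem_rss) / (1024**2)
        available_mb = available_bytes / (1024**2)
        model_size_memory_ratio = estimated_model_size_mb / available_mb
        if model_size_memory_ratio > warn_ratio:
            print(f"Warning: model is using {model_size_memory_ratio:.1%} of available memory")
        return model_size_memory_ratio > stop_ratio             # caller stops adding trees when True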
autogluon/tabular/models/catboost/catboost_model.py

@@ -13,13 +13,13 @@ from autogluon.common.features.types import R_BOOL, R_CATEGORY, R_FLOAT, R_INT
 from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
 from autogluon.common.utils.resource_utils import ResourceManager
 from autogluon.common.utils.try_import import try_import_catboost
-from autogluon.core.constants import MULTICLASS, PROBLEM_TYPES_CLASSIFICATION,
+from autogluon.core.constants import MULTICLASS, PROBLEM_TYPES_CLASSIFICATION, QUANTILE, REGRESSION, SOFTCLASS
 from autogluon.core.models import AbstractModel
 from autogluon.core.models._utils import get_early_stopping_rounds
 from autogluon.core.utils.exceptions import TimeLimitExceeded
 
 from .callbacks import EarlyStoppingCallback, MemoryCheckCallback, TimeCheckCallback
-from .catboost_utils import
+from .catboost_utils import CATBOOST_EVAL_METRIC_TO_LOSS_FUNCTION, get_catboost_metric_from_ag_metric
 from .hyperparameters.parameters import get_param_baseline
 from .hyperparameters.searchspaces import get_default_searchspace
 
@@ -33,12 +33,11 @@ class CatBoostModel(AbstractModel):
 
     Hyperparameter options: https://catboost.ai/en/docs/references/training-parameters
     """
+
     ag_key = "CAT"
     ag_name = "CatBoost"
     ag_priority = 70
-    ag_priority_by_problem_type = MappingProxyType({
-        SOFTCLASS: 60
-    })
+    ag_priority_by_problem_type = MappingProxyType({SOFTCLASS: 60})
     seed_name = "random_seed"
 
     def __init__(self, **kwargs):
@@ -50,16 +49,24 @@ class CatBoostModel(AbstractModel):
         for param, val in default_params.items():
             self._set_default_param_value(param, val)
         # Set 'allow_writing_files' to True in order to keep log files created by catboost during training (these will be saved in the directory where AutoGluon stores this model)
-        self._set_default_param_value("allow_writing_files", False)  # Disables creation of catboost logging files during training by default
+        self._set_default_param_value(
+            "allow_writing_files", False
+        )  # Disables creation of catboost logging files during training by default
         if self.problem_type != SOFTCLASS:  # TODO: remove this after catboost 0.24
-            default_eval_metric = get_catboost_metric_from_ag_metric(self.stopping_metric, self.problem_type, self.quantile_levels)
+            default_eval_metric = get_catboost_metric_from_ag_metric(
+                self.stopping_metric, self.problem_type, self.quantile_levels
+            )
             self._set_default_param_value("eval_metric", default_eval_metric)
 
     def _get_default_searchspace(self):
         return get_default_searchspace(self.problem_type, num_classes=self.num_classes)
 
-    def
-
+    def _preprocess(self, X, **kwargs):
+        # Note: while this is nonadaptive preprocessing, we made it stateful because it
+        # contains the logic for nan handling and nans can be created after
+        # nonadaptive preprocessing by model-specific preprocessing.
+        # Moreover, now CatBoost handles nan like most other models in `_preprocess`.
+        X = super()._preprocess(X, **kwargs)
         if self._category_features is None:
             self._category_features = list(X.select_dtypes(include="category").columns)
         if self._category_features:
@@ -74,7 +81,13 @@ class CatBoostModel(AbstractModel):
 
     def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
         hyperparameters = self._get_model_params()
-        return self.estimate_memory_usage_static(X=X, problem_type=self.problem_type, num_classes=self.num_classes, hyperparameters=hyperparameters, **kwargs)
+        return self.estimate_memory_usage_static(
+            X=X,
+            problem_type=self.problem_type,
+            num_classes=self.num_classes,
+            hyperparameters=hyperparameters,
+            **kwargs,
+        )
 
     @classmethod
     def _estimate_memory_usage_static(
@@ -97,16 +110,20 @@ class CatBoostModel(AbstractModel):
         """
         if hyperparameters is None:
             hyperparameters = {}
-        num_classes = num_classes if num_classes else 1  # self.num_classes could be None after initialization if it's a regression problem
+        num_classes = (
+            num_classes if num_classes else 1
+        )  # self.num_classes could be None after initialization if it's a regression problem
         data_mem_usage = get_approximate_df_mem_usage(X).sum()
-        data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved
+        data_mem_usage_bytes = (
+            data_mem_usage * 5 + data_mem_usage / 4 * num_classes
+        )  # TODO: Extremely crude approximation, can be vastly improved
 
         border_count = hyperparameters.get("border_count", 254)
         depth = hyperparameters.get("depth", 6)
 
         # if depth < 7, treat it as 1 step larger for histogram size estimate
         # this fixes cases where otherwise histogram size appears to be off by around a factor of 2 for depth=6
-        histogram_effective_depth = max(min(depth+1, 7), depth)
+        histogram_effective_depth = max(min(depth + 1, 7), depth)
 
         # Formula based on manual testing, aligns with LightGBM histogram sizes
         histogram_mem_usage_bytes = 24 * math.pow(2, histogram_effective_depth) * len(X.columns) * border_count
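The `_estimate_memory_usage_static` hunks carry the actual estimation logic: the data term assumes roughly five copies of the training frame plus a quarter-copy per class, and the histogram term grows with two to the effective depth, the column count, and `border_count`. A standalone sketch of that arithmetic; the 5x, num_classes/4, and 24-byte constants come from the diff, while the helper name and the use of pandas' own memory_usage are illustrative:

    import math
    import pandas as pd

    def approx_catboost_mem_bytes(X: pd.DataFrame, num_classes: int = 1, depth: int = 6, border_count: int = 254) -> int:
        data_mem_usage = X.memory_usage(deep=True).sum()   # stand-in for get_approximate_df_mem_usage
        data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes
        # if depth < 7, treat it as one step larger for the histogram size estimate
        histogram_effective_depth = max(min(depth + 1, 7), depth)
        histogram_mem_usage_bytes = 24 * math.pow(2, histogram_effective_depth) * len(X.columns) * border_count
        return int(data_mem_usage_bytes + histogram_mem_usage_bytes)

    # Rough example: a 100,000 x 50 float64 frame is ~40 MB, so the data term is ~210 MB
    # and the histogram term is 24 * 2**7 * 50 * 254 ~ 39 MB.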
@@ -119,7 +136,19 @@ class CatBoostModel(AbstractModel):
 
     # TODO: Use Pool in preprocess, optimize bagging to do Pool.split() to avoid re-computing pool for each fold! Requires stateful + y
     # Pool is much more memory efficient, avoids copying data twice in memory
-    def _fit(self, X, y, X_val=None, y_val=None, time_limit=None, num_gpus=0, num_cpus=-1, sample_weight=None, sample_weight_val=None, **kwargs):
+    def _fit(
+        self,
+        X,
+        y,
+        X_val=None,
+        y_val=None,
+        time_limit=None,
+        num_gpus=0,
+        num_cpus=-1,
+        sample_weight=None,
+        sample_weight_val=None,
+        **kwargs,
+    ):
         time_start = time.time()
         try_import_catboost()
         from catboost import CatBoostClassifier, CatBoostRegressor, Pool
@@ -132,19 +161,21 @@ class CatBoostModel(AbstractModel):
             # FIXME: This is extremely slow due to unoptimized metric / objective sent to CatBoost
             from .catboost_softclass_utils import SoftclassCustomMetric, SoftclassObjective
 
-            params.setdefault("loss_function",
+            params.setdefault("loss_function", SoftclassObjective.SoftLogLossObjective())
             params["eval_metric"] = SoftclassCustomMetric.SoftLogLossMetric()
         elif self.problem_type in [REGRESSION, QUANTILE]:
             # Choose appropriate loss_function that is as close as possible to the eval_metric
             params.setdefault(
                 "loss_function",
-                CATBOOST_EVAL_METRIC_TO_LOSS_FUNCTION.get(params["eval_metric"], params["eval_metric"])
+                CATBOOST_EVAL_METRIC_TO_LOSS_FUNCTION.get(params["eval_metric"], params["eval_metric"]),
             )
 
         model_type = CatBoostClassifier if self.problem_type in PROBLEM_TYPES_CLASSIFICATION else CatBoostRegressor
         num_rows_train = len(X)
         num_cols_train = len(X.columns)
-        num_classes = self.num_classes if self.num_classes else 1  # self.num_classes could be None after initialization if it's a regression problem
+        num_classes = (
+            self.num_classes if self.num_classes else 1
+        )  # self.num_classes could be None after initialization if it's a regression problem
 
         X = self.preprocess(X, y=y, is_train=True)
         cat_features = list(X.select_dtypes(include="category").columns)
@@ -159,7 +190,9 @@ class CatBoostModel(AbstractModel):
             eval_set = X_val
             early_stopping_rounds = ag_params.get("early_stop", "adaptive")
             if isinstance(early_stopping_rounds, (str, tuple, list)):
-                early_stopping_rounds = self._get_early_stopping_rounds(num_rows_train=num_rows_train, strategy=early_stopping_rounds)
+                early_stopping_rounds = self._get_early_stopping_rounds(
+                    num_rows_train=num_rows_train, strategy=early_stopping_rounds
+                )
 
         if params.get("allow_writing_files", False):
             if "train_dir" not in params:
@@ -187,7 +220,10 @@ class CatBoostModel(AbstractModel):
         if num_gpus != 0:
             if "task_type" not in params:
                 params["task_type"] = "GPU"
-                logger.log(20, f"\tTraining {self.name} with GPU, note that this may negatively impact model quality compared to CPU training.")
+                logger.log(
+                    20,
+                    f"\tTraining {self.name} with GPU, note that this may negatively impact model quality compared to CPU training.",
+                )
                 # TODO: Confirm if GPU is used in HPO (Probably not)
                 # TODO: Adjust max_bins to 254?
 
@@ -199,15 +235,23 @@ class CatBoostModel(AbstractModel):
                 params.pop("rsm")
                 logger.log(30, f"\t'rsm' is not supported on GPU, using default value (Default = 1).")
 
-        if self.problem_type == MULTICLASS and "rsm" not in params and "colsample_bylevel" not in params and num_features > 1000:
+        if (
+            self.problem_type == MULTICLASS
+            and "rsm" not in params
+            and "colsample_bylevel" not in params
+            and num_features > 1000
+        ):
             # Subsample columns to speed up training
             if params.get("task_type", None) != "GPU":  # RSM does not work on GPU
                 params["colsample_bylevel"] = max(min(1.0, 1000 / num_features), 0.05)
                 logger.log(
                     30,
-                    f"\tMany features detected ({num_features}), dynamically setting 'colsample_bylevel' to {params['colsample_bylevel']} to speed up training (Default = 1)."
+                    f"\tMany features detected ({num_features}), dynamically setting 'colsample_bylevel' to {params['colsample_bylevel']} to speed up training (Default = 1).",
+                )
+                logger.log(
+                    30,
+                    f"\tTo disable this functionality, explicitly specify 'colsample_bylevel' in the model hyperparameters.",
                 )
-                logger.log(30, f"\tTo disable this functionality, explicitly specify 'colsample_bylevel' in the model hyperparameters.")
             else:
                 params["colsample_bylevel"] = 1.0
                 logger.log(30, f"\t'colsample_bylevel' is not supported on GPU, using default value (Default = 1).")
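The dynamic `colsample_bylevel` hunk above kicks in for multiclass problems with more than 1000 features and no user-specified value: the formula targets roughly 1000 effective columns per tree level, clipped to the range [0.05, 1.0]. A quick check of how it behaves:

    def dynamic_colsample_bylevel(num_features: int) -> float:
        return max(min(1.0, 1000 / num_features), 0.05)

    assert dynamic_colsample_bylevel(1001) > 0.99    # just over the threshold: keep nearly all columns
    assert dynamic_colsample_bylevel(2000) == 0.5    # 2000 features: sample half of them per level
    assert dynamic_colsample_bylevel(50000) == 0.05  # floor of 5% for extremely wide data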
@@ -218,7 +262,9 @@ class CatBoostModel(AbstractModel):
         if params.get("task_type", None) != "GPU":
             callbacks = []
             if early_stopping_rounds is not None:
-                callbacks.append(EarlyStoppingCallback(stopping_rounds=early_stopping_rounds, eval_metric=params["eval_metric"]))
+                callbacks.append(
+                    EarlyStoppingCallback(stopping_rounds=early_stopping_rounds, eval_metric=params["eval_metric"])
+                )
 
             if num_rows_train * num_cols_train * num_classes > 5_000_000:
                 # The data is large enough to potentially cause memory issues during training, so monitor memory usage via callback.
@@ -226,12 +272,17 @@ class CatBoostModel(AbstractModel):
             if time_limit is not None:
                 time_cur = time.time()
                 time_left = time_limit - (time_cur - time_start)
-                if time_left <= time_limit * 0.4:  # if 60% of time was spent preprocessing, likely not enough time to train model
+                if (
+                    time_left <= time_limit * 0.4
+                ):  # if 60% of time was spent preprocessing, likely not enough time to train model
                     raise TimeLimitExceeded
                 callbacks.append(TimeCheckCallback(time_start=time_cur, time_limit=time_left))
             extra_fit_kwargs["callbacks"] = callbacks
         else:
-            logger.log(30, f"\tWarning: CatBoost on GPU is experimental. If you encounter issues, use CPU for training CatBoost instead.")
+            logger.log(
+                30,
+                f"\tWarning: CatBoost on GPU is experimental. If you encounter issues, use CPU for training CatBoost instead.",
+            )
             if time_limit is not None:
                 params["iterations"] = self._estimate_iter_in_time_gpu(
                     X=X,
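Both time-limit hunks reuse the same guard: if less than 40% of the budget remains once preprocessing is done (i.e. roughly 60% was already spent), training is abandoned instead of started. A minimal sketch of that check, with a stand-in exception class:

    import time

    class TimeLimitExceeded(Exception):
        """Raised when too little of the time budget is left to train a model."""

    def remaining_budget_or_raise(time_start: float, time_limit: float, min_frac: float = 0.4) -> float:
        time_left = time_limit - (time.time() - time_start)
        if time_left <= time_limit * min_frac:  # e.g. 60% of the budget went to preprocessing
            raise TimeLimitExceeded
        return time_left  # pass this to the training callback as the remaining budget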
@@ -272,7 +323,9 @@ class CatBoostModel(AbstractModel):
     # This method will train a model on a toy number of iterations to estimate memory and training time.
     # It will return an updated iterations to train on that will avoid running OOM and running over time limit.
     # Remove this logic once CatBoost fixes GPU support for callbacks and custom metrics.
-    def _estimate_iter_in_time_gpu(self, *, X, eval_set, time_limit, verbose, params, num_rows_train, time_start, model_type):
+    def _estimate_iter_in_time_gpu(
+        self, *, X, eval_set, time_limit, verbose, params, num_rows_train, time_start, model_type
+    ):
         import math
         import pickle
         import sys
@@ -280,7 +333,9 @@ class CatBoostModel(AbstractModel):
         modifier = min(1.0, 10000 / num_rows_train)
         num_sample_iter_max = max(round(modifier * 50), 2)
         time_left_start = time_limit - (time.time() - time_start)
-        if time_left_start <= time_limit * 0.4:  # if 60% of time was spent preprocessing, likely not enough time to train model
+        if (
+            time_left_start <= time_limit * 0.4
+        ):  # if 60% of time was spent preprocessing, likely not enough time to train model
             raise TimeLimitExceeded
         default_iters = params["iterations"]
         params_init = params.copy()
@@ -341,9 +396,18 @@ class CatBoostModel(AbstractModel):
     def _ag_params(self) -> set:
         return {"early_stop"}
 
-    def _validate_fit_memory_usage(self, mem_error_threshold: float = 1, mem_warning_threshold: float = 0.75, mem_size_threshold: int = 1e9, **kwargs):
+    def _validate_fit_memory_usage(
+        self,
+        mem_error_threshold: float = 1,
+        mem_warning_threshold: float = 0.75,
+        mem_size_threshold: int = 1e9,
+        **kwargs,
+    ):
         return super()._validate_fit_memory_usage(
-            mem_error_threshold=mem_error_threshold, mem_warning_threshold=mem_warning_threshold, mem_size_threshold=mem_size_threshold, **kwargs
+            mem_error_threshold=mem_error_threshold,
+            mem_warning_threshold=mem_warning_threshold,
+            mem_size_threshold=mem_size_threshold,
+            **kwargs,
         )
 
     def get_minimum_resources(self, is_gpu_available=False):
autogluon/tabular/models/catboost/catboost_softclass_utils.py

@@ -72,5 +72,8 @@ class SoftclassObjective(object):
             exp_sum += x
         exp_approx = [val / exp_sum for val in exp_approx]
         grad = [(targets[j] - exp_approx[j]) * weight for j in range(len(targets))]
-        hess = [[(exp_approx[j] * exp_approx[j2] - (j == j2) * exp_approx[j]) * weight for j in range(len(targets))] for j2 in range(len(targets))]
+        hess = [
+            [(exp_approx[j] * exp_approx[j2] - (j == j2) * exp_approx[j]) * weight for j in range(len(targets))]
+            for j2 in range(len(targets))
+        ]
         return (grad, hess)
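The SoftclassObjective hunk is a pure reflow, but the math it encodes is the soft-label log loss on top of a softmax: the gradient is (target - softmax) and the Hessian is the negated softmax Jacobian, both scaled by the sample weight. A vectorized NumPy equivalent of the same per-sample computation (illustrative only, not CatBoost's custom-objective interface, and using a max-shifted softmax for stability):

    import numpy as np

    def soft_logloss_grad_hess(approxes: np.ndarray, targets: np.ndarray, weight: float = 1.0):
        """Per-sample gradient and Hessian of the soft-label log loss w.r.t. raw scores."""
        exp_approx = np.exp(approxes - approxes.max())
        p = exp_approx / exp_approx.sum()              # softmax probabilities
        grad = (targets - p) * weight                  # shape (num_classes,)
        hess = (np.outer(p, p) - np.diag(p)) * weight  # hess[j2, j] = (p[j] * p[j2] - (j == j2) * p[j]) * weight
        return grad, hess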
autogluon/tabular/models/catboost/catboost_utils.py

@@ -83,7 +83,9 @@ def get_catboost_metric_from_ag_metric(metric, problem_type, quantile_levels=None):
         if quantile_levels is None:
             raise AssertionError(f"quantile_levels must be provided for problem_type = {problem_type}")
         if not all(0 < q < 1 for q in quantile_levels):
-            raise AssertionError(f"quantile_levels must fulfill 0 < q < 1, provided quantile_levels: {quantile_levels}")
+            raise AssertionError(
+                f"quantile_levels must fulfill 0 < q < 1, provided quantile_levels: {quantile_levels}"
+            )
         # Loss function MultiQuantile: can only be used if len(quantile_levels) >= 2, otherwise we must use Quantile:
         if len(quantile_levels) == 1:
             metric_class = f"{CATBOOST_QUANTILE_PREFIX}alpha={quantile_levels[0]}"
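The validation hunk above protects the construction of CatBoost's quantile metric string: per the surrounding comments, a single level uses the `Quantile:` prefix while two or more levels use `MultiQuantile:`. A sketch of that mapping, assuming the prefixes are concatenated with `alpha=` exactly as the context lines show:

    def catboost_quantile_metric(quantile_levels: list[float]) -> str:
        if not all(0 < q < 1 for q in quantile_levels):
            raise AssertionError(f"quantile_levels must fulfill 0 < q < 1, provided quantile_levels: {quantile_levels}")
        if len(quantile_levels) == 1:
            return f"Quantile:alpha={quantile_levels[0]}"
        return "MultiQuantile:alpha=" + ",".join(str(q) for q in sorted(quantile_levels))

    assert catboost_quantile_metric([0.5]) == "Quantile:alpha=0.5"
    assert catboost_quantile_metric([0.25, 0.5, 0.75]) == "MultiQuantile:alpha=0.25,0.5,0.75"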
autogluon/tabular/models/ebm/ebm_model.py

@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING
 
 import numpy as np
 import pandas as pd
+
 from autogluon.core.constants import BINARY, MULTICLASS, REGRESSION
 from autogluon.core.models import AbstractModel
 
@@ -38,14 +39,14 @@ class EBMModel(AbstractModel):
     black-box models on a wide range of tabular datasets.
 
     Requires the 'interpret' or 'interpret-core' package. Install via:
-
+
         pip install interpret
 
 
     Paper: InterpretML: A Unified Framework for Machine Learning Interpretability
-
+
     Authors: H. Nori, S. Jenkins, P. Koch, and R. Caruana 2019
-
+
     Codebase: https://github.com/interpretml/interpret
 
     License: MIT
@@ -57,7 +58,7 @@ class EBMModel(AbstractModel):
     ag_name = "EBM"
     ag_priority = 35
     seed_name = "random_state"
-
+
     def _fit(
         self,
         X: pd.DataFrame,
@@ -175,15 +176,11 @@ class EBMModel(AbstractModel):
         baseline_memory_bytes = 400_000_000  # 400 MB baseline memory
 
         # assuming we call pd.concat([X, X_val], ignore_index=True), then X size will be doubled
-        return baseline_memory_bytes + model_cls(**params).estimate_mem(
-            X, y, data_multiplier=2.0
-        )
+        return baseline_memory_bytes + model_cls(**params).estimate_mem(X, y, data_multiplier=2.0)
 
     def _validate_fit_memory_usage(self, mem_error_threshold: float = 1, **kwargs):
         # Given the good mem estimates with overhead, we set the threshold to 1.
-        return super()._validate_fit_memory_usage(
-            mem_error_threshold=mem_error_threshold, **kwargs
-        )
+        return super()._validate_fit_memory_usage(mem_error_threshold=mem_error_threshold, **kwargs)
 
 
 def construct_ebm_params(
@@ -223,9 +220,7 @@ def construct_ebm_params(
         "feature_types": feature_types,
     }
     if stopping_metric is not None:
-        params["objective"] = get_metric_from_ag_metric(
-            metric=stopping_metric, problem_type=problem_type
-        )
+        params["objective"] = get_metric_from_ag_metric(metric=stopping_metric, problem_type=problem_type)
     if time_limit is not None:
         params["callback"] = EbmCallback(time_limit)
 
autogluon/tabular/models/fastainn/callbacks.py

@@ -34,7 +34,16 @@ class BatchTimeTracker(Callback):
 
 
 class EarlyStoppingCallbackWithTimeLimit(TrackerCallback):
-    def __init__(self, monitor="valid_loss", comp=None, min_delta=0.0, patience=1, reset_on_fit=True, time_limit=None, best_epoch_stop=None):
+    def __init__(
+        self,
+        monitor="valid_loss",
+        comp=None,
+        min_delta=0.0,
+        patience=1,
+        reset_on_fit=True,
+        time_limit=None,
+        best_epoch_stop=None,
+    ):
         super().__init__(monitor=monitor, comp=comp, min_delta=min_delta, reset_on_fit=reset_on_fit)
         self.patience = patience
         self.time_limit = time_limit
@@ -84,7 +93,15 @@ class AgSaveModelCallback(TrackerCallback):
     _only_train_loop = True
 
     def __init__(
-        self, monitor="valid_loss", comp=None, min_delta=0.0, fname="model", every_epoch=False, with_opt=False, reset_on_fit=True, best_epoch_stop=None
+        self,
+        monitor="valid_loss",
+        comp=None,
+        min_delta=0.0,
+        fname="model",
+        every_epoch=False,
+        with_opt=False,
+        reset_on_fit=True,
+        best_epoch_stop=None,
     ):
         super().__init__(monitor=monitor, comp=comp, min_delta=min_delta, reset_on_fit=reset_on_fit)
         # keep track of file path for loggers
@@ -113,4 +130,4 @@
 
     def after_fit(self, **kwargs):
         if not self.every_epoch:
-            self.learn.load(f"{self.fname}", with_opt=self.with_opt, weights_only=False)
+            self.learn.load(f"{self.fname}", with_opt=self.with_opt, weights_only=False)  # nosec B614
autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py

@@ -19,7 +19,17 @@ def get_searchspace_binary():
     spaces = {
         # See docs: https://docs.fast.ai/tabular.learner.html
        "layers": space.Categorical(
-            None, [200, 100], [200], [500], [1000], [500, 200], [50, 25], [1000, 500], [200, 100, 50], [500, 200, 100], [1000, 500, 200]
+            None,
+            [200, 100],
+            [200],
+            [500],
+            [1000],
+            [500, 200],
+            [50, 25],
+            [1000, 500],
+            [200, 100, 50],
+            [500, 200, 100],
+            [1000, 500, 200],
         ),
         "emb_drop": space.Real(0.0, 0.5, default=0.1),
         "ps": space.Real(0.0, 0.5, default=0.1),
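The searchspace hunk only reflows the `layers` categories (from a single 200-unit layer up to [1000, 500, 200]), but it illustrates the shape of a fastai search space. A hedged sketch of supplying such a space yourself; the `FASTAI` hyperparameters key and the `autogluon.common.space` import follow AutoGluon's documented conventions, so treat them as assumptions if your version differs:

    from autogluon.common import space
    from autogluon.tabular import TabularPredictor

    fastai_search_space = {
        "layers": space.Categorical(None, [200, 100], [500, 200], [1000, 500, 200]),
        "emb_drop": space.Real(0.0, 0.5, default=0.1),
        "ps": space.Real(0.0, 0.5, default=0.1),
    }

    # predictor = TabularPredictor(label="target").fit(
    #     train_data,
    #     hyperparameters={"FASTAI": fastai_search_space},
    #     hyperparameter_tune_kwargs="auto",  # search spaces only take effect when HPO is enabled
    # )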
autogluon/tabular/models/fastainn/quantile_helpers.py

@@ -37,9 +37,17 @@ class HuberPinballLoss(nn.Module):
         if self.alpha == 0.0:
             loss_data = torch.max(self.quantile_levels * error_data, (self.quantile_levels - 1) * error_data)
         else:
-            loss_data = torch.where(torch.abs(error_data) < self.alpha, 0.5 * error_data * error_data, self.alpha * (torch.abs(error_data) - 0.5 * self.alpha))
+            loss_data = torch.where(
+                torch.abs(error_data) < self.alpha,
+                0.5 * error_data * error_data,
+                self.alpha * (torch.abs(error_data) - 0.5 * self.alpha),
+            )
             loss_data = loss_data / self.alpha
 
-            scale = torch.where(error_data >= 0, torch.ones_like(error_data) * self.quantile_levels, torch.ones_like(error_data) * (1 - self.quantile_levels))
+            scale = torch.where(
+                error_data >= 0,
+                torch.ones_like(error_data) * self.quantile_levels,
+                torch.ones_like(error_data) * (1 - self.quantile_levels),
+            )
             loss_data *= scale
         return loss_data.mean()
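The HuberPinballLoss hunk is a reflow of the smoothed quantile loss: errors smaller than alpha are penalized quadratically, and the result is then scaled by q for under-predictions and (1 - q) for over-predictions. For comparison, a minimal un-smoothed pinball (quantile) loss in PyTorch, matching the `alpha == 0.0` branch shown in context:

    import torch

    def pinball_loss(y_true: torch.Tensor, y_pred: torch.Tensor, quantile_levels: torch.Tensor) -> torch.Tensor:
        """Mean pinball loss; y_pred holds one column per quantile level."""
        error = y_true.unsqueeze(-1) - y_pred  # positive error = under-prediction
        return torch.max(quantile_levels * error, (quantile_levels - 1) * error).mean()

    # At the 0.9 quantile, under-predicting costs 9x more than over-predicting by the same amount.
    q = torch.tensor([0.1, 0.5, 0.9])
    print(pinball_loss(torch.tensor([10.0]), torch.tensor([[9.0, 10.0, 11.0]]), q))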