autogluon.core 1.2.1b20250112__py3-none-any.whl → 1.2.1b20250114__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/core/models/ensemble/bagged_ensemble_model.py +39 -7
- autogluon/core/utils/utils.py +72 -10
- autogluon/core/version.py +1 -1
- {autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/METADATA +6 -6
- {autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/RECORD +12 -12
- /autogluon.core-1.2.1b20250112-py3.8-nspkg.pth → /autogluon.core-1.2.1b20250114-py3.8-nspkg.pth +0 -0
- {autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/LICENSE +0 -0
- {autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/NOTICE +0 -0
- {autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/WHEEL +0 -0
- {autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/namespace_packages.txt +0 -0
- {autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/top_level.txt +0 -0
- {autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/zip-safe +0 -0
@@ -18,7 +18,7 @@ from autogluon.common.utils.distribute_utils import DistributedContext
|
|
18
18
|
from autogluon.common.utils.log_utils import DuplicateFilter
|
19
19
|
from autogluon.common.utils.try_import import try_import_ray
|
20
20
|
|
21
|
-
from ...constants import MULTICLASS, QUANTILE, REFIT_FULL_SUFFIX, REGRESSION, SOFTCLASS
|
21
|
+
from ...constants import BINARY, MULTICLASS, QUANTILE, REFIT_FULL_SUFFIX, REGRESSION, SOFTCLASS
|
22
22
|
from ...hpo.exceptions import EmptySearchSpace
|
23
23
|
from ...pseudolabeling.pseudolabeling import assert_pseudo_column_match
|
24
24
|
from ...utils.exceptions import TimeLimitExceeded
|
@@ -106,6 +106,9 @@ class BaggedEnsembleModel(AbstractModel):
|
|
106
106
|
# 'refit_folds': False, # [Advanced, Experimental] Whether to refit bags immediately to a refit_full model in a single .fit call.
|
107
107
|
# 'num_folds' None, # Number of bagged folds per set. If specified, overrides .fit `k_fold` value.
|
108
108
|
# 'max_sets': None, # Maximum bagged repeats to allow, if specified, will set `self.can_fit()` to `self._n_repeats_finished < max_repeats`
|
109
|
+
"stratify": "auto",
|
110
|
+
"bin": "auto",
|
111
|
+
"n_bins": None,
|
109
112
|
}
|
110
113
|
for param, val in default_params.items():
|
111
114
|
self._set_default_param_value(param, val)
|
@@ -125,11 +128,32 @@ class BaggedEnsembleModel(AbstractModel):
|
|
125
128
|
def can_infer(self):
|
126
129
|
return self.is_fit() and self.params.get("save_bag_folds", True)
|
127
130
|
|
128
|
-
def is_stratified(self):
|
129
|
-
|
130
|
-
|
131
|
+
def is_stratified(self) -> bool:
|
132
|
+
"""
|
133
|
+
Returns whether to stratify on the label during KFold splits
|
134
|
+
"""
|
135
|
+
stratify = self.params.get("stratify", "auto")
|
136
|
+
if isinstance(stratify, str) and stratify == "auto":
|
137
|
+
return self.problem_type in [
|
138
|
+
BINARY,
|
139
|
+
MULTICLASS,
|
140
|
+
|
141
|
+
# Commented out due to inconclusive results on whether this is helpful when combined with binning
|
142
|
+
# REGRESSION,
|
143
|
+
# QUANTILE,
|
144
|
+
]
|
131
145
|
else:
|
132
|
-
return
|
146
|
+
return stratify
|
147
|
+
|
148
|
+
def is_binned(self) -> bool:
|
149
|
+
"""
|
150
|
+
Returns whether to bin the label during stratified KFold splits
|
151
|
+
"""
|
152
|
+
bin = self.params.get("bin", "auto")
|
153
|
+
if isinstance(bin, str) and bin == "auto":
|
154
|
+
return self.problem_type in [REGRESSION, QUANTILE]
|
155
|
+
else:
|
156
|
+
return bin
|
133
157
|
|
134
158
|
def is_fit(self) -> bool:
|
135
159
|
return self.n_children != 0
|
@@ -188,8 +212,16 @@ class BaggedEnsembleModel(AbstractModel):
|
|
188
212
|
else:
|
189
213
|
return X
|
190
214
|
|
191
|
-
def _get_cv_splitter(self, n_splits, n_repeats, groups=None):
|
192
|
-
return CVSplitter(
|
215
|
+
def _get_cv_splitter(self, n_splits: int, n_repeats: int, groups=None) -> CVSplitter:
|
216
|
+
return CVSplitter(
|
217
|
+
n_splits=n_splits,
|
218
|
+
n_repeats=n_repeats,
|
219
|
+
groups=groups,
|
220
|
+
stratify=self.is_stratified(),
|
221
|
+
bin=self.is_binned(),
|
222
|
+
n_bins=self.params.get("n_bins", None),
|
223
|
+
random_state=self._random_state,
|
224
|
+
)
|
193
225
|
|
194
226
|
def _fit(
|
195
227
|
self,
|
autogluon/core/utils/utils.py
CHANGED
@@ -13,7 +13,8 @@ import pandas as pd
|
|
13
13
|
import scipy.stats
|
14
14
|
from numpy.typing import ArrayLike
|
15
15
|
from pandas import DataFrame, Series
|
16
|
-
from sklearn.model_selection import LeaveOneGroupOut, RepeatedKFold, RepeatedStratifiedKFold, train_test_split
|
16
|
+
from sklearn.model_selection import BaseCrossValidator, LeaveOneGroupOut, RepeatedKFold, RepeatedStratifiedKFold, train_test_split
|
17
|
+
from sklearn.preprocessing import KBinsDiscretizer
|
17
18
|
|
18
19
|
from autogluon.common.utils.resource_utils import ResourceManager
|
19
20
|
|
@@ -34,12 +35,54 @@ from .miscs import warning_filter
|
|
34
35
|
logger = logging.getLogger(__name__)
|
35
36
|
|
36
37
|
|
38
|
+
# TODO: Add binned stratification support for regression in train/val split (non CV)
|
37
39
|
class CVSplitter:
|
38
|
-
def __init__(
|
40
|
+
def __init__(
|
41
|
+
self,
|
42
|
+
splitter_cls=None,
|
43
|
+
n_splits: int = 5,
|
44
|
+
n_repeats: int = 1,
|
45
|
+
random_state: int | None = 0,
|
46
|
+
stratify: bool = False,
|
47
|
+
bin: bool = False,
|
48
|
+
n_bins: int | None = None,
|
49
|
+
groups: pd.Series = None,
|
50
|
+
):
|
51
|
+
"""
|
52
|
+
Wrapper around splitter objects to perform KFold splits.
|
53
|
+
Supports regression stratification via the `bin` and `n_bins` argument.
|
54
|
+
|
55
|
+
Parameters
|
56
|
+
----------
|
57
|
+
splitter_cls, default None
|
58
|
+
The class to use for splitting.
|
59
|
+
If None, will automatically be determined based off of `stratify`, `groups`, and `n_repeats`.
|
60
|
+
n_splits : int, default 5
|
61
|
+
The number of splits to perform.
|
62
|
+
Ignored if `groups` is specified.
|
63
|
+
n_repeats: int, default 1
|
64
|
+
The number of repeated splits to perform.
|
65
|
+
Ignored if `groups` is specified.
|
66
|
+
random_state : int, default 0
|
67
|
+
The seed to use when splitting the data.
|
68
|
+
stratify : bool, default False
|
69
|
+
If True, will stratify the splits on `y`.
|
70
|
+
bin : bool, default False
|
71
|
+
If True and `stratify` is True, will bin `y` into `n_bins` bins for stratification.
|
72
|
+
Should only be used for regression and quantile tasks.
|
73
|
+
n_bins : int, default None
|
74
|
+
The number of bins to use when `bin` is True.
|
75
|
+
If None, defaults to `np.floor(n_samples / n_splits)`.
|
76
|
+
groups : pd.Series, default None
|
77
|
+
If specified, splitter_cls will default to LeaveOneGroupOut.
|
78
|
+
|
79
|
+
"""
|
39
80
|
self.n_splits = n_splits
|
40
81
|
self.n_repeats = n_repeats
|
41
82
|
self.random_state = random_state
|
42
|
-
self.
|
83
|
+
self.stratify = stratify
|
84
|
+
self.bin = bin
|
85
|
+
self.n_bins = n_bins
|
43
86
|
self.groups = groups
|
44
87
|
if splitter_cls is None:
|
45
88
|
splitter_cls = self._get_splitter_cls()
|
@@ -53,13 +96,13 @@ class CVSplitter:
|
|
53
96
|
self.n_splits = num_groups
|
54
97
|
splitter_cls = LeaveOneGroupOut
|
55
98
|
# pass
|
56
|
-
elif self.
|
99
|
+
elif self.stratify:
|
57
100
|
splitter_cls = RepeatedStratifiedKFold
|
58
101
|
else:
|
59
102
|
splitter_cls = RepeatedKFold
|
60
103
|
return splitter_cls
|
61
104
|
|
62
|
-
def _get_splitter(self, splitter_cls):
|
105
|
+
def _get_splitter(self, splitter_cls) -> BaseCrossValidator:
|
63
106
|
if splitter_cls == LeaveOneGroupOut:
|
64
107
|
return splitter_cls()
|
65
108
|
elif splitter_cls in [RepeatedKFold, RepeatedStratifiedKFold]:
|
@@ -67,19 +110,38 @@ class CVSplitter:
|
|
67
110
|
else:
|
68
111
|
raise AssertionError(f"{splitter_cls} is not supported as a valid `splitter_cls` input to CVSplitter.")
|
69
112
|
|
70
|
-
def split(self, X, y):
|
71
|
-
|
113
|
+
def split(self, X: pd.DataFrame, y: pd.Series) -> list[tuple[np.ndarray, np.ndarray]]:
|
114
|
+
splitter = self._splitter
|
115
|
+
if isinstance(splitter, RepeatedStratifiedKFold):
|
116
|
+
if self.bin:
|
117
|
+
if self.n_bins is None:
|
118
|
+
n_splits = splitter.get_n_splits()
|
119
|
+
n_samples = len(y)
|
120
|
+
|
121
|
+
# ensure at least n_splits samples per bin
|
122
|
+
n_bins = int(np.floor(n_samples / n_splits))
|
123
|
+
else:
|
124
|
+
n_bins = self.n_bins
|
125
|
+
|
126
|
+
if n_bins > 1:
|
127
|
+
k_bins_discretizer = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', random_state=self.random_state)
|
128
|
+
y_bin = k_bins_discretizer.fit_transform(y.to_frame())[:, 0]
|
129
|
+
y = pd.Series(data=y_bin, index=y.index, name=y.name)
|
130
|
+
else:
|
131
|
+
# Don't stratify, can't bin!
|
132
|
+
splitter = self._get_splitter(splitter_cls=RepeatedKFold)
|
133
|
+
|
72
134
|
# FIXME: There is a bug in sklearn that causes an incorrect ValueError if performing stratification and all classes have fewer than n_splits samples.
|
73
135
|
# This is hacked by adding a dummy class with n_splits samples, performing the kfold split, then removing the dummy samples from all resulting indices.
|
74
136
|
# This is very inefficient and complicated and ideally should be fixed in sklearn.
|
75
137
|
with warning_filter():
|
76
138
|
try:
|
77
|
-
out = [[train_index, test_index] for train_index, test_index in
|
139
|
+
out = [[train_index, test_index] for train_index, test_index in splitter.split(X, y)]
|
78
140
|
except:
|
79
141
|
y_dummy = pd.concat([y, pd.Series([-1] * self.n_splits)], ignore_index=True)
|
80
142
|
X_dummy = pd.concat([X, X.head(self.n_splits)], ignore_index=True)
|
81
143
|
invalid_index = set(list(y_dummy.tail(self.n_splits).index))
|
82
|
-
out = [[train_index, test_index] for train_index, test_index in
|
144
|
+
out = [[train_index, test_index] for train_index, test_index in splitter.split(X_dummy, y_dummy)]
|
83
145
|
len_out = len(out)
|
84
146
|
for i in range(len_out):
|
85
147
|
train_index, test_index = out[i]
|
@@ -87,7 +149,7 @@ class CVSplitter:
|
|
87
149
|
out[i][1] = [index for index in test_index if index not in invalid_index]
|
88
150
|
return out
|
89
151
|
else:
|
90
|
-
return [[train_index, test_index] for train_index, test_index in
|
152
|
+
return [[train_index, test_index] for train_index, test_index in splitter.split(X, y, groups=self.groups)]
|
91
153
|
|
92
154
|
|
93
155
|
def setup_compute(nthreads_per_trial, ngpus_per_trial):
|
autogluon/core/version.py
CHANGED
{autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: autogluon.core
|
3
|
-
Version: 1.2.1b20250112
|
3
|
+
Version: 1.2.1b20250114
|
4
4
|
Summary: Fast and Accurate ML in 3 Lines of Code
|
5
5
|
Home-page: https://github.com/autogluon/autogluon
|
6
6
|
Author: AutoGluon Community
|
@@ -43,12 +43,12 @@ Requires-Dist: tqdm<5,>=4.38
|
|
43
43
|
Requires-Dist: requests
|
44
44
|
Requires-Dist: matplotlib<3.11,>=3.7.0
|
45
45
|
Requires-Dist: boto3<2,>=1.10
|
46
|
-
Requires-Dist: autogluon.common==1.2.1b20250112
|
46
|
+
Requires-Dist: autogluon.common==1.2.1b20250114
|
47
47
|
Provides-Extra: all
|
48
|
-
Requires-Dist: pyarrow>=15.0.0; extra == "all"
|
49
48
|
Requires-Dist: ray[default,tune]<2.41,>=2.10.0; extra == "all"
|
50
|
-
Requires-Dist: hyperopt<0.2.8,>=0.2.7; extra == "all"
|
51
49
|
Requires-Dist: ray[default]<2.41,>=2.10.0; extra == "all"
|
50
|
+
Requires-Dist: pyarrow>=15.0.0; extra == "all"
|
51
|
+
Requires-Dist: hyperopt<0.2.8,>=0.2.7; extra == "all"
|
52
52
|
Provides-Extra: ray
|
53
53
|
Requires-Dist: ray[default]<2.41,>=2.10.0; extra == "ray"
|
54
54
|
Provides-Extra: raytune
|
@@ -56,10 +56,10 @@ Requires-Dist: pyarrow>=15.0.0; extra == "raytune"
|
|
56
56
|
Requires-Dist: ray[default,tune]<2.41,>=2.10.0; extra == "raytune"
|
57
57
|
Requires-Dist: hyperopt<0.2.8,>=0.2.7; extra == "raytune"
|
58
58
|
Provides-Extra: tests
|
59
|
-
Requires-Dist:
|
59
|
+
Requires-Dist: flake8; extra == "tests"
|
60
60
|
Requires-Dist: pytest; extra == "tests"
|
61
|
+
Requires-Dist: pytest-mypy; extra == "tests"
|
61
62
|
Requires-Dist: types-requests; extra == "tests"
|
62
|
-
Requires-Dist: flake8; extra == "tests"
|
63
63
|
Requires-Dist: types-setuptools; extra == "tests"
|
64
64
|
|
65
65
|
|
@@ -1,9 +1,9 @@
|
|
1
|
-
autogluon.core-1.2.
|
1
|
+
autogluon.core-1.2.1b20250114-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
|
2
2
|
autogluon/core/__init__.py,sha256=8KfvvHzXX3a4q6z43Dw1yE7VtbAoiSMaglVpKDy6Xeg,245
|
3
3
|
autogluon/core/_setup_utils.py,sha256=NqlGK6So0KG5M0LbBJNT1TI3iAmG93kd_6Brih6y2gQ,6935
|
4
4
|
autogluon/core/constants.py,sha256=nEVLdSFJ-5O-tz3jUD3qPX65RMp7g8qOR38XlurbP4Y,3403
|
5
5
|
autogluon/core/problem_type.py,sha256=XJmMgeNBgS7u43pDK-spTivatPyh_INOXveEXwQt-Rw,2993
|
6
|
-
autogluon/core/version.py,sha256=
|
6
|
+
autogluon/core/version.py,sha256=Um-j8InMDDRIJ5WoL607BMYYvkzsVWU8cxXJ2aXfjbQ,90
|
7
7
|
autogluon/core/augmentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
autogluon/core/augmentation/distill_utils.py,sha256=JBlp2WOMNKoJv8aKVwJVRQSalSk8jx36HM7-k_VvkhY,9404
|
9
9
|
autogluon/core/calibrate/__init__.py,sha256=eU6qLj7DKUhaz2HHNHDrfroRaLM-mhuSncK_v1UP4F8,62
|
@@ -47,7 +47,7 @@ autogluon/core/models/dummy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
|
|
47
47
|
autogluon/core/models/dummy/_dummy_quantile_regressor.py,sha256=i-ZW2flJ60jsMfMK24IP39Xwc55-UlBDvHmqanIf29Q,664
|
48
48
|
autogluon/core/models/dummy/dummy_model.py,sha256=at2FZSM2_LuAQ78E2YrRCRt3UaKMyyOnc6p2rtZgA2w,1414
|
49
49
|
autogluon/core/models/ensemble/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
|
-
autogluon/core/models/ensemble/bagged_ensemble_model.py,sha256=
|
50
|
+
autogluon/core/models/ensemble/bagged_ensemble_model.py,sha256=p3lkm0Cweyu_maOeboirGmQIjIaaiDHXS-XHDc-wYYw,72610
|
51
51
|
autogluon/core/models/ensemble/fold_fitting_strategy.py,sha256=01vzNVvE4FIFgD6YqbhK63XoUlSztnVFsrDdsoqm75U,47021
|
52
52
|
autogluon/core/models/ensemble/ray_parallel_fold_fitting_strategy.py,sha256=8RASa-eV6n9kUgbqQHNt7k4IrvuB9NdrunIMLYOLwgA,2068
|
53
53
|
autogluon/core/models/ensemble/stacker_ensemble_model.py,sha256=DuDXgozvG9JYYkRvGACA7EXDAtj3Tz_uAjXTfxu5tFg,18041
|
@@ -85,15 +85,15 @@ autogluon/core/utils/infer_utils.py,sha256=In-u47ELeMLol-O02-bbo79Ak2TYo6iuQ7SJk
|
|
85
85
|
autogluon/core/utils/miscs.py,sha256=Kh5tXxyGHWfEEb36t3iBpdhJI7V4Pgih6xN_bpiQwFg,369
|
86
86
|
autogluon/core/utils/plots.py,sha256=ambgOSY7Hp--CDU6raghZx6O8m4a49spdHRG3zEMypk,12238
|
87
87
|
autogluon/core/utils/time.py,sha256=WaTonKWCSO5BoSIPo_hdgreGiQxoYE2dYWKO3aPFHj4,3841
|
88
|
-
autogluon/core/utils/utils.py,sha256=
|
88
|
+
autogluon/core/utils/utils.py,sha256=FMa9kIUAxA3IIBbATmBnNEVObSAivehZ2_zCy3PRR-c,57660
|
89
89
|
autogluon/core/utils/version_utils.py,sha256=5-r8hLRKTaZbj5qo2uzE_2E4casH49Ye3WyeHlgHuz4,3252
|
90
90
|
autogluon/core/utils/loaders/__init__.py,sha256=W5FAdQvpDcn_uisqJrlSAObWVta-YjJLKGN3NCbEgIo,109
|
91
91
|
autogluon/core/utils/savers/__init__.py,sha256=bGWciSxAkj6u06vOC4pTvr22f_1ey0glgvmjCMEOm78,89
|
92
|
-
autogluon.core-1.2.
|
93
|
-
autogluon.core-1.2.
|
94
|
-
autogluon.core-1.2.
|
95
|
-
autogluon.core-1.2.
|
96
|
-
autogluon.core-1.2.
|
97
|
-
autogluon.core-1.2.
|
98
|
-
autogluon.core-1.2.
|
99
|
-
autogluon.core-1.2.
|
92
|
+
autogluon.core-1.2.1b20250114.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
|
93
|
+
autogluon.core-1.2.1b20250114.dist-info/METADATA,sha256=qZ8UNbAq_mHz_lqcg-_xSY0z9dXa8fNDuHOncGl7ICg,12328
|
94
|
+
autogluon.core-1.2.1b20250114.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
|
95
|
+
autogluon.core-1.2.1b20250114.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
96
|
+
autogluon.core-1.2.1b20250114.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
|
97
|
+
autogluon.core-1.2.1b20250114.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
|
98
|
+
autogluon.core-1.2.1b20250114.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
99
|
+
autogluon.core-1.2.1b20250114.dist-info/RECORD,,
|
/autogluon.core-1.2.1b20250112-py3.8-nspkg.pth → /autogluon.core-1.2.1b20250114-py3.8-nspkg.pth
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/top_level.txt
RENAMED
File without changes
|
{autogluon.core-1.2.1b20250112.dist-info → autogluon.core-1.2.1b20250114.dist-info}/zip-safe
RENAMED
File without changes
|