autogluon.core 1.2.1b20250110__py3-none-any.whl → 1.2.1b20250111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,12 @@
1
1
  _DEFAULT_TAGS = {
2
- # Whether the model can produce out-of-fold (or similar) predictions of the training data without being significantly overfit.
2
+ # [Advanced] Whether the model can support fitting on 100% of the data and then getting unbiased predictions on the same data
3
+ # it was fit on, by exploiting special properties of the model architecture.
4
+ # For example, random forest uses only a portion of the training data randomly for each decision tree.
5
+ # We can therefore use the out-of-bag predictions to obtain unbiased predictions.
6
+ # Note that models that specify this as True must implement a `_predict_proba_oof` method.
7
+ # Refer to RandomForestModel or KNeighborsModel for reference implementations.
3
8
  "valid_oof": False,
9
+
4
10
  # Whether the model can be refit using the combined train and val data as training and no validation data without issue.
5
11
  # TL;DR: Keep value as False unless you know what you are doing. This is advanced functionality.
6
12
  # If False, when calling predictor.refit_full(), this model will simply be duplicated (if non-bag) or will have the first fold model duplicated (if bag).
@@ -193,19 +193,19 @@ class BaggedEnsembleModel(AbstractModel):
193
193
 
194
194
  def _fit(
195
195
  self,
196
- X,
197
- y,
198
- X_val=None,
199
- y_val=None,
200
- X_pseudo=None,
201
- y_pseudo=None,
202
- k_fold=None,
203
- k_fold_start=0,
204
- k_fold_end=None,
205
- n_repeats=1,
206
- n_repeat_start=0,
207
- groups=None,
208
- _skip_oof=False,
196
+ X: pd.DataFrame,
197
+ y: pd.Series,
198
+ X_val: pd.DataFrame = None,
199
+ y_val: pd.Series = None,
200
+ X_pseudo: pd.DataFrame = None,
201
+ y_pseudo: pd.Series = None,
202
+ k_fold: int = None,
203
+ k_fold_start: int = 0,
204
+ k_fold_end: int = None,
205
+ n_repeats: int = 1,
206
+ n_repeat_start: int = 0,
207
+ groups: pd.Series = None,
208
+ _skip_oof: bool = False,
209
209
  **kwargs,
210
210
  ):
211
211
  use_child_oof = self.params.get("use_child_oof", False)
@@ -223,8 +223,6 @@ class BaggedEnsembleModel(AbstractModel):
223
223
  k_fold, k_fold_end = self._update_k_fold(k_fold=k_fold, k_fold_end=k_fold_end)
224
224
  if k_fold is None and groups is None:
225
225
  k_fold = 5
226
- if k_fold is not None and k_fold < 1:
227
- k_fold = 1
228
226
  if k_fold is None or k_fold > 1:
229
227
  k_fold = self._get_cv_splitter(n_splits=k_fold, n_repeats=n_repeats, groups=groups).n_splits
230
228
  max_sets = self._get_model_params().get("max_sets", None)
@@ -238,6 +236,7 @@ class BaggedEnsembleModel(AbstractModel):
238
236
  n_repeats=n_repeats,
239
237
  n_repeat_start=n_repeat_start,
240
238
  groups=groups,
239
+ use_child_oof=use_child_oof,
241
240
  )
242
241
  if k_fold_end is None:
243
242
  k_fold_end = k_fold
@@ -327,7 +326,7 @@ class BaggedEnsembleModel(AbstractModel):
327
326
  else:
328
327
  return self
329
328
 
330
- def _update_k_fold(self, k_fold, k_fold_end=None, verbose=True):
329
+ def _update_k_fold(self, k_fold: int, k_fold_end: int = None, verbose: bool = True) -> tuple[int, int]:
331
330
  """Update k_fold and k_fold_end in case num_folds was specified"""
332
331
  k_fold_override = self.params.get("num_folds", None)
333
332
  if k_fold_override is not None:
@@ -343,7 +342,17 @@ class BaggedEnsembleModel(AbstractModel):
343
342
  assert self.is_initialized(), "Model must be initialized before calling self._get_child_aux_val!"
344
343
  return self._params_aux_child.get(key, default)
345
344
 
346
- def _validate_bag_kwargs(self, *, k_fold, k_fold_start, k_fold_end, n_repeats, n_repeat_start, groups):
345
+ def _validate_bag_kwargs(
346
+ self,
347
+ *,
348
+ k_fold: int,
349
+ k_fold_start: int,
350
+ k_fold_end: int,
351
+ n_repeats: int,
352
+ n_repeat_start: int,
353
+ groups: pd.Series | None,
354
+ use_child_oof: bool,
355
+ ):
347
356
  if groups is not None:
348
357
  if self._n_repeats_finished != 0:
349
358
  raise AssertionError("Bagged models cannot call fit with `groups` specified when a full k-fold set has already been fit.")
@@ -356,7 +365,7 @@ class BaggedEnsembleModel(AbstractModel):
356
365
  if k_fold is None:
357
366
  raise ValueError("k_fold cannot be None.")
358
367
  if k_fold < 1:
359
- raise ValueError(f"k_fold must be equal or greater than 1, value: ({k_fold})")
368
+ raise ValueError(f"k_fold must be equal or greater than 1, value: {k_fold}")
360
369
  if n_repeat_start != self._n_repeats_finished:
361
370
  raise ValueError(f"n_repeat_start must equal self._n_repeats_finished, values: ({n_repeat_start}, {self._n_repeats_finished})")
362
371
  if n_repeats <= n_repeat_start:
@@ -370,7 +379,26 @@ class BaggedEnsembleModel(AbstractModel):
370
379
  # TODO: Remove this limitation
371
380
  raise ValueError(f"k_fold_end must equal k_fold when (n_repeats - n_repeat_start) > 1, values: ({k_fold_end}, {k_fold})")
372
381
  if self._k is not None and self._k != k_fold:
373
- raise ValueError(f"k_fold must equal previously fit k_fold value for the current n_repeat, values: (({k_fold}, {self._k})")
382
+ raise ValueError(f"k_fold must equal previously fit k_fold value for the current n_repeat, values: ({k_fold}, {self._k})")
383
+ if use_child_oof and not self._get_tags_child().get("valid_oof", False):
384
+ raise AssertionError(
385
+ f"`use_child_oof=True` was specified, "
386
+ f"but the model {self._child_type.__name__} does not support this option. (valid_oof=False)\n"
387
+ f"\tTo enable this logic, `{self._child_type.__name__}._predict_proba_oof` must be implemented "
388
+ f"and `tags['valid_oof'] = True` must be set in `{self._child_type.__name__}._more_tags`."
389
+ )
390
+ if k_fold == 1 and not use_child_oof and not self._get_tags().get("can_get_oof_from_train", False):
391
+ logger.log(
392
+ 30,
393
+ f"\tWARNING: Fitting bagged model with `k_fold=1`, "
394
+ f"but this model doesn't support getting out-of-fold predictions from training data!\n"
395
+ f"\t\tThe model will be fit on 100% of the training data without any validation split.\n"
396
+ f"\t\tIt will then predict on the same data used to train for generating out-of-fold predictions. "
397
+ f"This will likely be EXTREMELY overfit and produce terrible results.\n"
398
+ f"\t\tWe strongly recommend not forcing bagged models to use `k_fold=1`. "
399
+ f"Instead, specify `use_child_oof=True` if the model supports this option."
400
+ )
401
+
374
402
 
375
403
  def predict_proba_children(
376
404
  self,
@@ -557,8 +585,9 @@ class BaggedEnsembleModel(AbstractModel):
557
585
  logger.log(
558
586
  30,
559
587
  f"\tWARNING: Setting `self._oof_pred_proba` by predicting on train directly! "
560
- f"This is probably a bug and should be investigated...\n"
561
- f'\tIf this is intended, set the model tag "can_get_oof_from_train" to True '
588
+ f"This is probably a bug or the user specified `num_folds=1` "
589
+ f"as an `ag_args_ensemble` hyperparameter... Results may be very poor.\n"
590
+ f'\t\tIf this is intended, set the model tag "can_get_oof_from_train" to True '
562
591
  f"in `{self.__class__.__name__}._more_tags` to avoid this warning.",
563
592
  )
564
593
  self._oof_pred_proba = model_base.predict_proba(X=X) # TODO: Cheater value, will be overfit to valid set
autogluon/core/version.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """This is the autogluon version file."""
2
- __version__ = '1.2.1b20250110'
2
+ __version__ = '1.2.1b20250111'
3
3
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.core
3
- Version: 1.2.1b20250110
3
+ Version: 1.2.1b20250111
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -43,12 +43,12 @@ Requires-Dist: tqdm<5,>=4.38
43
43
  Requires-Dist: requests
44
44
  Requires-Dist: matplotlib<3.11,>=3.7.0
45
45
  Requires-Dist: boto3<2,>=1.10
46
- Requires-Dist: autogluon.common==1.2.1b20250110
46
+ Requires-Dist: autogluon.common==1.2.1b20250111
47
47
  Provides-Extra: all
48
+ Requires-Dist: hyperopt<0.2.8,>=0.2.7; extra == "all"
48
49
  Requires-Dist: ray[default,tune]<2.41,>=2.10.0; extra == "all"
49
50
  Requires-Dist: ray[default]<2.41,>=2.10.0; extra == "all"
50
51
  Requires-Dist: pyarrow>=15.0.0; extra == "all"
51
- Requires-Dist: hyperopt<0.2.8,>=0.2.7; extra == "all"
52
52
  Provides-Extra: ray
53
53
  Requires-Dist: ray[default]<2.41,>=2.10.0; extra == "ray"
54
54
  Provides-Extra: raytune
@@ -56,11 +56,11 @@ Requires-Dist: pyarrow>=15.0.0; extra == "raytune"
56
56
  Requires-Dist: ray[default,tune]<2.41,>=2.10.0; extra == "raytune"
57
57
  Requires-Dist: hyperopt<0.2.8,>=0.2.7; extra == "raytune"
58
58
  Provides-Extra: tests
59
- Requires-Dist: types-setuptools; extra == "tests"
60
- Requires-Dist: pytest-mypy; extra == "tests"
61
59
  Requires-Dist: flake8; extra == "tests"
62
- Requires-Dist: pytest; extra == "tests"
60
+ Requires-Dist: pytest-mypy; extra == "tests"
61
+ Requires-Dist: types-setuptools; extra == "tests"
63
62
  Requires-Dist: types-requests; extra == "tests"
63
+ Requires-Dist: pytest; extra == "tests"
64
64
 
65
65
 
66
66
 
@@ -1,9 +1,9 @@
1
- autogluon.core-1.2.1b20250110-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
1
+ autogluon.core-1.2.1b20250111-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
2
2
  autogluon/core/__init__.py,sha256=8KfvvHzXX3a4q6z43Dw1yE7VtbAoiSMaglVpKDy6Xeg,245
3
3
  autogluon/core/_setup_utils.py,sha256=NqlGK6So0KG5M0LbBJNT1TI3iAmG93kd_6Brih6y2gQ,6935
4
4
  autogluon/core/constants.py,sha256=nEVLdSFJ-5O-tz3jUD3qPX65RMp7g8qOR38XlurbP4Y,3403
5
5
  autogluon/core/problem_type.py,sha256=XJmMgeNBgS7u43pDK-spTivatPyh_INOXveEXwQt-Rw,2993
6
- autogluon/core/version.py,sha256=AGq81lVpmmLWoZzbwCzgbSN_eMpjRM7ZUk1rDC3lkhc,90
6
+ autogluon/core/version.py,sha256=TigPRfmNcZyeXSLH943LINQSmE-Pv8VStKn7i2xjUOM,90
7
7
  autogluon/core/augmentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  autogluon/core/augmentation/distill_utils.py,sha256=JBlp2WOMNKoJv8aKVwJVRQSalSk8jx36HM7-k_VvkhY,9404
9
9
  autogluon/core/calibrate/__init__.py,sha256=eU6qLj7DKUhaz2HHNHDrfroRaLM-mhuSncK_v1UP4F8,62
@@ -39,7 +39,7 @@ autogluon/core/metrics/softclass_metrics.py,sha256=inn35DfftLZey0mK3OuMJPzm58eZg
39
39
  autogluon/core/models/__init__.py,sha256=dg3onYq5wW3-sfdNurnSIGpX0rpEjG_abgzyfwDM77M,408
40
40
  autogluon/core/models/_utils.py,sha256=qswE9n1ge1AJSExgstEbrZiMFmMRa4Mf5Sz8D9-XU6c,2091
41
41
  autogluon/core/models/abstract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- autogluon/core/models/abstract/_tags.py,sha256=9GZMHdbek9D5-8X-vtCXNw-eICFwoPZ82zn3j0iYyo0,2962
42
+ autogluon/core/models/abstract/_tags.py,sha256=Qr_3an0ZMig24S3OwISa-nTFfWHQe3pwPTiXq4zlEec,3409
43
43
  autogluon/core/models/abstract/abstract_model.py,sha256=bi0WOb51WJG6tAWuo3GKKgAfSSrBStwKYHEkDhVQeD4,120425
44
44
  autogluon/core/models/abstract/abstract_nn_model.py,sha256=IId0ivO8uVvmpnK9OiM2CtPVrP1ewOaQQKtQUDtK7_k,4818
45
45
  autogluon/core/models/abstract/model_trial.py,sha256=PKEo1jfLSBCOLM42QE5VBD1u41MaVMRk31zhNhLiqTw,5035
@@ -47,7 +47,7 @@ autogluon/core/models/dummy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
47
47
  autogluon/core/models/dummy/_dummy_quantile_regressor.py,sha256=i-ZW2flJ60jsMfMK24IP39Xwc55-UlBDvHmqanIf29Q,664
48
48
  autogluon/core/models/dummy/dummy_model.py,sha256=at2FZSM2_LuAQ78E2YrRCRt3UaKMyyOnc6p2rtZgA2w,1414
49
49
  autogluon/core/models/ensemble/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- autogluon/core/models/ensemble/bagged_ensemble_model.py,sha256=OImzuazZL_5J4tXz4tt-vdUYDvAClA2uIRyY2OLdpjQ,69855
50
+ autogluon/core/models/ensemble/bagged_ensemble_model.py,sha256=HuyRqdtsdN2z_t9Fa9qWN3U5dz3O7MGYn2qe4BKU9Go,71600
51
51
  autogluon/core/models/ensemble/fold_fitting_strategy.py,sha256=01vzNVvE4FIFgD6YqbhK63XoUlSztnVFsrDdsoqm75U,47021
52
52
  autogluon/core/models/ensemble/ray_parallel_fold_fitting_strategy.py,sha256=8RASa-eV6n9kUgbqQHNt7k4IrvuB9NdrunIMLYOLwgA,2068
53
53
  autogluon/core/models/ensemble/stacker_ensemble_model.py,sha256=DuDXgozvG9JYYkRvGACA7EXDAtj3Tz_uAjXTfxu5tFg,18041
@@ -89,11 +89,11 @@ autogluon/core/utils/utils.py,sha256=K05ewQuGauLnVaYwccNDk1moUDg2EEzdSlq8gsw6JVM
89
89
  autogluon/core/utils/version_utils.py,sha256=5-r8hLRKTaZbj5qo2uzE_2E4casH49Ye3WyeHlgHuz4,3252
90
90
  autogluon/core/utils/loaders/__init__.py,sha256=W5FAdQvpDcn_uisqJrlSAObWVta-YjJLKGN3NCbEgIo,109
91
91
  autogluon/core/utils/savers/__init__.py,sha256=bGWciSxAkj6u06vOC4pTvr22f_1ey0glgvmjCMEOm78,89
92
- autogluon.core-1.2.1b20250110.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
93
- autogluon.core-1.2.1b20250110.dist-info/METADATA,sha256=umPV72cojnwuMAjsfFTdvlHikJWd3oKznl6b7O27TVY,12328
94
- autogluon.core-1.2.1b20250110.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
95
- autogluon.core-1.2.1b20250110.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
96
- autogluon.core-1.2.1b20250110.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
97
- autogluon.core-1.2.1b20250110.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
98
- autogluon.core-1.2.1b20250110.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
99
- autogluon.core-1.2.1b20250110.dist-info/RECORD,,
92
+ autogluon.core-1.2.1b20250111.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
93
+ autogluon.core-1.2.1b20250111.dist-info/METADATA,sha256=SjvIvISrTQooEHQlUsjRb26e1L0lt00JCd1_tZ9JLgE,12328
94
+ autogluon.core-1.2.1b20250111.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
95
+ autogluon.core-1.2.1b20250111.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
96
+ autogluon.core-1.2.1b20250111.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
97
+ autogluon.core-1.2.1b20250111.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
98
+ autogluon.core-1.2.1b20250111.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
99
+ autogluon.core-1.2.1b20250111.dist-info/RECORD,,