autogluon.tabular 1.2.1b20250425-py3-none-any.whl → 1.2.1b20250426-py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Note: this version of autogluon.tabular has been flagged as potentially problematic.

@@ -95,17 +95,26 @@ class CatBoostModel(AbstractModel):
         Scales roughly by 5080*num_features*2^depth bytes
         For 10000 features and 6 depth, the histogram would be 3.2 GB.
         """
+        if hyperparameters is None:
+            hyperparameters = {}
         num_classes = num_classes if num_classes else 1  # self.num_classes could be None after initialization if it's a regression problem
         data_mem_usage = get_approximate_df_mem_usage(X).sum()
         data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved

         border_count = hyperparameters.get("border_count", 254)
         depth = hyperparameters.get("depth", 6)
+
+        # if depth < 7, treat it as 1 step larger for histogram size estimate
+        # this fixes cases where otherwise histogram size appears to be off by around a factor of 2 for depth=6
+        histogram_effective_depth = max(min(depth+1, 7), depth)
+
         # Formula based on manual testing, aligns with LightGBM histogram sizes
-        histogram_mem_usage_bytes = 20 * math.pow(2, depth) * len(X.columns) * border_count
+        histogram_mem_usage_bytes = 24 * math.pow(2, histogram_effective_depth) * len(X.columns) * border_count
         histogram_mem_usage_bytes *= 1.2  # Add a 20% buffer

-        approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes
+        baseline_memory_bytes = 4e8  # 400 MB baseline memory
+
+        approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes + baseline_memory_bytes
         return approx_mem_size_req

     # TODO: Use Pool in preprocess, optimize bagging to do Pool.split() to avoid re-computing pool for each fold! Requires stateful + y
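For context, here is a minimal standalone sketch of the revised CatBoost memory estimate shown in the hunk above. The function name and example numbers are illustrative only and not part of the package; the formula itself follows the diff.

```python
import math

def estimate_catboost_mem_bytes(data_mem_usage_bytes: float, num_features: int,
                                border_count: int = 254, depth: int = 6) -> float:
    # Depths below 7 are bumped one step for the histogram estimate, per the diff.
    histogram_effective_depth = max(min(depth + 1, 7), depth)
    histogram_mem_usage_bytes = 24 * math.pow(2, histogram_effective_depth) * num_features * border_count
    histogram_mem_usage_bytes *= 1.2  # 20% buffer
    baseline_memory_bytes = 4e8       # new 400 MB baseline term
    return data_mem_usage_bytes + histogram_mem_usage_bytes + baseline_memory_bytes

# Example: 10,000 features with the defaults (depth=6, border_count=254) yields
# roughly 9.8 GB before any data memory is added, versus about 3.9 GB under the
# previous 20 * 2^depth formula.
print(estimate_catboost_mem_bytes(data_mem_usage_bytes=0, num_features=10_000))
```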
@@ -3,7 +3,6 @@ from __future__ import annotations
 import gc
 import logging
 import os
-import random
 import re
 import time
 import warnings
@@ -99,6 +98,8 @@ class LGBModel(AbstractModel):
         Scales roughly by 5100*num_features*num_leaves bytes
         For 10000 features and 128 num_leaves, the histogram would be 6.5 GB.
         """
+        if hyperparameters is None:
+            hyperparameters = {}
         num_classes = num_classes if num_classes else 1  # num_classes could be None after initialization if it's a regression problem
         data_mem_usage = get_approximate_df_mem_usage(X).sum()
         data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved
@@ -275,8 +276,6 @@ class LGBModel(AbstractModel):
             train_params["params"]["quantile_levels"] = self.quantile_levels
         if seed_val is not None:
             train_params["params"]["seed"] = seed_val
-            random.seed(seed_val)
-            np.random.seed(seed_val)

         # Train LightGBM model:
         # Note that self.model contains a <class 'lightgbm.basic.Booster'> not a LightBGMClassifier or LightGBMRegressor object
@@ -147,6 +147,8 @@ class RFModel(AbstractModel):
         num_classes: int = 1,
         **kwargs,
     ) -> int:
+        if hyperparameters is None:
+            hyperparameters = {}
         n_estimators_final = hyperparameters.get("n_estimators", 300)
         if isinstance(n_estimators_final, int):
            n_estimators_minimum = min(40, n_estimators_final)
@@ -147,7 +147,7 @@ class EarlyStoppingCustom(EarlyStopping):

         model_size_memory_ratio = estimated_model_size_mb / available_mb

-        if (model_size_memory_ratio > 1.0) or (available_mb < 512):
+        if (model_size_memory_ratio > 0.75) or (available_mb < 512):
             logger.warning("Warning: Large XGB model size may cause OOM error if training continues")
             logger.warning(f"Available Memory: {available_mb} MB")
             logger.warning(f"Estimated XGB model size: {estimated_model_size_mb} MB")
@@ -256,12 +256,25 @@ class XGBoostModel(AbstractModel):
         num_classes: int = 1,
         **kwargs,
     ) -> int:
+        if hyperparameters is None:
+            hyperparameters = {}
         num_classes = num_classes if num_classes else 1  # self.num_classes could be None after initialization if it's a regression problem
         data_mem_usage = get_approximate_df_mem_usage(X).sum()
         data_mem_usage_bytes = data_mem_usage * 7 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved

         max_bin = hyperparameters.get("max_bin", 256)
         max_depth = hyperparameters.get("max_depth", 6)
+        max_leaves = hyperparameters.get("max_leaves", 0)
+        if max_leaves is None:
+            max_leaves = 0
+
+        if max_depth > 12 or max_depth == 0:  # 0 = uncapped
+            max_depth = 12  # Try our best if the value is very large, only treat it as 12.
+
+        if max_leaves != 0:  # if capped max_leaves
+            # make the effective max_depth for calculations be the lesser of the two constraints
+            max_depth = min(max_depth, math.ceil(math.log2(max_leaves)))
+
         # Formula based on manual testing, aligns with LightGBM histogram sizes
         # This approximation is less accurate than it is for LightGBM and CatBoost.
         # Note that max_depth didn't appear to reduce memory usage below 6, and it was unclear if it increased memory usage above 6.
@@ -274,7 +287,12 @@
         histogram_mem_usage_bytes = 20 * depth_modifier * len(X.columns) * max_bin
         histogram_mem_usage_bytes *= 1.2  # Add a 20% buffer

-        approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes
+        mem_size_per_estimator = num_classes * max_depth * 500  # very rough estimate
+        n_estimators = hyperparameters.get("n_estimators", 10000)
+        n_estimators_min = min(n_estimators, 1000)
+        mem_size_estimators = n_estimators_min * mem_size_per_estimator  # memory estimate after fitting up to 1000 estimators
+
+        approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators
         return approx_mem_size_req

     def _validate_fit_memory_usage(self, mem_error_threshold: float = 1.0, mem_warning_threshold: float = 0.75, mem_size_threshold: int = 1e9, **kwargs):
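Combining the two XGBoost hunks, a minimal sketch of the updated estimate follows. The function name and the `depth_modifier` argument are assumptions for illustration (the diff does not show how `depth_modifier` is computed), but the hyperparameter handling and the new per-estimator term mirror the diff.

```python
import math

def estimate_xgboost_mem_bytes(data_mem_usage_bytes: float, num_features: int,
                               depth_modifier: float, num_classes: int = 1,
                               hyperparameters: dict | None = None) -> float:
    if hyperparameters is None:
        hyperparameters = {}
    max_bin = hyperparameters.get("max_bin", 256)
    max_depth = hyperparameters.get("max_depth", 6)
    max_leaves = hyperparameters.get("max_leaves", 0)
    if max_leaves is None:
        max_leaves = 0
    if max_depth > 12 or max_depth == 0:  # 0 = uncapped; treat very large values as 12
        max_depth = 12
    if max_leaves != 0:  # a max_leaves cap also bounds the effective depth
        max_depth = min(max_depth, math.ceil(math.log2(max_leaves)))

    histogram_mem_usage_bytes = 20 * depth_modifier * num_features * max_bin * 1.2  # incl. 20% buffer

    # New per-estimator term, capped at the first 1000 boosting rounds.
    mem_size_per_estimator = num_classes * max_depth * 500  # very rough estimate
    n_estimators_min = min(hyperparameters.get("n_estimators", 10000), 1000)
    mem_size_estimators = n_estimators_min * mem_size_per_estimator

    return data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators

# With the defaults (max_depth=6, num_classes=1, n_estimators=10000) the new
# estimator term adds 1000 * 1 * 6 * 500 = 3,000,000 bytes (~3 MB) to the estimate.
```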
@@ -1,4 +1,4 @@
 """This is the autogluon version file."""

-__version__ = "1.2.1b20250425"
+__version__ = "1.2.1b20250426"
 __lite__ = False
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.tabular
-Version: 1.2.1b20250425
+Version: 1.2.1b20250426
 Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -41,19 +41,19 @@ Requires-Dist: scipy<1.16,>=1.5.4
 Requires-Dist: pandas<2.3.0,>=2.0.0
 Requires-Dist: scikit-learn<1.7.0,>=1.4.0
 Requires-Dist: networkx<4,>=3.0
-Requires-Dist: autogluon.core==1.2.1b20250425
-Requires-Dist: autogluon.features==1.2.1b20250425
+Requires-Dist: autogluon.core==1.2.1b20250426
+Requires-Dist: autogluon.features==1.2.1b20250426
 Provides-Extra: all
-Requires-Dist: catboost<1.3,>=1.2; extra == "all"
-Requires-Dist: autogluon.core[all]==1.2.1b20250425; extra == "all"
-Requires-Dist: torch<2.7,>=2.2; extra == "all"
-Requires-Dist: xgboost<3.1,>=2.0; extra == "all"
 Requires-Dist: einops<0.9,>=0.7; extra == "all"
-Requires-Dist: huggingface-hub[torch]; extra == "all"
+Requires-Dist: torch<2.7,>=2.2; extra == "all"
 Requires-Dist: lightgbm<4.7,>=4.0; extra == "all"
+Requires-Dist: huggingface-hub[torch]; extra == "all"
 Requires-Dist: numpy<2.0.0,>=1.25; extra == "all"
 Requires-Dist: fastai<2.9,>=2.3.1; extra == "all"
 Requires-Dist: spacy<3.8; extra == "all"
+Requires-Dist: xgboost<3.1,>=2.0; extra == "all"
+Requires-Dist: autogluon.core[all]==1.2.1b20250426; extra == "all"
+Requires-Dist: catboost<1.3,>=1.2; extra == "all"
 Provides-Extra: catboost
 Requires-Dist: numpy<2.0.0,>=1.25; extra == "catboost"
 Requires-Dist: catboost<1.3,>=1.2; extra == "catboost"
@@ -66,7 +66,7 @@ Requires-Dist: imodels<2.1.0,>=1.3.10; extra == "imodels"
 Provides-Extra: lightgbm
 Requires-Dist: lightgbm<4.7,>=4.0; extra == "lightgbm"
 Provides-Extra: ray
-Requires-Dist: autogluon.core[all]==1.2.1b20250425; extra == "ray"
+Requires-Dist: autogluon.core[all]==1.2.1b20250426; extra == "ray"
 Provides-Extra: skex
 Requires-Dist: scikit-learn-intelex<2025.5,>=2024.0; extra == "skex"
 Provides-Extra: skl2onnx
@@ -1,6 +1,6 @@
-autogluon.tabular-1.2.1b20250425-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
+autogluon.tabular-1.2.1b20250426-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
 autogluon/tabular/__init__.py,sha256=2OXpJCvENRHubBTYNIPpHX93WWuFZzsJBtTZbNVHVas,400
-autogluon/tabular/version.py,sha256=zTyiK3ClhRyp8BZjIMLB2n9YsA7GaR9qnPDLREdyRQs,91
+autogluon/tabular/version.py,sha256=OsP4ql15uiax-GUPsVJ1sGZMaTOWMBXWWMnLOe5tRrY,91
 autogluon/tabular/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/configs/config_helper.py,sha256=wLgu94NjV-l2fwZacpKqjPfvk8E_RwAl_L1hfH5xO8E,21085
 autogluon/tabular/configs/feature_generator_presets.py,sha256=EV5Ym8VW15q92MwOUpTi7wZFS2QooM51fLg3RdUsn-M,1223
@@ -25,7 +25,7 @@ autogluon/tabular/models/automm/automm_model.py,sha256=GvrMBC8Z-zobalmSzX1iDHTYM
 autogluon/tabular/models/automm/ft_transformer.py,sha256=yZ9-TTA4GbtutHhz0Djkrl-rIFNxc7A2LBOFOXYOxVY,3886
 autogluon/tabular/models/catboost/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/catboost/callbacks.py,sha256=l8x17n_w7oEFs-iDECSdBKZ89yW5g1z-zvj4XLgQPkw,7098
-autogluon/tabular/models/catboost/catboost_model.py,sha256=ApltUyLY0BUuS-Rb8z2_OgkAniXrm3Zlpd9FDZkRMjM,17416
+autogluon/tabular/models/catboost/catboost_model.py,sha256=Dv62XDuJ_sFvU95xeY1gHuV_F2RoYDGaFa6-4XNwnnU,17849
 autogluon/tabular/models/catboost/catboost_softclass_utils.py,sha256=UiW0SUb3hFueW5qYtQn6Sbk7Wg7BWN4jqKWeFtbMvgU,3919
 autogluon/tabular/models/catboost/catboost_utils.py,sha256=YSc94V4DjrwbmkeUM8306zV7z21oq-K-qGCOj0UE_wg,3167
 autogluon/tabular/models/catboost/hyperparameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -55,7 +55,7 @@ autogluon/tabular/models/knn/knn_rapids_model.py,sha256=0FFApNZFH8nyrDqlBSUV7jO-
 autogluon/tabular/models/knn/knn_utils.py,sha256=XU1cxVXp1BAoQnja2_KmSIn9_q9gZkjAya7-9b0uStk,7455
 autogluon/tabular/models/lgb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/lgb/callbacks.py,sha256=0X42-nAbftKnu_zmFPDf8S3RrUJJjsJ1Qs_TPAJxzjU,11367
-autogluon/tabular/models/lgb/lgb_model.py,sha256=3hIk24x3HZErGKW9VfgN7-qu92-fovjATXqhxh81EPo,25053
+autogluon/tabular/models/lgb/lgb_model.py,sha256=jBoku48tvxqWToCc0qUZWRhTWPUHxcDm_8ku435_eSg,25037
 autogluon/tabular/models/lgb/lgb_utils.py,sha256=jzTDTzP-z7gcBGZyy1_0YkyTOLbU5DLeRqtil4FCZPI,7382
 autogluon/tabular/models/lgb/hyperparameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/lgb/hyperparameters/parameters.py,sha256=LLEQ-Ns3HElWBsFJx3ogRV7L6qw_nXlcl7EyO0C0fVQ,1336
@@ -68,7 +68,7 @@ autogluon/tabular/models/lr/hyperparameters/__init__.py,sha256=47DEQpj8HBSa-_TIm
 autogluon/tabular/models/lr/hyperparameters/parameters.py,sha256=Hr5YC13zjbt3CfCbzGj8iXUIuDn-Q7FvDT2uSuiSVlM,1414
 autogluon/tabular/models/lr/hyperparameters/searchspaces.py,sha256=Igywc-B6qJ9EBLdasrDhW-Ot5FGirIzbXLwv5HRe5Xo,276
 autogluon/tabular/models/rf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autogluon/tabular/models/rf/rf_model.py,sha256=MwhTUk9PknLgZWyT5bz_7DtjEW8ADU5wiBvlOjzVLkA,21529
+autogluon/tabular/models/rf/rf_model.py,sha256=2iG45F0k17No7ycrfVMWBfU_WqJwC4MTehaFGsguLzE,21598
 autogluon/tabular/models/rf/rf_quantile.py,sha256=2S8FE8po9lMnZaeKuVkzOUFOcdil46ZbFqm49OuvNZY,36460
 autogluon/tabular/models/rf/rf_rapids_model.py,sha256=3s-8M11dzCl_2Lu5iB3H8YjHLgyP_SElrm_4w_HfmqY,2028
 autogluon/tabular/models/rf/compilers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -120,8 +120,8 @@ autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py,sha256=tttzR5
 autogluon/tabular/models/text_prediction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/text_prediction/text_prediction_v1_model.py,sha256=PBN7F98qgEAO6U76rV_hxZfAmKr_XpVKjElOdBvfX8c,1090
 autogluon/tabular/models/xgboost/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autogluon/tabular/models/xgboost/callbacks.py,sha256=uynimXya07XQMBkDvec-7mXK6OfMGP6M8MiVYu8OVRI,7008
-autogluon/tabular/models/xgboost/xgboost_model.py,sha256=tTSnTzEot2JB0qEvhki4h3RdaLjEpfMs-jWKsxlJWO4,14304
+autogluon/tabular/models/xgboost/callbacks.py,sha256=PuRQUg3AEjgvFa-dpstRFoEVM9jHDe5W4XYSdDPRqoE,7009
+autogluon/tabular/models/xgboost/xgboost_model.py,sha256=_HFwfEbAg0CllEUEk9HVLsXXVBcuC_qsGd7z7arJYPs,15220
 autogluon/tabular/models/xgboost/xgboost_utils.py,sha256=FVqZ8h4JAe_pifSvNx83cLZHwsuzTXylrrcan07AoNo,5757
 autogluon/tabular/models/xgboost/hyperparameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/xgboost/hyperparameters/parameters.py,sha256=ay6bVVpiPzftbtz6TTS76w7j4vjDjzHFpuf2Bjf6Zu4,1673
@@ -146,11 +146,11 @@ autogluon/tabular/trainer/model_presets/presets.py,sha256=IMBRSBc-djd35gkb2rtXrW
 autogluon/tabular/trainer/model_presets/presets_distill.py,sha256=MnFC2GJc6RmDBNAGbsO2XMfo3PjR8cUrZoilWW8gTYQ,3295
 autogluon/tabular/tuning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/tuning/feature_pruner.py,sha256=9iNku8gVbYEkjuKlyITPJDicsNkoraaQOlINQq9iZlQ,6877
-autogluon.tabular-1.2.1b20250425.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
-autogluon.tabular-1.2.1b20250425.dist-info/METADATA,sha256=WXVB7wtSVG30KkMeusKLaBZaU7FWDJrrXbmC2rCH6Xs,14069
-autogluon.tabular-1.2.1b20250425.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
-autogluon.tabular-1.2.1b20250425.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-autogluon.tabular-1.2.1b20250425.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.tabular-1.2.1b20250425.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.tabular-1.2.1b20250425.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-autogluon.tabular-1.2.1b20250425.dist-info/RECORD,,
+autogluon.tabular-1.2.1b20250426.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
+autogluon.tabular-1.2.1b20250426.dist-info/METADATA,sha256=g4AxiS8dTRMHEO6FuxALlnv6-njEhYubb6rBmohshOo,14069
+autogluon.tabular-1.2.1b20250426.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
+autogluon.tabular-1.2.1b20250426.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+autogluon.tabular-1.2.1b20250426.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.tabular-1.2.1b20250426.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.tabular-1.2.1b20250426.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+autogluon.tabular-1.2.1b20250426.dist-info/RECORD,,