autogluon.tabular 1.2.1b20250425-py3-none-any.whl → 1.2.1b20250426-py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Note: this version of autogluon.tabular has been flagged as potentially problematic.

@@ -95,17 +95,26 @@ class CatBoostModel(AbstractModel):
         Scales roughly by 5080*num_features*2^depth bytes
         For 10000 features and 6 depth, the histogram would be 3.2 GB.
         """
+        if hyperparameters is None:
+            hyperparameters = {}
         num_classes = num_classes if num_classes else 1  # self.num_classes could be None after initialization if it's a regression problem
         data_mem_usage = get_approximate_df_mem_usage(X).sum()
         data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved

         border_count = hyperparameters.get("border_count", 254)
         depth = hyperparameters.get("depth", 6)
+
+        # if depth < 7, treat it as 1 step larger for histogram size estimate
+        # this fixes cases where otherwise histogram size appears to be off by around a factor of 2 for depth=6
+        histogram_effective_depth = max(min(depth+1, 7), depth)
+
         # Formula based on manual testing, aligns with LightGBM histogram sizes
-        histogram_mem_usage_bytes = 20 * math.pow(2, depth) * len(X.columns) * border_count
+        histogram_mem_usage_bytes = 24 * math.pow(2, histogram_effective_depth) * len(X.columns) * border_count
         histogram_mem_usage_bytes *= 1.2  # Add a 20% buffer

-        approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes
+        baseline_memory_bytes = 4e8  # 400 MB baseline memory
+
+        approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes + baseline_memory_bytes
         return approx_mem_size_req

     # TODO: Use Pool in preprocess, optimize bagging to do Pool.split() to avoid re-computing pool for each fold! Requires stateful + y
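For context, here is a minimal standalone sketch of the revised CatBoost memory estimate shown in the hunk above. The function name and example numbers are illustrative only and not part of the package; the formula itself follows the diff.

```python
import math

def estimate_catboost_mem_bytes(data_mem_usage_bytes: float, num_features: int,
                                border_count: int = 254, depth: int = 6) -> float:
    # Depths below 7 are bumped one step for the histogram estimate, per the diff.
    histogram_effective_depth = max(min(depth + 1, 7), depth)
    histogram_mem_usage_bytes = 24 * math.pow(2, histogram_effective_depth) * num_features * border_count
    histogram_mem_usage_bytes *= 1.2  # 20% buffer
    baseline_memory_bytes = 4e8       # new 400 MB baseline term
    return data_mem_usage_bytes + histogram_mem_usage_bytes + baseline_memory_bytes

# Example: 10,000 features with the defaults (depth=6, border_count=254) yields
# roughly 9.8 GB before any data memory is added, versus about 3.9 GB under the
# previous 20 * 2^depth formula.
print(estimate_catboost_mem_bytes(data_mem_usage_bytes=0, num_features=10_000))
```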
@@ -3,7 +3,6 @@ from __future__ import annotations
 import gc
 import logging
 import os
-import random
 import re
 import time
 import warnings
@@ -99,6 +98,8 @@ class LGBModel(AbstractModel):
         Scales roughly by 5100*num_features*num_leaves bytes
         For 10000 features and 128 num_leaves, the histogram would be 6.5 GB.
         """
+        if hyperparameters is None:
+            hyperparameters = {}
         num_classes = num_classes if num_classes else 1  # num_classes could be None after initialization if it's a regression problem
         data_mem_usage = get_approximate_df_mem_usage(X).sum()
         data_mem_usage_bytes = data_mem_usage * 5 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved
@@ -275,8 +276,6 @@ class LGBModel(AbstractModel):
             train_params["params"]["quantile_levels"] = self.quantile_levels
         if seed_val is not None:
             train_params["params"]["seed"] = seed_val
-            random.seed(seed_val)
-            np.random.seed(seed_val)

         # Train LightGBM model:
         # Note that self.model contains a <class 'lightgbm.basic.Booster'> not a LightBGMClassifier or LightGBMRegressor object
@@ -147,6 +147,8 @@ class RFModel(AbstractModel):
         num_classes: int = 1,
         **kwargs,
     ) -> int:
+        if hyperparameters is None:
+            hyperparameters = {}
         n_estimators_final = hyperparameters.get("n_estimators", 300)
         if isinstance(n_estimators_final, int):
            n_estimators_minimum = min(40, n_estimators_final)
@@ -147,7 +147,7 @@ class EarlyStoppingCustom(EarlyStopping):

         model_size_memory_ratio = estimated_model_size_mb / available_mb

-        if (model_size_memory_ratio > 1.0) or (available_mb < 512):
+        if (model_size_memory_ratio > 0.75) or (available_mb < 512):
             logger.warning("Warning: Large XGB model size may cause OOM error if training continues")
             logger.warning(f"Available Memory: {available_mb} MB")
             logger.warning(f"Estimated XGB model size: {estimated_model_size_mb} MB")
@@ -256,12 +256,25 @@ class XGBoostModel(AbstractModel):
         num_classes: int = 1,
         **kwargs,
     ) -> int:
+        if hyperparameters is None:
+            hyperparameters = {}
         num_classes = num_classes if num_classes else 1  # self.num_classes could be None after initialization if it's a regression problem
         data_mem_usage = get_approximate_df_mem_usage(X).sum()
         data_mem_usage_bytes = data_mem_usage * 7 + data_mem_usage / 4 * num_classes  # TODO: Extremely crude approximation, can be vastly improved

         max_bin = hyperparameters.get("max_bin", 256)
         max_depth = hyperparameters.get("max_depth", 6)
+        max_leaves = hyperparameters.get("max_leaves", 0)
+        if max_leaves is None:
+            max_leaves = 0
+
+        if max_depth > 12 or max_depth == 0:  # 0 = uncapped
+            max_depth = 12  # Try our best if the value is very large, only treat it as 12.
+
+        if max_leaves != 0:  # if capped max_leaves
+            # make the effective max_depth for calculations be the lesser of the two constraints
+            max_depth = min(max_depth, math.ceil(math.log2(max_leaves)))
+
         # Formula based on manual testing, aligns with LightGBM histogram sizes
         # This approximation is less accurate than it is for LightGBM and CatBoost.
         # Note that max_depth didn't appear to reduce memory usage below 6, and it was unclear if it increased memory usage above 6.
@@ -274,7 +287,12 @@
         histogram_mem_usage_bytes = 20 * depth_modifier * len(X.columns) * max_bin
         histogram_mem_usage_bytes *= 1.2  # Add a 20% buffer

-        approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes
+        mem_size_per_estimator = num_classes * max_depth * 500  # very rough estimate
+        n_estimators = hyperparameters.get("n_estimators", 10000)
+        n_estimators_min = min(n_estimators, 1000)
+        mem_size_estimators = n_estimators_min * mem_size_per_estimator  # memory estimate after fitting up to 1000 estimators
+
+        approx_mem_size_req = data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators
         return approx_mem_size_req

     def _validate_fit_memory_usage(self, mem_error_threshold: float = 1.0, mem_warning_threshold: float = 0.75, mem_size_threshold: int = 1e9, **kwargs):
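Combining the two XGBoost hunks, a minimal sketch of the updated estimate follows. The function name and the `depth_modifier` argument are assumptions for illustration (the diff does not show how `depth_modifier` is computed), but the hyperparameter handling and the new per-estimator term mirror the diff.

```python
import math

def estimate_xgboost_mem_bytes(data_mem_usage_bytes: float, num_features: int,
                               depth_modifier: float, num_classes: int = 1,
                               hyperparameters: dict | None = None) -> float:
    if hyperparameters is None:
        hyperparameters = {}
    max_bin = hyperparameters.get("max_bin", 256)
    max_depth = hyperparameters.get("max_depth", 6)
    max_leaves = hyperparameters.get("max_leaves", 0)
    if max_leaves is None:
        max_leaves = 0
    if max_depth > 12 or max_depth == 0:  # 0 = uncapped; treat very large values as 12
        max_depth = 12
    if max_leaves != 0:  # a max_leaves cap also bounds the effective depth
        max_depth = min(max_depth, math.ceil(math.log2(max_leaves)))

    histogram_mem_usage_bytes = 20 * depth_modifier * num_features * max_bin * 1.2  # incl. 20% buffer

    # New per-estimator term, capped at the first 1000 boosting rounds.
    mem_size_per_estimator = num_classes * max_depth * 500  # very rough estimate
    n_estimators_min = min(hyperparameters.get("n_estimators", 10000), 1000)
    mem_size_estimators = n_estimators_min * mem_size_per_estimator

    return data_mem_usage_bytes + histogram_mem_usage_bytes + mem_size_estimators

# With the defaults (max_depth=6, num_classes=1, n_estimators=10000) the new
# estimator term adds 1000 * 1 * 6 * 500 = 3,000,000 bytes (~3 MB) to the estimate.
```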
@@ -1,4 +1,4 @@
 """This is the autogluon version file."""

-__version__ = "1.2.1b20250425"
+__version__ = "1.2.1b20250426"
 __lite__ = False
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.tabular
-Version: 1.2.1b20250425
+Version: 1.2.1b20250426
 Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -41,19 +41,19 @@ Requires-Dist: scipy<1.16,>=1.5.4
 Requires-Dist: pandas<2.3.0,>=2.0.0
 Requires-Dist: scikit-learn<1.7.0,>=1.4.0
 Requires-Dist: networkx<4,>=3.0
-Requires-Dist: autogluon.core==1.2.1b20250425
-Requires-Dist: autogluon.features==1.2.1b20250425
+Requires-Dist: autogluon.core==1.2.1b20250426
+Requires-Dist: autogluon.features==1.2.1b20250426
 Provides-Extra: all
-Requires-Dist: catboost<1.3,>=1.2; extra == "all"
-Requires-Dist: autogluon.core[all]==1.2.1b20250425; extra == "all"
-Requires-Dist: torch<2.7,>=2.2; extra == "all"
-Requires-Dist: xgboost<3.1,>=2.0; extra == "all"
 Requires-Dist: einops<0.9,>=0.7; extra == "all"
-Requires-Dist: huggingface-hub[torch]; extra == "all"
+Requires-Dist: torch<2.7,>=2.2; extra == "all"
 Requires-Dist: lightgbm<4.7,>=4.0; extra == "all"
+Requires-Dist: huggingface-hub[torch]; extra == "all"
 Requires-Dist: numpy<2.0.0,>=1.25; extra == "all"
 Requires-Dist: fastai<2.9,>=2.3.1; extra == "all"
 Requires-Dist: spacy<3.8; extra == "all"
+Requires-Dist: xgboost<3.1,>=2.0; extra == "all"
+Requires-Dist: autogluon.core[all]==1.2.1b20250426; extra == "all"
+Requires-Dist: catboost<1.3,>=1.2; extra == "all"
 Provides-Extra: catboost
 Requires-Dist: numpy<2.0.0,>=1.25; extra == "catboost"
 Requires-Dist: catboost<1.3,>=1.2; extra == "catboost"
@@ -66,7 +66,7 @@ Requires-Dist: imodels<2.1.0,>=1.3.10; extra == "imodels"
 Provides-Extra: lightgbm
 Requires-Dist: lightgbm<4.7,>=4.0; extra == "lightgbm"
 Provides-Extra: ray
-Requires-Dist: autogluon.core[all]==1.2.1b20250425; extra == "ray"
+Requires-Dist: autogluon.core[all]==1.2.1b20250426; extra == "ray"
 Provides-Extra: skex
 Requires-Dist: scikit-learn-intelex<2025.5,>=2024.0; extra == "skex"
 Provides-Extra: skl2onnx
@@ -1,6 +1,6 @@
-autogluon.tabular-1.2.1b20250425-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
+autogluon.tabular-1.2.1b20250426-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
 autogluon/tabular/__init__.py,sha256=2OXpJCvENRHubBTYNIPpHX93WWuFZzsJBtTZbNVHVas,400
-autogluon/tabular/version.py,sha256=zTyiK3ClhRyp8BZjIMLB2n9YsA7GaR9qnPDLREdyRQs,91
+autogluon/tabular/version.py,sha256=OsP4ql15uiax-GUPsVJ1sGZMaTOWMBXWWMnLOe5tRrY,91
 autogluon/tabular/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/configs/config_helper.py,sha256=wLgu94NjV-l2fwZacpKqjPfvk8E_RwAl_L1hfH5xO8E,21085
 autogluon/tabular/configs/feature_generator_presets.py,sha256=EV5Ym8VW15q92MwOUpTi7wZFS2QooM51fLg3RdUsn-M,1223
@@ -25,7 +25,7 @@ autogluon/tabular/models/automm/automm_model.py,sha256=GvrMBC8Z-zobalmSzX1iDHTYM
 autogluon/tabular/models/automm/ft_transformer.py,sha256=yZ9-TTA4GbtutHhz0Djkrl-rIFNxc7A2LBOFOXYOxVY,3886
 autogluon/tabular/models/catboost/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/catboost/callbacks.py,sha256=l8x17n_w7oEFs-iDECSdBKZ89yW5g1z-zvj4XLgQPkw,7098
-autogluon/tabular/models/catboost/catboost_model.py,sha256=ApltUyLY0BUuS-Rb8z2_OgkAniXrm3Zlpd9FDZkRMjM,17416
+autogluon/tabular/models/catboost/catboost_model.py,sha256=Dv62XDuJ_sFvU95xeY1gHuV_F2RoYDGaFa6-4XNwnnU,17849
 autogluon/tabular/models/catboost/catboost_softclass_utils.py,sha256=UiW0SUb3hFueW5qYtQn6Sbk7Wg7BWN4jqKWeFtbMvgU,3919
 autogluon/tabular/models/catboost/catboost_utils.py,sha256=YSc94V4DjrwbmkeUM8306zV7z21oq-K-qGCOj0UE_wg,3167
 autogluon/tabular/models/catboost/hyperparameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -55,7 +55,7 @@ autogluon/tabular/models/knn/knn_rapids_model.py,sha256=0FFApNZFH8nyrDqlBSUV7jO-
 autogluon/tabular/models/knn/knn_utils.py,sha256=XU1cxVXp1BAoQnja2_KmSIn9_q9gZkjAya7-9b0uStk,7455
 autogluon/tabular/models/lgb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/lgb/callbacks.py,sha256=0X42-nAbftKnu_zmFPDf8S3RrUJJjsJ1Qs_TPAJxzjU,11367
-autogluon/tabular/models/lgb/lgb_model.py,sha256=3hIk24x3HZErGKW9VfgN7-qu92-fovjATXqhxh81EPo,25053
+autogluon/tabular/models/lgb/lgb_model.py,sha256=jBoku48tvxqWToCc0qUZWRhTWPUHxcDm_8ku435_eSg,25037
 autogluon/tabular/models/lgb/lgb_utils.py,sha256=jzTDTzP-z7gcBGZyy1_0YkyTOLbU5DLeRqtil4FCZPI,7382
 autogluon/tabular/models/lgb/hyperparameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/lgb/hyperparameters/parameters.py,sha256=LLEQ-Ns3HElWBsFJx3ogRV7L6qw_nXlcl7EyO0C0fVQ,1336
@@ -68,7 +68,7 @@ autogluon/tabular/models/lr/hyperparameters/__init__.py,sha256=47DEQpj8HBSa-_TIm
 autogluon/tabular/models/lr/hyperparameters/parameters.py,sha256=Hr5YC13zjbt3CfCbzGj8iXUIuDn-Q7FvDT2uSuiSVlM,1414
 autogluon/tabular/models/lr/hyperparameters/searchspaces.py,sha256=Igywc-B6qJ9EBLdasrDhW-Ot5FGirIzbXLwv5HRe5Xo,276
 autogluon/tabular/models/rf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autogluon/tabular/models/rf/rf_model.py,sha256=MwhTUk9PknLgZWyT5bz_7DtjEW8ADU5wiBvlOjzVLkA,21529
+autogluon/tabular/models/rf/rf_model.py,sha256=2iG45F0k17No7ycrfVMWBfU_WqJwC4MTehaFGsguLzE,21598
 autogluon/tabular/models/rf/rf_quantile.py,sha256=2S8FE8po9lMnZaeKuVkzOUFOcdil46ZbFqm49OuvNZY,36460
 autogluon/tabular/models/rf/rf_rapids_model.py,sha256=3s-8M11dzCl_2Lu5iB3H8YjHLgyP_SElrm_4w_HfmqY,2028
 autogluon/tabular/models/rf/compilers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -120,8 +120,8 @@ autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py,sha256=tttzR5
 autogluon/tabular/models/text_prediction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/text_prediction/text_prediction_v1_model.py,sha256=PBN7F98qgEAO6U76rV_hxZfAmKr_XpVKjElOdBvfX8c,1090
 autogluon/tabular/models/xgboost/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autogluon/tabular/models/xgboost/callbacks.py,sha256=uynimXya07XQMBkDvec-7mXK6OfMGP6M8MiVYu8OVRI,7008
-autogluon/tabular/models/xgboost/xgboost_model.py,sha256=tTSnTzEot2JB0qEvhki4h3RdaLjEpfMs-jWKsxlJWO4,14304
+autogluon/tabular/models/xgboost/callbacks.py,sha256=PuRQUg3AEjgvFa-dpstRFoEVM9jHDe5W4XYSdDPRqoE,7009
+autogluon/tabular/models/xgboost/xgboost_model.py,sha256=_HFwfEbAg0CllEUEk9HVLsXXVBcuC_qsGd7z7arJYPs,15220
 autogluon/tabular/models/xgboost/xgboost_utils.py,sha256=FVqZ8h4JAe_pifSvNx83cLZHwsuzTXylrrcan07AoNo,5757
 autogluon/tabular/models/xgboost/hyperparameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/models/xgboost/hyperparameters/parameters.py,sha256=ay6bVVpiPzftbtz6TTS76w7j4vjDjzHFpuf2Bjf6Zu4,1673
@@ -146,11 +146,11 @@ autogluon/tabular/trainer/model_presets/presets.py,sha256=IMBRSBc-djd35gkb2rtXrW
 autogluon/tabular/trainer/model_presets/presets_distill.py,sha256=MnFC2GJc6RmDBNAGbsO2XMfo3PjR8cUrZoilWW8gTYQ,3295
 autogluon/tabular/tuning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/tuning/feature_pruner.py,sha256=9iNku8gVbYEkjuKlyITPJDicsNkoraaQOlINQq9iZlQ,6877
-autogluon.tabular-1.2.1b20250425.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
-autogluon.tabular-1.2.1b20250425.dist-info/METADATA,sha256=WXVB7wtSVG30KkMeusKLaBZaU7FWDJrrXbmC2rCH6Xs,14069
-autogluon.tabular-1.2.1b20250425.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
-autogluon.tabular-1.2.1b20250425.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-autogluon.tabular-1.2.1b20250425.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.tabular-1.2.1b20250425.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.tabular-1.2.1b20250425.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-autogluon.tabular-1.2.1b20250425.dist-info/RECORD,,
+autogluon.tabular-1.2.1b20250426.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
+autogluon.tabular-1.2.1b20250426.dist-info/METADATA,sha256=g4AxiS8dTRMHEO6FuxALlnv6-njEhYubb6rBmohshOo,14069
+autogluon.tabular-1.2.1b20250426.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
+autogluon.tabular-1.2.1b20250426.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+autogluon.tabular-1.2.1b20250426.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.tabular-1.2.1b20250426.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.tabular-1.2.1b20250426.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+autogluon.tabular-1.2.1b20250426.dist-info/RECORD,,