PyPI - autogluon.tabular - Versions diffs - 1.2.1b20250220__py3-none-any.whl → 1.2.1b20250222__py3-none-any.whl - Mend

autogluon.tabular 1.2.1b20250220__py3-none-any.whl → 1.2.1b20250222__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

autogluon/tabular/experimental/plot_leaderboard.py ADDED Viewed

@@ -0,0 +1,234 @@
+from __future__ import annotations
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.figure import Figure
+from autogluon.tabular import TabularPredictor
+def _cumulative_min_idx(series: pd.Series) -> pd.Series:
+    """
+    Parameters
+    ----------
+    series: pd.Series
+    Returns
+    -------
+    pd.Series
+        The index of the cumulative min of the series values.
+    """
+    min_val = float('inf')
+    min_index = -1
+    result = []
+    for i, val in enumerate(series):
+        if pd.isna(val):
+            result.append(min_index)
+        elif val < min_val:
+            min_val = val
+            min_index = i
+            result.append(min_index)
+        else:
+            result.append(min_index)
+    return pd.Series(series.index[result], index=series.index)
+def compute_cumulative_leaderboard_stats(leaderboard: pd.DataFrame) -> pd.DataFrame:
+    """
+    Parameters
+    ----------
+    leaderboard: pd.DataFrame
+    Returns
+    -------
+    leaderboard_stats: pd.DataFrame
+    """
+    leaderboard = leaderboard.copy(deep=True)
+    leaderboard = leaderboard.sort_values(by=["fit_order"]).set_index("model")
+    leaderboard["best_model_so_far"] = _cumulative_min_idx(leaderboard["metric_error_val"])
+    leaderboard["best_idx_so_far"] = leaderboard["best_model_so_far"].map(leaderboard["fit_order"])
+    leaderboard["time_so_far"] = leaderboard["fit_time_marginal"].cumsum()
+    leaderboard["metric_error_val_so_far"] = leaderboard["best_model_so_far"].map(leaderboard["metric_error_val"])
+    if "metric_error_test" in leaderboard:
+        leaderboard["metric_error_test_so_far"] = leaderboard["best_model_so_far"].map(leaderboard["metric_error_test"])
+    leaderboard = leaderboard.reset_index(drop=False).set_index("fit_order")
+    return leaderboard
+# TODO: Include constraints as options:
+#  infer_limit
+#  disk_usage
+# TODO: Avoid calling leaderboard on the original models again
+# TODO: Calibration?
+def compute_cumulative_leaderboard_stats_ensemble(
+    leaderboard: pd.DataFrame,
+    predictor: TabularPredictor,
+    test_data: pd.DataFrame | None = None,
+    cleanup_ensembles: bool = True,
+) -> pd.DataFrame:
+    """
+    Parameters
+    ----------
+    leaderboard: pd.DataFrame
+    predictor: TabularPredictor
+    test_data: pd.DataFrame | None, default None
+    cleanup_ensembles: bool, default True
+    Returns
+    -------
+    leaderboard_stats: pd.DataFrame
+    """
+    leaderboard_stats = compute_cumulative_leaderboard_stats(leaderboard)
+    model_fit_order = list(leaderboard_stats["model"])
+    ens_names = []
+    for i in range(len(model_fit_order)):
+        models_to_ens = model_fit_order[:i + 1]
+        ens_name = predictor.fit_weighted_ensemble(base_models=models_to_ens, name_suffix=f"_fit_{i + 1}")[0]
+        ens_names.append(ens_name)
+    leaderboard_stats_ens = predictor.leaderboard(test_data, score_format="error", display=False)
+    leaderboard_stats_ens = leaderboard_stats_ens[leaderboard_stats_ens["model"].isin(ens_names)]
+    leaderboard_stats_ens = leaderboard_stats_ens.set_index("model").reindex(ens_names).reset_index()
+    leaderboard_stats_ens["fit_order"] = leaderboard_stats.index
+    leaderboard_stats_ens["model"] = leaderboard_stats["model"].values
+    leaderboard_stats_ens = compute_cumulative_leaderboard_stats(leaderboard_stats_ens)
+    leaderboard_stats["metric_error_val_so_far_ens"] = leaderboard_stats_ens["metric_error_val_so_far"]
+    if test_data is not None:
+        leaderboard_stats["metric_error_test_so_far_ens"] = leaderboard_stats_ens["metric_error_test_so_far"]
+    leaderboard_stats["best_idx_so_far_ens"] = leaderboard_stats_ens["best_idx_so_far"]
+    leaderboard_stats["best_model_so_far_ens"] = leaderboard_stats_ens["best_model_so_far"]
+    if cleanup_ensembles:
+        predictor.delete_models(models_to_delete=ens_names, dry_run=False)
+    return leaderboard_stats
+def plot_leaderboard_from_predictor(
+    predictor: TabularPredictor,
+    test_data: pd.DataFrame | None = None,
+    ensemble: bool = False,
+    include_val: bool = True,
+) -> tuple[Figure, pd.DataFrame]:
+    """
+    Parameters
+    ----------
+    predictor: TabularPredictor
+    test_data: pd.DataFrame | None, default None
+        If specified, plots the test error.
+    ensemble: bool, default False
+        If True, additionally plots the results of cumulatively ensembling models at each step.
+    include_val: bool, default True
+        If True, plots the validation error.
+    Returns
+    -------
+    fig: Figure
+    leaderboard_stats: pd.DataFrame
+    Examples
+    --------
+    >>> data_root = 'https://autogluon.s3.amazonaws.com/datasets/Inc/'
+    >>> predictor_example = TabularPredictor(label="class").fit(train_data=data_root + "train.csv", time_limit=60)
+    >>> figure, lb = plot_leaderboard_from_predictor(predictor=predictor_example, test_data=data_root + "test.csv", ensemble=True)
+    >>> with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.width", 1000):
+    >>>     print(lb)
+    >>> figure.savefig("example_leaderboard_plot.png")
+    """
+    leaderboard = predictor.leaderboard(test_data, score_format="error", display=False)
+    if ensemble:
+        leaderboard_order_sorted = compute_cumulative_leaderboard_stats_ensemble(leaderboard=leaderboard, test_data=test_data, predictor=predictor)
+    else:
+        leaderboard_order_sorted = compute_cumulative_leaderboard_stats(leaderboard=leaderboard)
+    return plot_leaderboard(leaderboard=leaderboard_order_sorted, preprocess=False, ensemble=ensemble, include_val=include_val)
+def plot_leaderboard(
+    leaderboard: pd.DataFrame,
+    preprocess: bool = True,
+    ensemble: bool = False,
+    include_val: bool = True,
+    include_test: bool | None = None,
+) -> tuple[Figure, pd.DataFrame]:
+    """
+    Parameters
+    ----------
+    leaderboard: pd.DataFrame
+        Either the raw leaderboard output of `predictor.leaderboard(..., score_format="error")` or the output of `compute_cumulative_leaderboard_stats`.
+    preprocess: bool, default True
+        Whether to preprocess the leaderboard to obtain leaderboard_stats.
+        Set to False if `leaderboard` has already been transformed
+        via `compute_cumulative_leaderboard_stats` or `compute_cumulative_leaderboard_stats_ensemble`.
+    ensemble: bool, default False
+        If True, additionally plots the results of cumulatively ensembling models at each step.
+        Can only be set to True if ensemble columns are present in the leaderboard,
+        which are generated by first calling `compute_cumulative_leaderboard_stats_ensemble`.
+    include_val: bool, default True
+        If True, plots the validation error.
+    include_test: bool | None, default None
+        If True, plots the test error.
+        If None, infers based on the existence of the test error column in `leaderboard`.
+    Returns
+    -------
+    fig: Figure
+    leaderboard_stats: pd.DataFrame
+    """
+    leaderboard_order_sorted = leaderboard
+    if preprocess:
+        if ensemble:
+            raise AssertionError(
+                f"Cannot have both `preprocess=True` and `ensemble=True`."
+                f"Instead call `plot_leaderboard_from_predictor(..., ensemble=True)`"
+            )
+        leaderboard_order_sorted = compute_cumulative_leaderboard_stats(leaderboard=leaderboard_order_sorted)
+    eval_metric = leaderboard_order_sorted["eval_metric"].iloc[0]
+    if include_test is None:
+        include_test = "metric_error_test_so_far" in leaderboard_order_sorted
+    # TODO: View on inference time, can take from ensemble model, 3rd dimension, color?
+    fig, axes = plt.subplots(1, 2, sharey=True)
+    fig.suptitle('AutoGluon Metric Error Over Time')
+    ax = axes[0]
+    if include_test:
+        ax.plot(leaderboard_order_sorted.index, leaderboard_order_sorted["metric_error_test_so_far"].values, '-', color="b", label="test")
+    if include_val:
+        ax.plot(leaderboard_order_sorted.index, leaderboard_order_sorted["metric_error_val_so_far"].values, '-', color="orange", label="val")
+    if ensemble:
+        if include_test:
+            ax.plot(leaderboard_order_sorted.index, leaderboard_order_sorted["metric_error_test_so_far_ens"].values, '--', color="b", label="test (ens)")
+        if include_val:
+            ax.plot(leaderboard_order_sorted.index, leaderboard_order_sorted["metric_error_val_so_far_ens"].values, '--', color="orange", label="val (ens)")
+    ax.set_xlim(left=1, right=leaderboard_order_sorted.index.max())
+    ax.set_xlabel('# Models Fit')
+    ax.set_ylabel(f'Metric Error ({eval_metric})')
+    ax.grid()
+    ax = axes[1]
+    if include_test:
+        ax.plot(leaderboard_order_sorted["time_so_far"].values, leaderboard_order_sorted["metric_error_test_so_far"].values, '-', color="b", label="test")
+    if include_val:
+        ax.plot(leaderboard_order_sorted["time_so_far"].values, leaderboard_order_sorted["metric_error_val_so_far"].values, '-', color="orange", label="val")
+    if ensemble:
+        if include_test:
+            ax.plot(leaderboard_order_sorted["time_so_far"].values, leaderboard_order_sorted["metric_error_test_so_far_ens"].values, '--', color="b", label="test (ens)")
+        if include_val:
+            ax.plot(leaderboard_order_sorted["time_so_far"].values, leaderboard_order_sorted["metric_error_val_so_far_ens"].values, '--', color="orange", label="val (ens)")
+    ax.set_xlabel('Time Elapsed (s)')
+    ax.grid()
+    ax.legend()
+    return fig, leaderboard_order_sorted

autogluon/tabular/version.py CHANGED Viewed

@@ -1,4 +1,4 @@
 """This is the autogluon version file."""
-__version__ = "1.2.1b20250220"
+__version__ = "1.2.1b20250222"
 __lite__ = False

{autogluon.tabular-1.2.1b20250220.dist-info → autogluon.tabular-1.2.1b20250222.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.tabular
-Version: 1.2.1b20250220
+Version: 1.2.1b20250222
 Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -41,19 +41,19 @@ Requires-Dist: scipy<1.16,>=1.5.4
 Requires-Dist: pandas<2.3.0,>=2.0.0
 Requires-Dist: scikit-learn<1.5.3,>=1.4.0
 Requires-Dist: networkx<4,>=3.0
-Requires-Dist: autogluon.core==1.2.1b20250220
-Requires-Dist: autogluon.features==1.2.1b20250220
+Requires-Dist: autogluon.core==1.2.1b20250222
+Requires-Dist: autogluon.features==1.2.1b20250222
 Provides-Extra: all
-Requires-Dist: catboost<1.3,>=1.2; extra == "all"
-Requires-Dist: fastai<2.8,>=2.3.1; extra == "all"
-Requires-Dist: autogluon.core[all]==1.2.1b20250220; extra == "all"
-Requires-Dist: lightgbm<4.6,>=4.0; extra == "all"
-Requires-Dist: huggingface-hub[torch]; extra == "all"
-Requires-Dist: numpy<2.0.0,>=1.25; extra == "all"
 Requires-Dist: xgboost<2.2,>=1.6; extra == "all"
-Requires-Dist: torch<2.6,>=2.2; extra == "all"
+Requires-Dist: numpy<2.0.0,>=1.25; extra == "all"
+Requires-Dist: catboost<1.3,>=1.2; extra == "all"
 Requires-Dist: einops<0.9,>=0.7; extra == "all"
+Requires-Dist: torch<2.6,>=2.2; extra == "all"
+Requires-Dist: autogluon.core[all]==1.2.1b20250222; extra == "all"
+Requires-Dist: lightgbm<4.6,>=4.0; extra == "all"
+Requires-Dist: fastai<2.8,>=2.3.1; extra == "all"
 Requires-Dist: spacy<3.8; extra == "all"
+Requires-Dist: huggingface-hub[torch]; extra == "all"
 Provides-Extra: catboost
 Requires-Dist: numpy<2.0.0,>=1.25; extra == "catboost"
 Requires-Dist: catboost<1.3,>=1.2; extra == "catboost"
@@ -66,7 +66,7 @@ Requires-Dist: imodels<1.4.0,>=1.3.10; extra == "imodels"
 Provides-Extra: lightgbm
 Requires-Dist: lightgbm<4.6,>=4.0; extra == "lightgbm"
 Provides-Extra: ray
-Requires-Dist: autogluon.core[all]==1.2.1b20250220; extra == "ray"
+Requires-Dist: autogluon.core[all]==1.2.1b20250222; extra == "ray"
 Provides-Extra: skex
 Requires-Dist: scikit-learn-intelex<2025.1,>=2024.0; extra == "skex"
 Provides-Extra: skl2onnx

{autogluon.tabular-1.2.1b20250220.dist-info → autogluon.tabular-1.2.1b20250222.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
-autogluon.tabular-1.2.1b20250220-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
+autogluon.tabular-1.2.1b20250222-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
 autogluon/tabular/__init__.py,sha256=2OXpJCvENRHubBTYNIPpHX93WWuFZzsJBtTZbNVHVas,400
-autogluon/tabular/version.py,sha256=5_5isKRJ9Kd7emAdEPGzl72mtp4GDkqLCtc4C6IuV6k,91
+autogluon/tabular/version.py,sha256=vKc61XBek9r0HPWh8L3iqbacLj4oUs5Ot7U5U8voG_0,91
 autogluon/tabular/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/configs/config_helper.py,sha256=Pb2aW9Z9w77pYKPRVZ3nBzHY3KJaiEJSJ747zZcJIVk,21132
 autogluon/tabular/configs/feature_generator_presets.py,sha256=EV5Ym8VW15q92MwOUpTi7wZFS2QooM51fLg3RdUsn-M,1223
@@ -12,6 +12,7 @@ autogluon/tabular/experimental/__init__.py,sha256=PpkdMSv_pPZted1XRIuzcFWKjM-66V
 autogluon/tabular/experimental/_scikit_mixin.py,sha256=cKeCmtURAXZnhQGrkCBw5rmACCQF7biAWTT3qX8bM2Q,2281
 autogluon/tabular/experimental/_tabular_classifier.py,sha256=7lGoFdvkHiZS3VpcXo97q4ENV9qyIVDExlWkm0wzL3s,2527
 autogluon/tabular/experimental/_tabular_regressor.py,sha256=EzEDL-19T5QUVNmLkSHNzzGwYrUxyqlNpIDPMgtV6Gg,1932
+autogluon/tabular/experimental/plot_leaderboard.py,sha256=BN_kB-zmOZNUYWyI7z9pF67GCV20zo8yV51HKKj1SCY,9481
 autogluon/tabular/learner/__init__.py,sha256=Hhmk5WpKQHohVmI-veOaKMelKJpIdzeXrmw_DPn3DTU,63
 autogluon/tabular/learner/abstract_learner.py,sha256=3myDh867x-EWTPR-O-iw82WGgd5n1NKWf3kaTcYQeh0,54955
 autogluon/tabular/learner/default_learner.py,sha256=cg3K0oA-4ccXWtmGgy6qUJaLldieFwDvnMP_PyE9gdk,24579
@@ -152,11 +153,11 @@ autogluon/tabular/trainer/model_presets/presets.py,sha256=1E-Z1FxUpyydaoEdxcTCg7
 autogluon/tabular/trainer/model_presets/presets_distill.py,sha256=MnFC2GJc6RmDBNAGbsO2XMfo3PjR8cUrZoilWW8gTYQ,3295
 autogluon/tabular/tuning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/tabular/tuning/feature_pruner.py,sha256=9iNku8gVbYEkjuKlyITPJDicsNkoraaQOlINQq9iZlQ,6877
-autogluon.tabular-1.2.1b20250220.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
-autogluon.tabular-1.2.1b20250220.dist-info/METADATA,sha256=GX_UkgUo4nTLg-VL4ztu90fgUD9_kCreN-GF6Fj0nVg,14386
-autogluon.tabular-1.2.1b20250220.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
-autogluon.tabular-1.2.1b20250220.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-autogluon.tabular-1.2.1b20250220.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.tabular-1.2.1b20250220.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.tabular-1.2.1b20250220.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-autogluon.tabular-1.2.1b20250220.dist-info/RECORD,,
+autogluon.tabular-1.2.1b20250222.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
+autogluon.tabular-1.2.1b20250222.dist-info/METADATA,sha256=SJ12AGAUOyGnNA2zJjT0PkZbGk4ebDeSOD_rpAkNaW4,14386
+autogluon.tabular-1.2.1b20250222.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
+autogluon.tabular-1.2.1b20250222.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+autogluon.tabular-1.2.1b20250222.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.tabular-1.2.1b20250222.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.tabular-1.2.1b20250222.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+autogluon.tabular-1.2.1b20250222.dist-info/RECORD,,

/autogluon.tabular-1.2.1b20250220-py3.9-nspkg.pth → /autogluon.tabular-1.2.1b20250222-py3.9-nspkg.pth RENAMED Viewed

File without changes

{autogluon.tabular-1.2.1b20250220.dist-info → autogluon.tabular-1.2.1b20250222.dist-info}/LICENSE RENAMED Viewed

File without changes

{autogluon.tabular-1.2.1b20250220.dist-info → autogluon.tabular-1.2.1b20250222.dist-info}/NOTICE RENAMED Viewed

File without changes

{autogluon.tabular-1.2.1b20250220.dist-info → autogluon.tabular-1.2.1b20250222.dist-info}/WHEEL RENAMED Viewed

File without changes

{autogluon.tabular-1.2.1b20250220.dist-info → autogluon.tabular-1.2.1b20250222.dist-info}/namespace_packages.txt RENAMED Viewed

File without changes

{autogluon.tabular-1.2.1b20250220.dist-info → autogluon.tabular-1.2.1b20250222.dist-info}/top_level.txt RENAMED Viewed

File without changes

{autogluon.tabular-1.2.1b20250220.dist-info → autogluon.tabular-1.2.1b20250222.dist-info}/zip-safe RENAMED Viewed

File without changes

autogluon.tabular 1.2.1b20250220__py3-none-any.whl → 1.2.1b20250222__py3-none-any.whl

autogluon.tabular 1.2.1b20250220__py3-none-any.whl → 1.2.1b20250222__py3-none-any.whl