autogluon.tabular 1.2.1b20250220__py3-none-any.whl → 1.2.1b20250222__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.tabular might be problematic. Click here for more details.

@@ -0,0 +1,234 @@
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ from matplotlib.figure import Figure
6
+
7
+ from autogluon.tabular import TabularPredictor
8
+
9
+
10
+ def _cumulative_min_idx(series: pd.Series) -> pd.Series:
11
+ """
12
+
13
+ Parameters
14
+ ----------
15
+ series: pd.Series
16
+
17
+ Returns
18
+ -------
19
+ pd.Series
20
+ The index of the cumulative min of the series values.
21
+
22
+ """
23
+ min_val = float('inf')
24
+ min_index = -1
25
+ result = []
26
+ for i, val in enumerate(series):
27
+ if pd.isna(val):
28
+ result.append(min_index)
29
+ elif val < min_val:
30
+ min_val = val
31
+ min_index = i
32
+ result.append(min_index)
33
+ else:
34
+ result.append(min_index)
35
+ return pd.Series(series.index[result], index=series.index)
36
+
37
+
38
def compute_cumulative_leaderboard_stats(leaderboard: pd.DataFrame) -> pd.DataFrame:
    """
    Add "best so far" columns to a leaderboard, ordered by model fit order.

    Parameters
    ----------
    leaderboard: pd.DataFrame
        Must contain the columns "fit_order", "model", "metric_error_val" and "fit_time_marginal".
        If "metric_error_test" is present, the matching test column is produced as well.
        The input is copied and is not modified.

    Returns
    -------
    leaderboard_stats: pd.DataFrame
        The leaderboard sorted by and indexed on "fit_order" ("model" is kept as a column),
        with these columns added:
        "best_model_so_far", "best_idx_so_far", "time_so_far", "metric_error_val_so_far",
        and, when test errors are available, "metric_error_test_so_far".

    """
    stats = leaderboard.copy(deep=True)
    stats = stats.sort_values(by=["fit_order"])
    # Index by model name so the best-model labels can be used as lookup keys below.
    stats = stats.set_index("model")

    best_model = _cumulative_min_idx(stats["metric_error_val"])
    stats["best_model_so_far"] = best_model
    stats["best_idx_so_far"] = best_model.map(stats["fit_order"])
    stats["time_so_far"] = stats["fit_time_marginal"].cumsum()
    stats["metric_error_val_so_far"] = best_model.map(stats["metric_error_val"])
    if "metric_error_test" in stats.columns:
        # Test error of whichever model had the best validation error up to that point.
        stats["metric_error_test_so_far"] = best_model.map(stats["metric_error_test"])

    return stats.reset_index(drop=False).set_index("fit_order")
60
+
61
+
62
+ # TODO: Include constraints as options:
63
+ # infer_limit
64
+ # disk_usage
65
+ # TODO: Avoid calling leaderboard on the original models again
66
+ # TODO: Calibration?
67
def compute_cumulative_leaderboard_stats_ensemble(
    leaderboard: pd.DataFrame,
    predictor: TabularPredictor,
    test_data: pd.DataFrame | None = None,
    cleanup_ensembles: bool = True,
) -> pd.DataFrame:
    """
    Compute cumulative leaderboard stats, plus the stats of a weighted ensemble
    re-fit on the first 1, 2, ..., N models in fit order.

    Parameters
    ----------
    leaderboard: pd.DataFrame
        Leaderboard accepted by `compute_cumulative_leaderboard_stats`.
    predictor: TabularPredictor
        Predictor used to fit the temporary weighted ensembles and to score them.
    test_data: pd.DataFrame | None, default None
        Passed to `predictor.leaderboard`. If specified, test columns for the ensembles are added.
    cleanup_ensembles: bool, default True
        If True, the temporary ensembles fit by this function are deleted from the predictor
        before returning, including when an error is raised part-way through.

    Returns
    -------
    leaderboard_stats: pd.DataFrame
        Output of `compute_cumulative_leaderboard_stats` with the extra columns
        "metric_error_val_so_far_ens", "best_idx_so_far_ens", "best_model_so_far_ens"
        and, when `test_data` is given, "metric_error_test_so_far_ens".

    """
    leaderboard_stats = compute_cumulative_leaderboard_stats(leaderboard)
    model_fit_order = list(leaderboard_stats["model"])
    ens_names = []
    try:
        for num_models in range(1, len(model_fit_order) + 1):
            # One ensemble per prefix of the fit order; the first returned name is the ensemble's.
            ens_name = predictor.fit_weighted_ensemble(
                base_models=model_fit_order[:num_models],
                name_suffix=f"_fit_{num_models}",
            )[0]
            ens_names.append(ens_name)

        leaderboard_stats_ens = predictor.leaderboard(test_data, score_format="error", display=False)
        leaderboard_stats_ens = leaderboard_stats_ens[leaderboard_stats_ens["model"].isin(ens_names)]
        # Put the ensembles in the order they were fit so row k matches the k-th model.
        leaderboard_stats_ens = leaderboard_stats_ens.set_index("model").reindex(ens_names).reset_index()
        # Re-label each ensemble with the fit order and name of the last model it includes.
        leaderboard_stats_ens["fit_order"] = leaderboard_stats.index
        leaderboard_stats_ens["model"] = leaderboard_stats["model"].values
        leaderboard_stats_ens = compute_cumulative_leaderboard_stats(leaderboard_stats_ens)

        leaderboard_stats["metric_error_val_so_far_ens"] = leaderboard_stats_ens["metric_error_val_so_far"]
        if test_data is not None:
            leaderboard_stats["metric_error_test_so_far_ens"] = leaderboard_stats_ens["metric_error_test_so_far"]
        leaderboard_stats["best_idx_so_far_ens"] = leaderboard_stats_ens["best_idx_so_far"]
        leaderboard_stats["best_model_so_far_ens"] = leaderboard_stats_ens["best_model_so_far"]
    finally:
        # Always remove the temporary ensembles, otherwise a failure above would leave
        # them behind in the predictor.
        if cleanup_ensembles:
            predictor.delete_models(models_to_delete=ens_names, dry_run=False)

    return leaderboard_stats
111
+
112
+
113
def plot_leaderboard_from_predictor(
    predictor: TabularPredictor,
    test_data: pd.DataFrame | None = None,
    ensemble: bool = False,
    include_val: bool = True,
) -> tuple[Figure, pd.DataFrame]:
    """
    Build the cumulative leaderboard stats from a fit predictor and plot them.

    Parameters
    ----------
    predictor: TabularPredictor
        Predictor whose leaderboard (in error format) is plotted.
    test_data: pd.DataFrame | None, default None
        If specified, plots the test error.
    ensemble: bool, default False
        If True, additionally plots the results of cumulatively ensembling models at each step.
    include_val: bool, default True
        If True, plots the validation error.

    Returns
    -------
    fig: Figure
    leaderboard_stats: pd.DataFrame

    Examples
    --------
    >>> data_root = 'https://autogluon.s3.amazonaws.com/datasets/Inc/'
    >>> predictor_example = TabularPredictor(label="class").fit(train_data=data_root + "train.csv", time_limit=60)
    >>> figure, lb = plot_leaderboard_from_predictor(predictor=predictor_example, test_data=data_root + "test.csv", ensemble=True)
    >>> with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.width", 1000):
    ...     print(lb)
    >>> figure.savefig("example_leaderboard_plot.png")
    """
    raw_leaderboard = predictor.leaderboard(test_data, score_format="error", display=False)
    if not ensemble:
        leaderboard_stats = compute_cumulative_leaderboard_stats(leaderboard=raw_leaderboard)
    else:
        leaderboard_stats = compute_cumulative_leaderboard_stats_ensemble(
            leaderboard=raw_leaderboard,
            test_data=test_data,
            predictor=predictor,
        )
    # The stats are already computed above, so the plotting function must not preprocess again.
    return plot_leaderboard(
        leaderboard=leaderboard_stats,
        preprocess=False,
        ensemble=ensemble,
        include_val=include_val,
    )
151
+
152
+
153
def plot_leaderboard(
    leaderboard: pd.DataFrame,
    preprocess: bool = True,
    ensemble: bool = False,
    include_val: bool = True,
    include_test: bool | None = None,
) -> tuple[Figure, pd.DataFrame]:
    """
    Plot the best-so-far metric error against the number of models fit and against elapsed fit time.

    Parameters
    ----------
    leaderboard: pd.DataFrame
        Either the raw leaderboard output of `predictor.leaderboard(..., score_format="error")` or the output of `compute_cumulative_leaderboard_stats`.
    preprocess: bool, default True
        Whether to preprocess the leaderboard to obtain leaderboard_stats.
        Set to False if `leaderboard` has already been transformed
        via `compute_cumulative_leaderboard_stats` or `compute_cumulative_leaderboard_stats_ensemble`.
    ensemble: bool, default False
        If True, additionally plots the results of cumulatively ensembling models at each step.
        Can only be set to True if ensemble columns are present in the leaderboard,
        which are generated by first calling `compute_cumulative_leaderboard_stats_ensemble`.
    include_val: bool, default True
        If True, plots the validation error.
    include_test: bool | None, default None
        If True, plots the test error.
        If None, infers based on the existence of the test error column in `leaderboard`.

    Returns
    -------
    fig: Figure
        Figure with two subplots sharing the y axis: error vs. number of models fit (left)
        and error vs. time elapsed (right).
    leaderboard_stats: pd.DataFrame
        The (possibly preprocessed) leaderboard that was plotted.

    Raises
    ------
    AssertionError
        If both `preprocess` and `ensemble` are True.

    """
    leaderboard_order_sorted = leaderboard
    if preprocess:
        if ensemble:
            raise AssertionError(
                "Cannot have both `preprocess=True` and `ensemble=True`. "
                "Instead call `plot_leaderboard_from_predictor(..., ensemble=True)`"
            )
        leaderboard_order_sorted = compute_cumulative_leaderboard_stats(leaderboard=leaderboard_order_sorted)

    eval_metric = leaderboard_order_sorted["eval_metric"].iloc[0]
    if include_test is None:
        include_test = "metric_error_test_so_far" in leaderboard_order_sorted

    # TODO: View on inference time, can take from ensemble model, 3rd dimension, color?
    fig, axes = plt.subplots(1, 2, sharey=True)
    fig.suptitle('AutoGluon Metric Error Over Time')

    # Left panel: error as a function of how many models have been fit.
    ax = axes[0]
    _plot_metric_error_curves(
        ax=ax,
        x=leaderboard_order_sorted.index,
        leaderboard_stats=leaderboard_order_sorted,
        include_test=include_test,
        include_val=include_val,
        ensemble=ensemble,
    )
    ax.set_xlim(left=1, right=leaderboard_order_sorted.index.max())
    ax.set_xlabel('# Models Fit')
    ax.set_ylabel(f'Metric Error ({eval_metric})')
    ax.grid()

    # Right panel: the same curves as a function of cumulative fit time.
    ax = axes[1]
    _plot_metric_error_curves(
        ax=ax,
        x=leaderboard_order_sorted["time_so_far"].values,
        leaderboard_stats=leaderboard_order_sorted,
        include_test=include_test,
        include_val=include_val,
        ensemble=ensemble,
    )
    ax.set_xlabel('Time Elapsed (s)')
    ax.grid()
    ax.legend()

    return fig, leaderboard_order_sorted


def _plot_metric_error_curves(ax, x, leaderboard_stats: pd.DataFrame, include_test: bool, include_val: bool, ensemble: bool) -> None:
    """Draw the requested best-so-far error curves from `leaderboard_stats` on `ax` against `x`."""
    curves = []  # (column, line style, color, legend label), in drawing order
    if include_test:
        curves.append(("metric_error_test_so_far", '-', "b", "test"))
    if include_val:
        curves.append(("metric_error_val_so_far", '-', "orange", "val"))
    if ensemble:
        # Ensemble curves reuse the colors of their non-ensemble counterparts, but dashed.
        if include_test:
            curves.append(("metric_error_test_so_far_ens", '--', "b", "test (ens)"))
        if include_val:
            curves.append(("metric_error_val_so_far_ens", '--', "orange", "val (ens)"))
    for column, line_style, color, label in curves:
        ax.plot(x, leaderboard_stats[column].values, line_style, color=color, label=label)
@@ -1,4 +1,4 @@
1
1
  """This is the autogluon version file."""
2
2
 
3
- __version__ = "1.2.1b20250220"
3
+ __version__ = "1.2.1b20250222"
4
4
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.tabular
3
- Version: 1.2.1b20250220
3
+ Version: 1.2.1b20250222
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -41,19 +41,19 @@ Requires-Dist: scipy<1.16,>=1.5.4
41
41
  Requires-Dist: pandas<2.3.0,>=2.0.0
42
42
  Requires-Dist: scikit-learn<1.5.3,>=1.4.0
43
43
  Requires-Dist: networkx<4,>=3.0
44
- Requires-Dist: autogluon.core==1.2.1b20250220
45
- Requires-Dist: autogluon.features==1.2.1b20250220
44
+ Requires-Dist: autogluon.core==1.2.1b20250222
45
+ Requires-Dist: autogluon.features==1.2.1b20250222
46
46
  Provides-Extra: all
47
- Requires-Dist: catboost<1.3,>=1.2; extra == "all"
48
- Requires-Dist: fastai<2.8,>=2.3.1; extra == "all"
49
- Requires-Dist: autogluon.core[all]==1.2.1b20250220; extra == "all"
50
- Requires-Dist: lightgbm<4.6,>=4.0; extra == "all"
51
- Requires-Dist: huggingface-hub[torch]; extra == "all"
52
- Requires-Dist: numpy<2.0.0,>=1.25; extra == "all"
53
47
  Requires-Dist: xgboost<2.2,>=1.6; extra == "all"
54
- Requires-Dist: torch<2.6,>=2.2; extra == "all"
48
+ Requires-Dist: numpy<2.0.0,>=1.25; extra == "all"
49
+ Requires-Dist: catboost<1.3,>=1.2; extra == "all"
55
50
  Requires-Dist: einops<0.9,>=0.7; extra == "all"
51
+ Requires-Dist: torch<2.6,>=2.2; extra == "all"
52
+ Requires-Dist: autogluon.core[all]==1.2.1b20250222; extra == "all"
53
+ Requires-Dist: lightgbm<4.6,>=4.0; extra == "all"
54
+ Requires-Dist: fastai<2.8,>=2.3.1; extra == "all"
56
55
  Requires-Dist: spacy<3.8; extra == "all"
56
+ Requires-Dist: huggingface-hub[torch]; extra == "all"
57
57
  Provides-Extra: catboost
58
58
  Requires-Dist: numpy<2.0.0,>=1.25; extra == "catboost"
59
59
  Requires-Dist: catboost<1.3,>=1.2; extra == "catboost"
@@ -66,7 +66,7 @@ Requires-Dist: imodels<1.4.0,>=1.3.10; extra == "imodels"
66
66
  Provides-Extra: lightgbm
67
67
  Requires-Dist: lightgbm<4.6,>=4.0; extra == "lightgbm"
68
68
  Provides-Extra: ray
69
- Requires-Dist: autogluon.core[all]==1.2.1b20250220; extra == "ray"
69
+ Requires-Dist: autogluon.core[all]==1.2.1b20250222; extra == "ray"
70
70
  Provides-Extra: skex
71
71
  Requires-Dist: scikit-learn-intelex<2025.1,>=2024.0; extra == "skex"
72
72
  Provides-Extra: skl2onnx
@@ -1,6 +1,6 @@
1
- autogluon.tabular-1.2.1b20250220-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
1
+ autogluon.tabular-1.2.1b20250222-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
2
2
  autogluon/tabular/__init__.py,sha256=2OXpJCvENRHubBTYNIPpHX93WWuFZzsJBtTZbNVHVas,400
3
- autogluon/tabular/version.py,sha256=5_5isKRJ9Kd7emAdEPGzl72mtp4GDkqLCtc4C6IuV6k,91
3
+ autogluon/tabular/version.py,sha256=vKc61XBek9r0HPWh8L3iqbacLj4oUs5Ot7U5U8voG_0,91
4
4
  autogluon/tabular/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  autogluon/tabular/configs/config_helper.py,sha256=Pb2aW9Z9w77pYKPRVZ3nBzHY3KJaiEJSJ747zZcJIVk,21132
6
6
  autogluon/tabular/configs/feature_generator_presets.py,sha256=EV5Ym8VW15q92MwOUpTi7wZFS2QooM51fLg3RdUsn-M,1223
@@ -12,6 +12,7 @@ autogluon/tabular/experimental/__init__.py,sha256=PpkdMSv_pPZted1XRIuzcFWKjM-66V
12
12
  autogluon/tabular/experimental/_scikit_mixin.py,sha256=cKeCmtURAXZnhQGrkCBw5rmACCQF7biAWTT3qX8bM2Q,2281
13
13
  autogluon/tabular/experimental/_tabular_classifier.py,sha256=7lGoFdvkHiZS3VpcXo97q4ENV9qyIVDExlWkm0wzL3s,2527
14
14
  autogluon/tabular/experimental/_tabular_regressor.py,sha256=EzEDL-19T5QUVNmLkSHNzzGwYrUxyqlNpIDPMgtV6Gg,1932
15
+ autogluon/tabular/experimental/plot_leaderboard.py,sha256=BN_kB-zmOZNUYWyI7z9pF67GCV20zo8yV51HKKj1SCY,9481
15
16
  autogluon/tabular/learner/__init__.py,sha256=Hhmk5WpKQHohVmI-veOaKMelKJpIdzeXrmw_DPn3DTU,63
16
17
  autogluon/tabular/learner/abstract_learner.py,sha256=3myDh867x-EWTPR-O-iw82WGgd5n1NKWf3kaTcYQeh0,54955
17
18
  autogluon/tabular/learner/default_learner.py,sha256=cg3K0oA-4ccXWtmGgy6qUJaLldieFwDvnMP_PyE9gdk,24579
@@ -152,11 +153,11 @@ autogluon/tabular/trainer/model_presets/presets.py,sha256=1E-Z1FxUpyydaoEdxcTCg7
152
153
  autogluon/tabular/trainer/model_presets/presets_distill.py,sha256=MnFC2GJc6RmDBNAGbsO2XMfo3PjR8cUrZoilWW8gTYQ,3295
153
154
  autogluon/tabular/tuning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
154
155
  autogluon/tabular/tuning/feature_pruner.py,sha256=9iNku8gVbYEkjuKlyITPJDicsNkoraaQOlINQq9iZlQ,6877
155
- autogluon.tabular-1.2.1b20250220.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
156
- autogluon.tabular-1.2.1b20250220.dist-info/METADATA,sha256=GX_UkgUo4nTLg-VL4ztu90fgUD9_kCreN-GF6Fj0nVg,14386
157
- autogluon.tabular-1.2.1b20250220.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
158
- autogluon.tabular-1.2.1b20250220.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
159
- autogluon.tabular-1.2.1b20250220.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
160
- autogluon.tabular-1.2.1b20250220.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
161
- autogluon.tabular-1.2.1b20250220.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
162
- autogluon.tabular-1.2.1b20250220.dist-info/RECORD,,
156
+ autogluon.tabular-1.2.1b20250222.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
157
+ autogluon.tabular-1.2.1b20250222.dist-info/METADATA,sha256=SJ12AGAUOyGnNA2zJjT0PkZbGk4ebDeSOD_rpAkNaW4,14386
158
+ autogluon.tabular-1.2.1b20250222.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
159
+ autogluon.tabular-1.2.1b20250222.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
160
+ autogluon.tabular-1.2.1b20250222.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
161
+ autogluon.tabular-1.2.1b20250222.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
162
+ autogluon.tabular-1.2.1b20250222.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
163
+ autogluon.tabular-1.2.1b20250222.dist-info/RECORD,,