autogluon.timeseries 1.3.2b20250712-py3-none-any.whl → 1.3.2b20250714-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -473,7 +473,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
 
 
 class DirectTabularModel(AbstractMLForecastModel):
-    """Predict all future time series values simultaneously using tabular regression models.
+    """Predict all future time series values simultaneously using a regression model from AutoGluon-Tabular.
 
     A single tabular model is used to forecast all future time series values using the following features:
 
@@ -654,16 +654,16 @@ class DirectTabularModel(AbstractMLForecastModel):
 
 
 class RecursiveTabularModel(AbstractMLForecastModel):
-    """Predict future time series values one by one using TabularPredictor from AutoGluon-Tabular.
+    """Predict future time series values one by one using a regression model from AutoGluon-Tabular.
 
-    A single TabularPredictor is used to forecast the future time series values using the following features:
+    A single tabular regression model is used to forecast the future time series values using the following features:
 
     - lag features (observed time series values) based on ``freq`` of the data
     - time features (e.g., day of the week) based on the timestamp of the measurement
     - known covariates (if available)
     - static features of each item (if available)
 
-    TabularPredictor will always be trained with ``"regression"`` problem type, and dummy quantiles will be
+    The tabular model will always be trained with ``"regression"`` problem type, and dummy quantiles will be
     obtained by assuming that the residuals follow zero-mean normal distribution.
 
     Based on the `mlforecast <https://github.com/Nixtla/mlforecast>`_ library.
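As a side note on the docstring change above: the "dummy quantiles" come from treating the point forecast of the regression model as the mean of a normal distribution whose standard deviation is estimated from the residuals. A minimal sketch of that calculation with made-up numbers (not the library's internal code):

import numpy as np
import scipy.stats

# Illustrative values: point forecasts from the regression model and the
# estimated standard deviation of its residuals (assumed zero-mean normal).
point_forecast = np.array([10.0, 12.5, 11.0])
residuals_std = 2.0
quantile_levels = [0.1, 0.5, 0.9]

# q-th dummy quantile = point forecast + std * Phi^{-1}(q)
dummy_quantiles = {
    q: point_forecast + residuals_std * scipy.stats.norm.ppf(q) for q in quantile_levels
}
# Phi^{-1}(0.5) = 0, so the 0.5 quantile equals the point forecast.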
@@ -2,15 +2,18 @@ import logging
 import math
 import os
 import time
-from typing import Any, Callable, Dict, List, Optional, Type
+from typing import Any, Callable, Dict, List, Literal, Optional, Type
 
 import numpy as np
 import pandas as pd
+import scipy.stats
 from joblib import Parallel, cpu_count, delayed
 
+from autogluon.common.loaders import load_pkl
+from autogluon.common.savers import save_pkl
 from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
 from autogluon.common.utils.resource_utils import ResourceManager
-from autogluon.core.constants import QUANTILE
+from autogluon.core.constants import QUANTILE, REGRESSION
 from autogluon.tabular.models import AbstractModel as AbstractTabularModel
 from autogluon.tabular.registry import ag_model_registry
 from autogluon.timeseries import TimeSeriesDataFrame
@@ -23,8 +26,6 @@ from .utils import MLF_ITEMID, MLF_TARGET, MLF_TIMESTAMP
 
 logger = logging.getLogger(__name__)
 
-DUMMY_FREQ = "D"
-
 
 class PerStepTabularModel(AbstractTimeSeriesModel):
     """Fit a separate tabular regression model for each time step in the forecast horizon.
@@ -36,7 +37,11 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
     - known covariates (if available)
     - static features of each item (if available)
 
-    This model is typically much slower to fit compared to other tabular forecasting models.
+    This model is typically slower to fit compared to other tabular forecasting models.
+
+    If ``eval_metric.needs_quantile``, the tabular regression models will be trained with ``"quantile"`` problem type.
+    Otherwise, the models will be trained with ``"regression"`` problem type, and dummy quantiles will be
+    obtained by assuming that the residuals follow zero-mean normal distribution.
 
     This model uses `mlforecast <https://github.com/Nixtla/mlforecast>`_ under the hood for efficient preprocessing,
     but the implementation of the per-step forecasting strategy is different from the `max_horizon` in `mlforecast`.
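For intuition, the per-step strategy described in the docstring above amounts to fitting one regressor per forecast step, where the lag features for step s are shifted s extra positions so that only observed values are ever used as inputs. A rough standalone sketch with plain numpy/pandas (toy data and an ordinary least-squares fit, not the actual implementation, which relies on mlforecast preprocessing and AutoGluon-Tabular models):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
y = pd.Series(rng.normal(size=200))            # a single toy time series
lags, prediction_length = [1, 2, 3], 4

def lag_features(series: pd.Series, step: int) -> pd.DataFrame:
    # For forecast step `step`, lag `l` is the value observed `l + step`
    # positions before the target, so only past data enters the features.
    return pd.DataFrame({f"lag_{l}": series.shift(l + step) for l in lags})

coefs = []
for step in range(prediction_length):          # one regression per forecast step
    X = lag_features(y, step).dropna()
    X_mat = np.column_stack([np.ones(len(X)), X.to_numpy()])
    coefs.append(np.linalg.lstsq(X_mat, y.loc[X.index].to_numpy(), rcond=None)[0])

# At prediction time every step model sees the same, most recent observed lags,
# which is why no recursive feeding of predictions back into the features is needed.
latest = np.concatenate([[1.0], [y.iloc[-l] for l in lags]])
forecast = [latest @ beta for beta in coefs]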
@@ -73,6 +78,8 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         If None, automatically determined based on available memory to prevent OOM errors.
     """
 
+    _dummy_freq = "D"
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         # We save the relative paths to per-step models. Each worker process independently saves/loads the model.
@@ -116,13 +123,16 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
             "max_num_items": 20_000,
         }
 
-    @staticmethod
+    @classmethod
     def _fit_single_model(
+        cls,
         train_df: pd.DataFrame,
         path_root: str,
         step: int,
         model_cls: Type[AbstractTabularModel],
         model_hyperparameters: dict,
+        problem_type: Literal["quantile", "regression"],
+        eval_metric: str,
         validation_fraction: Optional[float],
         quantile_levels: list[float],
         lags: list[int],
@@ -135,13 +145,14 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
 
         start_time = time.monotonic()
 
-        mlf = MLForecast(models=[], freq=DUMMY_FREQ, lags=lags, date_features=date_features)
+        mlf = MLForecast(models=[], freq=cls._dummy_freq, lags=lags, date_features=date_features)
 
         features_df = mlf.preprocess(train_df, static_features=[], dropna=False)
         del train_df
         del mlf
         # Sort chronologically for efficient train/test split
         features_df = features_df.sort_values(by=MLF_TIMESTAMP)
+        item_ids = features_df[MLF_ITEMID]
         X = features_df.drop(columns=[MLF_ITEMID, MLF_TIMESTAMP, MLF_TARGET])
         y = features_df[MLF_TARGET]
         del features_df
@@ -162,14 +173,16 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
 
         elapsed = time.monotonic() - start_time
         time_left = time_limit - elapsed if time_limit is not None else None
+        if problem_type == QUANTILE:
+            model_hyperparameters = model_hyperparameters | {"ag.quantile_levels": quantile_levels}
         try:
             with set_loggers_level(regex=r"^autogluon.tabular.*", level=logging.ERROR):
                 model = model_cls(
                     path=os.path.join(path_root, f"step_{step}"),
                     name=model_cls.__name__,  # explicitly provide name to avoid warnings
-                    problem_type=QUANTILE,
-                    eval_metric="pinball_loss",
-                    hyperparameters={**model_hyperparameters, "ag.quantile_levels": quantile_levels},
+                    problem_type=problem_type,
+                    eval_metric=eval_metric,
+                    hyperparameters=model_hyperparameters,
                 )
                 model.fit(
                     X=X,
@@ -184,6 +197,9 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         except Exception as e:
             raise RuntimeError(f"Failed when fitting model for {step=}") from e
         model.save()
+        if problem_type == REGRESSION:
+            residuals_std = pd.Series((model.predict(X) - y) ** 2).groupby(item_ids).mean() ** 0.5
+            save_pkl.save(cls._get_residuals_std_path(model.path), residuals_std)
         relative_path = os.path.relpath(path=model.path, start=path_root)
         return relative_path
 
@@ -313,13 +329,8 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         date_features = get_time_features_for_frequency(self.freq)
         self._date_features = date_features
 
-        self._model_cls = ag_model_registry.key_to_cls(model_params["model_name"])
-        supported_problem_types = self._model_cls.supported_problem_types()
-        if supported_problem_types is not None and QUANTILE not in supported_problem_types:
-            raise ValueError(
-                f"Chosen model_name='{model_params['model_name']}' cannot be used by {self.name} because it does not "
-                f"support problem_type='quantile' ({supported_problem_types=})"
-            )
+        model_name = model_params["model_name"]
+        self._model_cls = ag_model_registry.key_to_cls(model_name)
         model_hyperparameters = model_params["model_hyperparameters"]
         # User-provided n_jobs takes priority over the automatic estimate
         if model_params.get("n_jobs") is not None:
@@ -339,18 +350,35 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
             time_limit_per_model = time_limit / math.ceil(self.prediction_length / n_jobs)
         else:
             time_limit_per_model = None
+
+        if self.eval_metric.needs_quantile:
+            problem_type = QUANTILE
+            eval_metric = "pinball_loss"
+        else:
+            problem_type = REGRESSION
+            eval_metric = self.eval_metric.equivalent_tabular_regression_metric or "mean_absolute_error"
+
+        supported_problem_types = self._model_cls.supported_problem_types()
+        if supported_problem_types is not None and problem_type not in supported_problem_types:
+            raise ValueError(
+                f"Chosen model_name='{model_name}' cannot be used by {self.name} with eval_metric={self.eval_metric}"
+                f"because {model_name} does not support problem_type={problem_type} ({supported_problem_types=})"
+            )
         model_fit_kwargs = dict(
             train_df=train_df,
             path_root=self.path,
             model_cls=self._model_cls,
             quantile_levels=self.quantile_levels,
             validation_fraction=model_params["validation_fraction"],
+            problem_type=problem_type,
+            eval_metric=eval_metric,
             date_features=self._date_features,
             time_limit=time_limit_per_model,
             num_cpus=num_cpus_per_model,
             model_hyperparameters=model_hyperparameters.copy(),
             verbosity=verbosity - 1,
         )
+
         logger.debug(f"Fitting models in parallel with {n_jobs=}, {num_cpus_per_model=}, {time_limit_per_model=}")
         self._relative_paths_to_models = Parallel(n_jobs=n_jobs)(  # type: ignore
             delayed(self._fit_single_model)(
@@ -363,12 +391,19 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
             for step in range(self.prediction_length)
         )
 
-    @staticmethod
+    @classmethod
+    def _get_residuals_std_path(cls, model_path: str) -> str:
+        """Path to the pd.Series storing the standard deviation of residuals for each item_id."""
+        return os.path.join(model_path, "residuals_std.pkl")
+
+    @classmethod
     def _predict_with_single_model(
+        cls,
         full_df: pd.DataFrame,
         path_to_model: str,
         model_cls: Type[AbstractTabularModel],
         step: int,
+        quantile_levels: list[float],
         prediction_length: int,
         lags: list[int],
         date_features: list[Callable],
@@ -382,7 +417,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         """
         from mlforecast import MLForecast
 
-        mlf = MLForecast(models=[], freq=DUMMY_FREQ, lags=lags, date_features=date_features)
+        mlf = MLForecast(models=[], freq=cls._dummy_freq, lags=lags, date_features=date_features)
 
         features_df = mlf.preprocess(full_df, static_features=[], dropna=False)
         del mlf
@@ -395,6 +430,13 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
             logger.error(f"Could not load model for {step=} from {path_to_model}")
             raise
         predictions = model.predict(features_for_step)
+        if model.problem_type == REGRESSION:
+            predictions = np.tile(predictions[:, None], (1, len(quantile_levels)))
+            residuals_std: pd.Series = load_pkl.load(cls._get_residuals_std_path(model.path))
+            item_ids = features_for_step[MLF_ITEMID]
+            residuals_repeated = residuals_std.reindex(item_ids).fillna(residuals_std.mean()).to_numpy()
+            for i, q in enumerate(quantile_levels):
+                predictions[:, i] += scipy.stats.norm.ppf(q) * residuals_repeated
         return predictions
 
     def _predict(
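The regression branch added above widens the point forecast into quantiles using the per-item residual standard deviation stored at fit time (the per-item root mean squared residual), falling back to the mean standard deviation for item_ids that were not seen during training. A standalone sketch of that adjustment with illustrative values (not the library's code):

import numpy as np
import pandas as pd
import scipy.stats

quantile_levels = [0.1, 0.5, 0.9]
point_predictions = np.array([10.0, 20.0, 30.0])    # one row per item to forecast
item_ids = pd.Series(["A", "B", "C"])               # "C" has no stored residual std
residuals_std = pd.Series({"A": 0.5, "B": 1.0})     # per-item std saved at fit time

# Repeat the point forecast once per requested quantile level.
predictions = np.tile(point_predictions[:, None], (1, len(quantile_levels)))

# Unseen items fall back to the mean residual std across items.
std_per_row = residuals_std.reindex(item_ids).fillna(residuals_std.mean()).to_numpy()

# Shift each quantile column by the corresponding normal quantile of the residuals.
for i, q in enumerate(quantile_levels):
    predictions[:, i] += scipy.stats.norm.ppf(q) * std_per_row
# The q=0.5 column equals the point forecast; the outer columns widen with the std.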
@@ -425,6 +467,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
 
         model_predict_kwargs = dict(
             full_df=full_df,
+            quantile_levels=self.quantile_levels,
             prediction_length=self.prediction_length,
             model_cls=self._model_cls,
             date_features=self._date_features,
@@ -1,4 +1,4 @@
 """This is the autogluon version file."""
 
-__version__ = "1.3.2b20250712"
+__version__ = "1.3.2b20250714"
 __lite__ = False
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 1.3.2b20250712
+Version: 1.3.2b20250714
 Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -36,7 +36,7 @@ Requires-Python: >=3.9, <3.13
 Description-Content-Type: text/markdown
 License-File: ../LICENSE
 License-File: ../NOTICE
-Requires-Dist: joblib<2,>=1.1
+Requires-Dist: joblib<1.7,>=1.2
 Requires-Dist: numpy<2.4.0,>=1.25.0
 Requires-Dist: scipy<1.17,>=1.5.4
 Requires-Dist: pandas<2.4.0,>=2.0.0
@@ -55,10 +55,10 @@ Requires-Dist: fugue>=0.9.0
 Requires-Dist: tqdm<5,>=4.38
 Requires-Dist: orjson~=3.9
 Requires-Dist: tensorboard<3,>=2.9
-Requires-Dist: autogluon.core[raytune]==1.3.2b20250712
-Requires-Dist: autogluon.common==1.3.2b20250712
-Requires-Dist: autogluon.features==1.3.2b20250712
-Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.3.2b20250712
+Requires-Dist: autogluon.core[raytune]==1.3.2b20250714
+Requires-Dist: autogluon.common==1.3.2b20250714
+Requires-Dist: autogluon.features==1.3.2b20250714
+Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.3.2b20250714
 Provides-Extra: all
 Provides-Extra: tests
 Requires-Dist: pytest; extra == "tests"
@@ -1,4 +1,4 @@
-autogluon.timeseries-1.3.2b20250712-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
+autogluon.timeseries-1.3.2b20250714-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
 autogluon/timeseries/__init__.py,sha256=_CrLLc1fkjen7UzWoO0Os8WZoHOgvZbHKy46I8v_4k4,304
 autogluon/timeseries/evaluator.py,sha256=l642tYfTHsl8WVIq_vV6qhgAFVFr9UuZD7gLra3A_Kc,250
 autogluon/timeseries/learner.py,sha256=pIn4YSOk0aqCWyBpIlwnAsFnG4h7PLXk8guFH3wFS-w,13923
@@ -6,7 +6,7 @@ autogluon/timeseries/predictor.py,sha256=u4d7-xMs669g5xxqIYuvEyGQ0P6Y8IoToiyg9zU
 autogluon/timeseries/regressor.py,sha256=G0zecniv85wr8EXlXsbiqpKYHE5KeNALHRzPp_hO5qs,12001
 autogluon/timeseries/splitter.py,sha256=yzPca9p2bWV-_VJAptUyyzQsxu-uixAdpMoGQtDzMD4,3205
 autogluon/timeseries/trainer.py,sha256=-xdGZ4v8OTA3AzMjBJ4CwGYhmKBRsY0Q-dm6YioFOmc,57977
-autogluon/timeseries/version.py,sha256=C6OW_vajErF7r9El7B0X_XkhCzzEn70hhuGbhroLKSU,91
+autogluon/timeseries/version.py,sha256=QyavBAXV1tyROK-lM699M991S0GUCC93ub_uqpbzb74,91
 autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
 autogluon/timeseries/configs/presets_configs.py,sha256=cLat8ecLlWrI-SC5KLBDCX2SbVXaucemy2pjxJAtSY0,2543
 autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
@@ -23,8 +23,8 @@ autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=cxAZoYe
 autogluon/timeseries/models/abstract/model_trial.py,sha256=ENPg_7nsdxIvaNM0o0UShZ3x8jFlRmwRc5m0fGPC0TM,3720
 autogluon/timeseries/models/abstract/tunable.py,sha256=SFl4vjkb6BfFFaRPVdftnnLYlIyCThutLHxiiAlV6tY,7168
 autogluon/timeseries/models/autogluon_tabular/__init__.py,sha256=E5fZsdFPgVdyCVyj5bGmn_lQFlCMn2NvuRLBMcCFvhM,205
-autogluon/timeseries/models/autogluon_tabular/mlforecast.py,sha256=CBQh23Li__Gmpsv1e5ucMjeBtLFcm2CJbpgqXVNOTNY,37614
-autogluon/timeseries/models/autogluon_tabular/per_step.py,sha256=qCC8ed4pqm6yoW743WJ2z1Nh6WV8-Z8EVqRwX9Lz6eE,20580
+autogluon/timeseries/models/autogluon_tabular/mlforecast.py,sha256=9J95mXR2V_DLXHz0p-VaHA2wi3mF33KggEKMI3YViHM,37641
+autogluon/timeseries/models/autogluon_tabular/per_step.py,sha256=CVdtmVRGn4DYb8AVgqaiycnKrR_KrCkNtwZ8jW-UAGw,22761
 autogluon/timeseries/models/autogluon_tabular/transforms.py,sha256=aI1QJLJaOB5Xy2WA0jo6Jh25MRVyyZ8ONrqlV96kpw0,2735
 autogluon/timeseries/models/autogluon_tabular/utils.py,sha256=Fn3Vu_Q0PCtEUbtNgLp1xIblg7dOdpFlF3W5kLHgruI,63
 autogluon/timeseries/models/chronos/__init__.py,sha256=wT77HzTtmQxW3sw2k0mA5Ot6PSHivX-Uvn5fjM05EU4,60
@@ -61,11 +61,11 @@ autogluon/timeseries/utils/datetime/base.py,sha256=3NdsH3NDq4cVAOSoy3XpaNixyNlbj
 autogluon/timeseries/utils/datetime/lags.py,sha256=dpndFOV-d-AqCTwKeQ5Dz-AfCJTeI27bxDC13QzY4y8,6003
 autogluon/timeseries/utils/datetime/seasonality.py,sha256=YK_2k8hvYIMW-sJPnjGWRtCnvIOthwA2hATB3nwVoD4,834
 autogluon/timeseries/utils/datetime/time_features.py,sha256=MjLi3zQ00uWWJtXH9oGX2GJkTbvjdSiuabSa4kcVuxE,2672
-autogluon.timeseries-1.3.2b20250712.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
-autogluon.timeseries-1.3.2b20250712.dist-info/METADATA,sha256=IgR6RZQbUF8j9rMAow5LpeHS1R0EdXQm62mEf1SXkV8,12443
-autogluon.timeseries-1.3.2b20250712.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
-autogluon.timeseries-1.3.2b20250712.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-autogluon.timeseries-1.3.2b20250712.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.timeseries-1.3.2b20250712.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.timeseries-1.3.2b20250712.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-autogluon.timeseries-1.3.2b20250712.dist-info/RECORD,,
+autogluon.timeseries-1.3.2b20250714.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
+autogluon.timeseries-1.3.2b20250714.dist-info/METADATA,sha256=GTmk62hA3DVv50k3uxC7vswJlg36ooe0Ikt32dA9z1c,12445
+autogluon.timeseries-1.3.2b20250714.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
+autogluon.timeseries-1.3.2b20250714.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+autogluon.timeseries-1.3.2b20250714.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-1.3.2b20250714.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-1.3.2b20250714.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+autogluon.timeseries-1.3.2b20250714.dist-info/RECORD,,