autogluon.timeseries 1.4.1b20250906__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.


Files changed (93)
  1. autogluon/timeseries/configs/hyperparameter_presets.py +2 -2
  2. autogluon/timeseries/dataset/ts_dataframe.py +97 -86
  3. autogluon/timeseries/learner.py +68 -35
  4. autogluon/timeseries/metrics/__init__.py +4 -4
  5. autogluon/timeseries/metrics/abstract.py +8 -8
  6. autogluon/timeseries/metrics/point.py +9 -9
  7. autogluon/timeseries/metrics/quantile.py +5 -5
  8. autogluon/timeseries/metrics/utils.py +4 -4
  9. autogluon/timeseries/models/__init__.py +4 -1
  10. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
  11. autogluon/timeseries/models/abstract/model_trial.py +2 -1
  12. autogluon/timeseries/models/abstract/tunable.py +8 -8
  13. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
  14. autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
  15. autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
  16. autogluon/timeseries/models/chronos/__init__.py +2 -1
  17. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  18. autogluon/timeseries/models/chronos/model.py +125 -87
  19. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +68 -36
  20. autogluon/timeseries/models/ensemble/__init__.py +34 -2
  21. autogluon/timeseries/models/ensemble/abstract.py +5 -42
  22. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  23. autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
  24. autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
  25. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  26. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  27. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
  28. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  29. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  30. autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
  31. autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
  32. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  33. autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
  34. autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +6 -16
  35. autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
  36. autogluon/timeseries/models/gluonts/abstract.py +25 -25
  37. autogluon/timeseries/models/gluonts/dataset.py +11 -11
  38. autogluon/timeseries/models/local/__init__.py +0 -7
  39. autogluon/timeseries/models/local/abstract_local_model.py +15 -18
  40. autogluon/timeseries/models/local/naive.py +2 -2
  41. autogluon/timeseries/models/local/npts.py +1 -1
  42. autogluon/timeseries/models/local/statsforecast.py +12 -12
  43. autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
  44. autogluon/timeseries/models/registry.py +3 -4
  45. autogluon/timeseries/models/toto/__init__.py +3 -0
  46. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  47. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  48. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  49. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  50. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  51. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  52. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  53. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  56. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  57. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  58. autogluon/timeseries/models/toto/dataloader.py +108 -0
  59. autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
  60. autogluon/timeseries/models/toto/model.py +236 -0
  61. autogluon/timeseries/predictor.py +301 -103
  62. autogluon/timeseries/regressor.py +27 -30
  63. autogluon/timeseries/splitter.py +3 -27
  64. autogluon/timeseries/trainer/ensemble_composer.py +439 -0
  65. autogluon/timeseries/trainer/model_set_builder.py +9 -9
  66. autogluon/timeseries/trainer/prediction_cache.py +16 -16
  67. autogluon/timeseries/trainer/trainer.py +300 -275
  68. autogluon/timeseries/trainer/utils.py +17 -0
  69. autogluon/timeseries/transforms/covariate_scaler.py +8 -8
  70. autogluon/timeseries/transforms/target_scaler.py +15 -15
  71. autogluon/timeseries/utils/constants.py +10 -0
  72. autogluon/timeseries/utils/datetime/lags.py +1 -3
  73. autogluon/timeseries/utils/datetime/seasonality.py +1 -3
  74. autogluon/timeseries/utils/features.py +18 -14
  75. autogluon/timeseries/utils/forecast.py +6 -7
  76. autogluon/timeseries/utils/timer.py +173 -0
  77. autogluon/timeseries/version.py +1 -1
  78. autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
  79. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +39 -22
  80. autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
  81. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
  82. autogluon/timeseries/evaluator.py +0 -6
  83. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
  84. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  85. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
  86. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
  87. autogluon.timeseries-1.4.1b20250906-py3.9-nspkg.pth +0 -1
  88. autogluon.timeseries-1.4.1b20250906.dist-info/RECORD +0 -75
  89. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
  90. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
  91. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
  92. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
  93. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
@@ -5,7 +5,7 @@ import os
  import pprint
  import time
  from pathlib import Path
- from typing import Any, Literal, Optional, Type, Union, cast
+ from typing import Any, Literal, Type, cast, overload

  import numpy as np
  import pandas as pd
@@ -22,10 +22,9 @@ from autogluon.core.utils.loaders import load_pkl, load_str
  from autogluon.core.utils.savers import save_pkl, save_str
  from autogluon.timeseries import __version__ as current_ag_version
  from autogluon.timeseries.configs import get_predictor_presets
- from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
  from autogluon.timeseries.learner import TimeSeriesLearner
  from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
- from autogluon.timeseries.splitter import ExpandingWindowSplitter
  from autogluon.timeseries.trainer import TimeSeriesTrainer
  from autogluon.timeseries.utils.forecast import make_future_data_frame

@@ -67,7 +66,7 @@ class TimeSeriesPredictor:

  If ``freq`` is provided when creating the predictor, all data passed to the predictor will be automatically
  resampled at this frequency.
- eval_metric : Union[str, TimeSeriesScorer], default = "WQL"
+ eval_metric : str | TimeSeriesScorer, default = "WQL"
  Metric by which predictions will be ultimately evaluated on future test data. AutoGluon tunes hyperparameters
  in order to improve this metric on validation data, and ranks models (on validation data) according to this
  metric.
@@ -125,7 +124,7 @@ class TimeSeriesPredictor:
  debug messages from AutoGluon and all logging in dependencies (GluonTS, PyTorch Lightning, AutoGluon-Tabular, etc.)
  log_to_file: bool, default = True
  Whether to save the logs into a file for later reference
- log_file_path: Union[str, Path], default = "auto"
+ log_file_path: str | Path, default = "auto"
  File path to save the logs.
  If auto, logs will be saved under ``predictor_path/logs/predictor_log.txt``.
  Will be ignored if ``log_to_file`` is set to False
@@ -146,20 +145,20 @@ class TimeSeriesPredictor:

  def __init__(
  self,
- target: Optional[str] = None,
- known_covariates_names: Optional[list[str]] = None,
+ target: str | None = None,
+ known_covariates_names: list[str] | None = None,
  prediction_length: int = 1,
- freq: Optional[str] = None,
- eval_metric: Union[str, TimeSeriesScorer, None] = None,
- eval_metric_seasonal_period: Optional[int] = None,
- horizon_weight: Optional[list[float]] = None,
- path: Optional[Union[str, Path]] = None,
+ freq: str | None = None,
+ eval_metric: str | TimeSeriesScorer | None = None,
+ eval_metric_seasonal_period: int | None = None,
+ horizon_weight: list[float] | None = None,
+ path: str | Path | None = None,
  verbosity: int = 2,
  log_to_file: bool = True,
- log_file_path: Union[str, Path] = "auto",
- quantile_levels: Optional[list[float]] = None,
+ log_file_path: str | Path = "auto",
+ quantile_levels: list[float] | None = None,
  cache_predictions: bool = True,
- label: Optional[str] = None,
+ label: str | None = None,
  **kwargs,
  ):
  self.verbosity = verbosity
@@ -221,20 +220,6 @@ class TimeSeriesPredictor:
  ensemble_model_type=kwargs.pop("ensemble_model_type", None),
  )

- if "ignore_time_index" in kwargs:
- raise TypeError(
- "`ignore_time_index` argument to TimeSeriesPredictor.__init__() has been deprecated.\n"
- "If your data has irregular timestamps, please either 1) specify the desired regular frequency when "
- "creating the predictor as `TimeSeriesPredictor(freq=...)` or 2) manually convert timestamps to "
- "regular frequency with `data.convert_frequency(freq=...)`."
- )
- for k in ["learner_type", "learner_kwargs"]:
- if k in kwargs:
- val = kwargs.pop(k)
- logger.warning(
- f"Passing `{k}` to TimeSeriesPredictor has been deprecated and will be removed in v1.4. "
- f"The provided value {val} will be ignored."
- )
  if len(kwargs) > 0:
  for key in kwargs:
  raise TypeError(f"TimeSeriesPredictor.__init__() got an unexpected keyword argument '{key}'")
@@ -243,7 +228,16 @@ class TimeSeriesPredictor:
  def _trainer(self) -> TimeSeriesTrainer:
  return self._learner.load_trainer() # noqa

- def _setup_log_to_file(self, log_to_file: bool, log_file_path: Union[str, Path]) -> None:
+ @property
+ def is_fit(self) -> bool:
+ return self._learner.is_fit
+
+ def _assert_is_fit(self, method_name: str) -> None:
+ """Check if predictor is fit and raise AssertionError with informative message if not."""
+ if not self.is_fit:
+ raise AssertionError(f"Predictor is not fit. Call `.fit` before calling `.{method_name}`. ")
+
+ def _setup_log_to_file(self, log_to_file: bool, log_file_path: str | Path) -> None:
  if log_to_file:
  if log_file_path == "auto":
  log_file_path = os.path.join(self.path, "logs", self._predictor_log_file_name)
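The hunk above introduces a public ``is_fit`` property and the ``_assert_is_fit`` guard that many methods below now call before touching the trainer. A minimal sketch of the resulting behavior (hypothetical usage, not part of the diff):

    from autogluon.timeseries import TimeSeriesPredictor

    predictor = TimeSeriesPredictor(prediction_length=24)
    print(predictor.is_fit)  # False
    try:
        predictor.leaderboard()  # now guarded by _assert_is_fit
    except AssertionError as err:
        print(err)  # Predictor is not fit. Call `.fit` before calling `.leaderboard`.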
@@ -253,7 +247,7 @@ class TimeSeriesPredictor:

  def _to_data_frame(
  self,
- data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
+ data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
  name: str = "data",
  ) -> TimeSeriesDataFrame:
  if isinstance(data, TimeSeriesDataFrame):
@@ -274,7 +268,7 @@ class TimeSeriesPredictor:

  def _check_and_prepare_data_frame(
  self,
- data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
+ data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
  name: str = "data",
  ) -> TimeSeriesDataFrame:
  """Ensure that TimeSeriesDataFrame has a sorted index and a valid frequency.
@@ -283,7 +277,7 @@ class TimeSeriesPredictor:

  Parameters
  ----------
- data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+ data : TimeSeriesDataFrame | pd.DataFrame | Path | str
  Data as a dataframe or path to file storing the data.
  name : str
  Name of the data that will be used in log messages (e.g., 'train_data', 'tuning_data', or 'data').
@@ -326,7 +320,7 @@ class TimeSeriesPredictor:
  return df

  def _check_and_prepare_data_frame_for_evaluation(
- self, data: TimeSeriesDataFrame, cutoff: Optional[int] = None, name: str = "data"
+ self, data: TimeSeriesDataFrame, cutoff: int | None = None, name: str = "data"
  ) -> TimeSeriesDataFrame:
  """
  Make sure that provided evaluation data includes both historical and future time series values.
@@ -417,7 +411,9 @@ class TimeSeriesPredictor:
  )
  train_data = train_data.query("item_id not in @too_short_items")

- all_nan_items = train_data.item_ids[train_data[self.target].isna().groupby(ITEMID, sort=False).all()]
+ all_nan_items = train_data.item_ids[
+ train_data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
+ ]
  if len(all_nan_items) > 0:
  logger.info(f"\tRemoving {len(all_nan_items)} time series consisting of only NaN values from train_data.")
  train_data = train_data.query("item_id not in @all_nan_items")
@@ -435,27 +431,27 @@ class TimeSeriesPredictor:
  @apply_presets(get_predictor_presets())
  def fit(
  self,
- train_data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
- tuning_data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
- time_limit: Optional[int] = None,
- presets: Optional[str] = None,
- hyperparameters: Optional[Union[str, dict[Union[str, Type], Any]]] = None,
- hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
- excluded_model_types: Optional[list[str]] = None,
+ train_data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+ tuning_data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+ time_limit: int | None = None,
+ presets: str | None = None,
+ hyperparameters: str | dict[str | Type, Any] | None = None,
+ hyperparameter_tune_kwargs: str | dict | None = None,
+ excluded_model_types: list[str] | None = None,
  num_val_windows: int = 1,
- val_step_size: Optional[int] = None,
- refit_every_n_windows: Optional[int] = 1,
+ val_step_size: int | None = None,
+ refit_every_n_windows: int | None = 1,
  refit_full: bool = False,
  enable_ensemble: bool = True,
  skip_model_selection: bool = False,
- random_seed: Optional[int] = 123,
- verbosity: Optional[int] = None,
+ random_seed: int | None = 123,
+ verbosity: int | None = None,
  ) -> "TimeSeriesPredictor":
  """Fit probabilistic forecasting models to the given time series dataset.

  Parameters
  ----------
- train_data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+ train_data : TimeSeriesDataFrame | pd.DataFrame | Path | str
  Training data in the :class:`~autogluon.timeseries.TimeSeriesDataFrame` format.

  Time series with length ``<= (num_val_windows + 1) * prediction_length`` will be ignored during training.
@@ -481,7 +477,7 @@ class TimeSeriesPredictor:

  If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
  If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
- tuning_data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str], optional
+ tuning_data : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
  Data reserved for model selection and hyperparameter tuning, rather than training individual models. Also
  used to compute the validation scores. Note that only the last ``prediction_length`` time steps of each
  time series are used for computing the validation score.
@@ -673,8 +669,10 @@ class TimeSeriesPredictor:

  """
  time_start = time.time()
- if self._learner.is_fit:
- raise AssertionError("Predictor is already fit! To fit additional models create a new `Predictor`.")
+ if self.is_fit:
+ raise AssertionError(
+ "Predictor is already fit! To fit additional models create a new `TimeSeriesPredictor`."
+ )

  if verbosity is None:
  verbosity = self.verbosity
@@ -731,11 +729,11 @@ class TimeSeriesPredictor:
  tuning_data = self._check_and_prepare_data_frame_for_evaluation(tuning_data, name="tuning_data")
  logger.info(f"Provided tuning_data has {self._get_dataset_stats(tuning_data)}")
  # TODO: Use num_val_windows to perform multi-window backtests on tuning_data
- if num_val_windows > 0:
+ if num_val_windows > 1:
  logger.warning(
  "\tSetting num_val_windows = 0 (disabling backtesting on train_data) because tuning_data is provided."
  )
- num_val_windows = 0
+ num_val_windows = 1

  if num_val_windows == 0 and tuning_data is None:
  raise ValueError("Please set num_val_windows >= 1 or provide custom tuning_data")
@@ -748,13 +746,11 @@ class TimeSeriesPredictor:

  if not skip_model_selection:
  train_data = self._filter_useless_train_data(
- train_data, num_val_windows=num_val_windows, val_step_size=val_step_size
+ train_data,
+ num_val_windows=0 if tuning_data is not None else num_val_windows,
+ val_step_size=val_step_size,
  )

- val_splitter = ExpandingWindowSplitter(
- prediction_length=self.prediction_length, num_val_windows=num_val_windows, val_step_size=val_step_size
- )
-
  time_left = None if time_limit is None else time_limit - (time.time() - time_start)
  self._learner.fit(
  train_data=train_data,
@@ -764,7 +760,8 @@ class TimeSeriesPredictor:
  excluded_model_types=excluded_model_types,
  time_limit=time_left,
  verbosity=verbosity,
- val_splitter=val_splitter,
+ num_val_windows=(num_val_windows,) if isinstance(num_val_windows, int) else num_val_windows,
+ val_step_size=val_step_size,
  refit_every_n_windows=refit_every_n_windows,
  skip_model_selection=skip_model_selection,
  enable_ensemble=enable_ensemble,
@@ -781,21 +778,22 @@ class TimeSeriesPredictor:

  def model_names(self) -> list[str]:
  """Returns the list of model names trained by this predictor object."""
+ self._assert_is_fit("model_names")
  return self._trainer.get_model_names()

  def predict(
  self,
- data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
- known_covariates: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
- model: Optional[str] = None,
+ data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+ known_covariates: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+ model: str | None = None,
  use_cache: bool = True,
- random_seed: Optional[int] = 123,
+ random_seed: int | None = 123,
  ) -> TimeSeriesDataFrame:
  """Return quantile and mean forecasts for the given dataset, starting from the end of each time series.

  Parameters
  ----------
- data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+ data : TimeSeriesDataFrame | pd.DataFrame | Path | str
  Historical time series data for which the forecast needs to be made.

  The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
@@ -803,7 +801,7 @@ class TimeSeriesPredictor:

  If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
  If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
- known_covariates : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str], optional
+ known_covariates : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
  If ``known_covariates_names`` were specified when creating the predictor, it is necessary to provide the
  values of the known covariates for each time series during the forecast horizon. Specifically:

@@ -853,6 +851,7 @@ class TimeSeriesPredictor:
  B 2020-03-04 17.1
  2020-03-05 8.3
  """
+ self._assert_is_fit("predict")
  # Save original item_id order to return predictions in the same order as input data
  data = self._to_data_frame(data)
  original_item_id_order = data.item_ids
@@ -866,14 +865,209 @@ class TimeSeriesPredictor:
  use_cache=use_cache,
  random_seed=random_seed,
  )
- return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=ITEMID))
+ return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID))
+
+ @overload
+ def backtest_predictions(
+ self,
+ data: TimeSeriesDataFrame | None = None,
+ *,
+ model: str | None = None,
+ num_val_windows: int | None = None,
+ val_step_size: int | None = None,
+ use_cache: bool = True,
+ ) -> list[TimeSeriesDataFrame]: ...
+
+ @overload
+ def backtest_predictions(
+ self,
+ data: TimeSeriesDataFrame | None = None,
+ *,
+ model: list[str],
+ num_val_windows: int | None = None,
+ val_step_size: int | None = None,
+ use_cache: bool = True,
+ ) -> dict[str, list[TimeSeriesDataFrame]]: ...
+
+ def backtest_predictions(
+ self,
+ data: TimeSeriesDataFrame | None = None,
+ *,
+ model: str | list[str] | None = None,
+ num_val_windows: int | None = None,
+ val_step_size: int | None = None,
+ use_cache: bool = True,
+ ) -> list[TimeSeriesDataFrame] | dict[str, list[TimeSeriesDataFrame]]:
+ """Return predictions for multiple validation windows.
+
+ When ``data=None``, returns the predictions that were saved during training. Otherwise, generates new
+ predictions by splitting ``data`` into multiple windows using an expanding window strategy.
+
+ The corresponding target values for each window can be obtained using
+ :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_targets`.
+
+ Parameters
+ ----------
+ data : TimeSeriesDataFrame, optional
+ Time series data to generate predictions for. If ``None``, returns the predictions that were saved
+ during training on ``train_data``.
+
+ If provided, all time series in ``data`` must have length at least
+ ``prediction_length + (num_val_windows - 1) * val_step_size + 1``.
+
+ The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+ the predictor.
+ model : str, list[str], or None, default = None
+ Name of the model(s) to generate predictions with. By default, the best model during training
+ (with highest validation score) will be used.
+
+ - If ``str``: Returns predictions for a single model as a list.
+ - If ``list[str]``: Returns predictions for multiple models as a dict mapping model names to lists.
+ - If ``None``: Uses the best model.
+ num_val_windows : int, optional
+ Number of validation windows to generate. If ``None``, uses the ``num_val_windows`` value from training
+ configuration when ``data=None``, otherwise defaults to 1.
+
+ For example, with ``prediction_length=2``, ``num_val_windows=3``, and ``val_step_size=1``, the validation
+ windows are::
+
+ |-------------------|
+ | x x x x x y y - - |
+ | x x x x x x y y - |
+ | x x x x x x x y y |
+
+ where ``x`` denotes training time steps and ``y`` denotes validation time steps for each window.
+ val_step_size : int, optional
+ Number of time steps between the start of consecutive validation windows. If ``None``, defaults to
+ ``prediction_length``.
+ use_cache : bool, default = True
+ If True, will attempt to use cached predictions. If False, cached predictions will be ignored.
+ This argument is ignored if ``cache_predictions`` was set to False when creating the ``TimeSeriesPredictor``.
+
+ Returns
+ -------
+ list[TimeSeriesDataFrame] or dict[str, list[TimeSeriesDataFrame]]
+ Predictions for each validation window.
+
+ - If ``model`` is a ``str`` or ``None``: Returns a list of length ``num_val_windows``, where each element
+ contains the predictions for one validation window.
+ - If ``model`` is a ``list[str]``: Returns a dict mapping each model name to a list of predictions for
+ each validation window.
+
+ Examples
+ --------
+ Make predictions on new data with the best model
+
+ >>> predictor.backtest_predictions(test_data, num_val_windows=2)
+
+ Load validation predictions for all models that were saved during training
+
+ >>> predictor.backtest_predictions(model=predictor.model_names())
+
+ See Also
+ --------
+ backtest_targets
+ Return target values aligned with predictions.
+ evaluate
+ Evaluate forecast accuracy on a hold-out set.
+ predict
+ Generate forecasts for future time steps.
+ """
+ self._assert_is_fit("backtest_predictions")
+ if data is not None:
+ data = self._check_and_prepare_data_frame(data)
+
+ if model is None:
+ model_names = [self.model_best]
+ elif isinstance(model, str):
+ model_names = [model]
+ else:
+ model_names = model
+
+ result = self._learner.backtest_predictions(
+ data=data,
+ model_names=model_names,
+ num_val_windows=num_val_windows,
+ val_step_size=val_step_size,
+ use_cache=use_cache,
+ )
+
+ if isinstance(model, list):
+ return result
+ else:
+ return result[model_names[0]]
+
+ def backtest_targets(
+ self,
+ data: TimeSeriesDataFrame | None = None,
+ *,
+ num_val_windows: int | None = None,
+ val_step_size: int | None = None,
+ ) -> list[TimeSeriesDataFrame]:
+ """Return target values for each validation window.
+
+ Returns the actual target values corresponding to each validation window used in
+ :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`. The returned targets are aligned
+ with the predictions, making it easy to compute custom evaluation metrics or analyze forecast errors.
+
+ Parameters
+ ----------
+ data : TimeSeriesDataFrame, optional
+ Time series data to extract targets from. If ``None``, returns the targets from the validation windows
+ used during training.
+
+ If provided, all time series in ``data`` must have length at least
+ ``prediction_length + (num_val_windows - 1) * val_step_size + 1``.
+
+ The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+ the predictor.
+ num_val_windows : int, optional
+ Number of validation windows to extract targets for. If ``None``, uses the ``num_val_windows`` value from
+ training configuration when ``data=None``, otherwise defaults to 1.
+
+ This should match the ``num_val_windows`` argument passed to
+ :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`.
+ val_step_size : int, optional
+ Number of time steps between the start of consecutive validation windows. If ``None``, defaults to
+ ``prediction_length``.
+
+ This should match the ``val_step_size`` argument passed to
+ :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`.
+
+ Returns
+ -------
+ list[TimeSeriesDataFrame]
+ Target values for each validation window. Returns a list of length ``num_val_windows``,
+ where each element contains the full time series data for one validation window.
+ Each dataframe includes both historical context and the last ``prediction_length`` time steps
+ that represent the target values to compare against predictions.
+
+ The returned targets are aligned with the output of
+ :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`, so ``targets[i]`` corresponds
+ to ``predictions[i]`` for the i-th validation window.
+
+ See Also
+ --------
+ backtest_predictions
+ Return predictions for multiple validation windows.
+ evaluate
+ Evaluate forecast accuracy on a hold-out set.
+ """
+ self._assert_is_fit("backtest_targets")
+ if data is not None:
+ data = self._check_and_prepare_data_frame(data)
+ return self._learner.backtest_targets(
+ data=data,
+ num_val_windows=num_val_windows,
+ val_step_size=val_step_size,
+ )

  def evaluate(
  self,
- data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
- model: Optional[str] = None,
- metrics: Optional[Union[str, TimeSeriesScorer, list[Union[str, TimeSeriesScorer]]]] = None,
- cutoff: Optional[int] = None,
+ data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+ model: str | None = None,
+ metrics: str | TimeSeriesScorer | list[str | TimeSeriesScorer] | None = None,
+ cutoff: int | None = None,
  display: bool = False,
  use_cache: bool = True,
  ) -> dict[str, float]:
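The new ``backtest_predictions`` / ``backtest_targets`` pair returns window-aligned predictions and targets. A minimal sketch of scoring each window by hand (assumes a fitted ``predictor`` and a ``test_data`` TimeSeriesDataFrame; the MAE computation is illustrative, not part of the diff):

    predictions = predictor.backtest_predictions(test_data, num_val_windows=2)
    targets = predictor.backtest_targets(test_data, num_val_windows=2)
    for window_preds, window_data in zip(predictions, targets):
        # the last `prediction_length` steps of each series are the held-out values
        y_true = window_data.groupby(level="item_id", sort=False)[predictor.target].tail(
            predictor.prediction_length
        )
        mae = (y_true - window_preds["mean"]).abs().mean()
        print(f"window MAE: {mae:.3f}")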
@@ -890,7 +1084,7 @@ class TimeSeriesPredictor:

  Parameters
  ----------
- data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+ data : TimeSeriesDataFrame | pd.DataFrame | Path | str
  The data to evaluate the best model on. If a ``cutoff`` is not provided, the last ``prediction_length``
  time steps of each time series in ``data`` will be held out for prediction and forecast accuracy will
  be calculated on these time steps. When a ``cutoff`` is provided, the ``-cutoff``-th to the
@@ -907,7 +1101,7 @@ class TimeSeriesPredictor:
  model : str, optional
  Name of the model that you would like to evaluate. By default, the best model during training
  (with highest validation score) will be used.
- metrics : str, TimeSeriesScorer or list[Union[str, TimeSeriesScorer]], optional
+ metrics : str, TimeSeriesScorer or list[str | TimeSeriesScorer], optional
  Metric or a list of metrics to compute scores with. Defaults to ``self.eval_metric``. Supports both
  metric names as strings and custom metrics based on TimeSeriesScorer.
  cutoff : int, optional
@@ -928,7 +1122,7 @@ class TimeSeriesPredictor:
  will have their signs flipped to obey this convention. For example, negative MAPE values will be reported.
  To get the ``eval_metric`` score, do ``output[predictor.eval_metric.name]``.
  """
-
+ self._assert_is_fit("evaluate")
  data = self._check_and_prepare_data_frame(data)
  data = self._check_and_prepare_data_frame_for_evaluation(data, cutoff=cutoff)

@@ -940,15 +1134,15 @@ class TimeSeriesPredictor:

  def feature_importance(
  self,
- data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
- model: Optional[str] = None,
- metric: Optional[Union[str, TimeSeriesScorer]] = None,
- features: Optional[list[str]] = None,
- time_limit: Optional[float] = None,
+ data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+ model: str | None = None,
+ metric: str | TimeSeriesScorer | None = None,
+ features: list[str] | None = None,
+ time_limit: float | None = None,
  method: Literal["naive", "permutation"] = "permutation",
  subsample_size: int = 50,
- num_iterations: Optional[int] = None,
- random_seed: Optional[int] = 123,
+ num_iterations: int | None = None,
+ random_seed: int | None = 123,
  relative_scores: bool = False,
  include_confidence_band: bool = True,
  confidence_level: float = 0.99,
@@ -1045,6 +1239,7 @@ class TimeSeriesPredictor:
  'importance': The estimated feature importance score.
  'stddev': The standard deviation of the feature importance score. If NaN, then not enough ``num_iterations`` were used.
  """
+ self._assert_is_fit("feature_importance")
  if data is not None:
  data = self._check_and_prepare_data_frame(data)
  data = self._check_and_prepare_data_frame_for_evaluation(data)
@@ -1091,7 +1286,7 @@ class TimeSeriesPredictor:
  return version

  @classmethod
- def load(cls, path: Union[str, Path], require_version_match: bool = True) -> "TimeSeriesPredictor":
+ def load(cls, path: str | Path, require_version_match: bool = True) -> "TimeSeriesPredictor":
  """Load an existing ``TimeSeriesPredictor`` from given ``path``.

  .. warning::
@@ -1175,15 +1370,14 @@ class TimeSeriesPredictor:
  @property
  def model_best(self) -> str:
  """Returns the name of the best model from trainer."""
+ self._assert_is_fit("model_best")
  if self._trainer.model_best is not None:
  models = self._trainer.get_model_names()
  if self._trainer.model_best in models:
  return self._trainer.model_best
  return self._trainer.get_model_best()

- def persist(
- self, models: Union[Literal["all", "best"], list[str]] = "best", with_ancestors: bool = True
- ) -> list[str]:
+ def persist(self, models: Literal["all", "best"] | list[str] = "best", with_ancestors: bool = True) -> list[str]:
  """Persist models in memory for reduced inference latency. This is particularly important if the models are being used for online
  inference where low latency is critical. If models are not persisted in memory, they are loaded from disk every time they are
  asked to make predictions. This is especially cumbersome for large deep learning based models which have to be loaded into
@@ -1206,6 +1400,7 @@ class TimeSeriesPredictor:
  list_of_models : list[str]
  List of persisted model names.
  """
+ self._assert_is_fit("persist")
  return self._learner.persist_trainer(models=models, with_ancestors=with_ancestors)

  def unpersist(self) -> list[str]:
@@ -1224,10 +1419,10 @@ class TimeSeriesPredictor:

  def leaderboard(
  self,
- data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
- cutoff: Optional[int] = None,
+ data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+ cutoff: int | None = None,
  extra_info: bool = False,
- extra_metrics: Optional[list[Union[str, TimeSeriesScorer]]] = None,
+ extra_metrics: list[str | TimeSeriesScorer] | None = None,
  display: bool = False,
  use_cache: bool = True,
  **kwargs,
@@ -1252,7 +1447,7 @@ class TimeSeriesPredictor:

  Parameters
  ----------
- data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str], optional
+ data : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
  dataset used for additional evaluation. Must include both historical and future data (i.e., length of all
  time series in ``data`` must be at least ``prediction_length + 1``, if ``cutoff`` is not provided,
  ``-cutoff + 1`` otherwise).
@@ -1271,7 +1466,7 @@ class TimeSeriesPredictor:
  If True, the leaderboard will contain an additional column ``hyperparameters`` with the hyperparameters used
  by each model during training. An empty dictionary ``{}`` means that the model was trained with default
  hyperparameters.
- extra_metrics : list[Union[str, TimeSeriesScorer]], optional
+ extra_metrics : list[str | TimeSeriesScorer], optional
  A list of metrics to calculate scores for and include in the output DataFrame.

  Only valid when ``data`` is specified. The scores refer to the scores on ``data`` (same data as used to
@@ -1293,6 +1488,7 @@ class TimeSeriesPredictor:
  The leaderboard containing information on all models and in order of best model to worst in terms of
  test performance.
  """
+ self._assert_is_fit("leaderboard")
  if "silent" in kwargs:
  # keep `silent` logic for backwards compatibility
  assert isinstance(kwargs["silent"], bool)
@@ -1317,12 +1513,12 @@ class TimeSeriesPredictor:
  print(leaderboard)
  return leaderboard

- def make_future_data_frame(self, data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]) -> pd.DataFrame:
+ def make_future_data_frame(self, data: TimeSeriesDataFrame | pd.DataFrame | Path | str) -> pd.DataFrame:
  """Generate a dataframe with the ``item_id`` and ``timestamp`` values corresponding to the forecast horizon.

  Parameters
  ----------
- data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+ data : TimeSeriesDataFrame | pd.DataFrame | Path | str
  Historical time series data.

  Returns
@@ -1370,6 +1566,7 @@ class TimeSeriesPredictor:
  Dict containing various detailed information. We do not recommend directly printing this dict as it may
  be very large.
  """
+ self._assert_is_fit("fit_summary")
  # TODO: HPO-specific information currently not reported in fit_summary
  # TODO: Revisit after ray tune integration

@@ -1430,6 +1627,7 @@ class TimeSeriesPredictor:
  ``predictor.predict(data)`` is called will be the refit_full version instead of the original version of the
  model. Has no effect if ``model`` is not the best model.
  """
+ self._assert_is_fit("refit_full")
  logger.warning(
  "\tWARNING: refit_full functionality for TimeSeriesPredictor is experimental "
  "and is not yet supported by all models."
@@ -1482,7 +1680,7 @@ class TimeSeriesPredictor:
  trainer = self._trainer
  train_data = trainer.load_train_data()
  val_data = trainer.load_val_data()
- base_model_names = trainer.get_model_names(level=0)
+ base_model_names = trainer.get_model_names(layer=0)
  pred_proba_dict_val: dict[str, list[TimeSeriesDataFrame]] = {
  model_name: trainer._get_model_oof_predictions(model_name)
  for model_name in base_model_names
@@ -1498,7 +1696,7 @@ class TimeSeriesPredictor:
  )

  y_val: list[TimeSeriesDataFrame] = [
- select_target(df) for df in trainer._get_ensemble_oof_data(train_data=train_data, val_data=val_data)
+ select_target(df) for df in trainer._get_validation_windows(train_data=train_data, val_data=val_data)
  ]
  y_test: TimeSeriesDataFrame = select_target(test_data)

@@ -1518,27 +1716,27 @@ class TimeSeriesPredictor:

  def plot(
  self,
- data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
- predictions: Optional[TimeSeriesDataFrame] = None,
- quantile_levels: Optional[list[float]] = None,
- item_ids: Optional[list[Union[str, int]]] = None,
+ data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+ predictions: TimeSeriesDataFrame | None = None,
+ quantile_levels: list[float] | None = None,
+ item_ids: list[str | int] | None = None,
  max_num_item_ids: int = 8,
- max_history_length: Optional[int] = None,
- point_forecast_column: Optional[str] = None,
- matplotlib_rc_params: Optional[dict] = None,
+ max_history_length: int | None = None,
+ point_forecast_column: str | None = None,
+ matplotlib_rc_params: dict | None = None,
  ):
  """Plot historical time series values and the forecasts.

  Parameters
  ----------
- data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+ data : TimeSeriesDataFrame | pd.DataFrame | Path | str
  Observed time series data.
  predictions : TimeSeriesDataFrame, optional
  Predictions generated by calling :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict`.
  quantile_levels : list[float], optional
  Quantile levels for which to plot the prediction intervals. Defaults to lowest & highest quantile levels
  available in ``predictions``.
- item_ids : list[Union[str, int]], optional
+ item_ids : list[str | int], optional
  If provided, plots will only be generated for time series with these item IDs. By default (if set to
  ``None``), item IDs are selected randomly. In either case, plots are generated for at most
  ``max_num_item_ids`` time series.
@@ -1621,7 +1819,7 @@ class TimeSeriesPredictor:
  for q in quantile_levels:
  ax.fill_between(forecast.index, point_forecast, forecast[str(q)], color="C1", alpha=0.2)
  if len(axes) > len(item_ids):
- axes[len(item_ids)].set_axis_off()
- handles, labels = axes[0].get_legend_handles_labels()
+ axes[len(item_ids)].set_axis_off() # type: ignore
+ handles, labels = axes[0].get_legend_handles_labels() # type: ignore
  fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.0), ncols=len(handles))
  return fig
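Finally, a hedged example of the ``plot`` API touched in the last hunk (assumes a fitted ``predictor`` and a ``train_data`` frame; the parameter values are illustrative):

    predictions = predictor.predict(train_data)
    fig = predictor.plot(
        train_data,
        predictions=predictions,
        quantile_levels=[0.1, 0.9],
        max_num_item_ids=4,
    )
    fig.savefig("forecasts.png")  # plot() returns a matplotlib Figure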