autogluon.timeseries 1.4.1b20250907__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of autogluon.timeseries may be problematic.

Files changed (94)
  1. autogluon/timeseries/configs/hyperparameter_presets.py +2 -2
  2. autogluon/timeseries/configs/predictor_presets.py +22 -0
  3. autogluon/timeseries/dataset/ts_dataframe.py +97 -86
  4. autogluon/timeseries/learner.py +70 -35
  5. autogluon/timeseries/metrics/__init__.py +4 -4
  6. autogluon/timeseries/metrics/abstract.py +8 -8
  7. autogluon/timeseries/metrics/point.py +9 -9
  8. autogluon/timeseries/metrics/quantile.py +5 -5
  9. autogluon/timeseries/metrics/utils.py +4 -4
  10. autogluon/timeseries/models/__init__.py +4 -1
  11. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
  12. autogluon/timeseries/models/abstract/model_trial.py +2 -1
  13. autogluon/timeseries/models/abstract/tunable.py +8 -8
  14. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
  15. autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
  16. autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
  17. autogluon/timeseries/models/chronos/__init__.py +2 -1
  18. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  19. autogluon/timeseries/models/chronos/model.py +125 -87
  20. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +68 -36
  21. autogluon/timeseries/models/ensemble/__init__.py +36 -2
  22. autogluon/timeseries/models/ensemble/abstract.py +14 -46
  23. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  24. autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
  25. autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
  26. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  27. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  28. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
  29. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  31. autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
  32. autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
  33. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  34. autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
  35. autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +25 -22
  36. autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
  37. autogluon/timeseries/models/gluonts/abstract.py +32 -31
  38. autogluon/timeseries/models/gluonts/dataset.py +11 -11
  39. autogluon/timeseries/models/local/__init__.py +0 -7
  40. autogluon/timeseries/models/local/abstract_local_model.py +15 -18
  41. autogluon/timeseries/models/local/naive.py +2 -2
  42. autogluon/timeseries/models/local/npts.py +7 -1
  43. autogluon/timeseries/models/local/statsforecast.py +12 -12
  44. autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
  45. autogluon/timeseries/models/registry.py +3 -4
  46. autogluon/timeseries/models/toto/__init__.py +3 -0
  47. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  48. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  49. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  50. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  51. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  52. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  53. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  56. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  57. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  58. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  59. autogluon/timeseries/models/toto/dataloader.py +108 -0
  60. autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
  61. autogluon/timeseries/models/toto/model.py +249 -0
  62. autogluon/timeseries/predictor.py +475 -156
  63. autogluon/timeseries/regressor.py +27 -30
  64. autogluon/timeseries/splitter.py +3 -27
  65. autogluon/timeseries/trainer/ensemble_composer.py +444 -0
  66. autogluon/timeseries/trainer/model_set_builder.py +9 -9
  67. autogluon/timeseries/trainer/prediction_cache.py +16 -16
  68. autogluon/timeseries/trainer/trainer.py +300 -275
  69. autogluon/timeseries/trainer/utils.py +17 -0
  70. autogluon/timeseries/transforms/covariate_scaler.py +8 -8
  71. autogluon/timeseries/transforms/target_scaler.py +15 -15
  72. autogluon/timeseries/utils/constants.py +10 -0
  73. autogluon/timeseries/utils/datetime/lags.py +1 -3
  74. autogluon/timeseries/utils/datetime/seasonality.py +1 -3
  75. autogluon/timeseries/utils/features.py +31 -14
  76. autogluon/timeseries/utils/forecast.py +6 -7
  77. autogluon/timeseries/utils/timer.py +173 -0
  78. autogluon/timeseries/version.py +1 -1
  79. autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
  80. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +39 -22
  81. autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
  82. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
  83. autogluon/timeseries/evaluator.py +0 -6
  84. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
  85. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  86. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
  87. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
  88. autogluon.timeseries-1.4.1b20250907-py3.9-nspkg.pth +0 -1
  89. autogluon.timeseries-1.4.1b20250907.dist-info/RECORD +0 -75
  90. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
  91. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
  92. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
  93. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
  94. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
autogluon/timeseries/predictor.py

@@ -5,7 +5,7 @@ import os
 import pprint
 import time
 from pathlib import Path
-from typing import Any, Literal, Optional, Type, Union, cast
+from typing import Any, Literal, Type, cast, overload
 
 import numpy as np
 import pandas as pd
@@ -22,10 +22,9 @@ from autogluon.core.utils.loaders import load_pkl, load_str
 from autogluon.core.utils.savers import save_pkl, save_str
 from autogluon.timeseries import __version__ as current_ag_version
 from autogluon.timeseries.configs import get_predictor_presets
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.learner import TimeSeriesLearner
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
-from autogluon.timeseries.splitter import ExpandingWindowSplitter
 from autogluon.timeseries.trainer import TimeSeriesTrainer
 from autogluon.timeseries.utils.forecast import make_future_data_frame
 
@@ -67,7 +66,7 @@ class TimeSeriesPredictor:
 
         If ``freq`` is provided when creating the predictor, all data passed to the predictor will be automatically
         resampled at this frequency.
-    eval_metric : Union[str, TimeSeriesScorer], default = "WQL"
+    eval_metric : str | TimeSeriesScorer, default = "WQL"
         Metric by which predictions will be ultimately evaluated on future test data. AutoGluon tunes hyperparameters
         in order to improve this metric on validation data, and ranks models (on validation data) according to this
         metric.
@@ -125,7 +124,7 @@ class TimeSeriesPredictor:
         debug messages from AutoGluon and all logging in dependencies (GluonTS, PyTorch Lightning, AutoGluon-Tabular, etc.)
     log_to_file: bool, default = True
         Whether to save the logs into a file for later reference
-    log_file_path: Union[str, Path], default = "auto"
+    log_file_path: str | Path, default = "auto"
         File path to save the logs.
         If auto, logs will be saved under ``predictor_path/logs/predictor_log.txt``.
         Will be ignored if ``log_to_file`` is set to False
@@ -146,20 +145,20 @@ class TimeSeriesPredictor:
 
    def __init__(
        self,
-       target: Optional[str] = None,
-       known_covariates_names: Optional[list[str]] = None,
+       target: str | None = None,
+       known_covariates_names: list[str] | None = None,
        prediction_length: int = 1,
-       freq: Optional[str] = None,
-       eval_metric: Union[str, TimeSeriesScorer, None] = None,
-       eval_metric_seasonal_period: Optional[int] = None,
-       horizon_weight: Optional[list[float]] = None,
-       path: Optional[Union[str, Path]] = None,
+       freq: str | None = None,
+       eval_metric: str | TimeSeriesScorer | None = None,
+       eval_metric_seasonal_period: int | None = None,
+       horizon_weight: list[float] | None = None,
+       path: str | Path | None = None,
        verbosity: int = 2,
        log_to_file: bool = True,
-       log_file_path: Union[str, Path] = "auto",
-       quantile_levels: Optional[list[float]] = None,
+       log_file_path: str | Path = "auto",
+       quantile_levels: list[float] | None = None,
        cache_predictions: bool = True,
-       label: Optional[str] = None,
+       label: str | None = None,
        **kwargs,
    ):
        self.verbosity = verbosity
@@ -221,20 +220,6 @@ class TimeSeriesPredictor:
            ensemble_model_type=kwargs.pop("ensemble_model_type", None),
        )
 
-       if "ignore_time_index" in kwargs:
-           raise TypeError(
-               "`ignore_time_index` argument to TimeSeriesPredictor.__init__() has been deprecated.\n"
-               "If your data has irregular timestamps, please either 1) specify the desired regular frequency when "
-               "creating the predictor as `TimeSeriesPredictor(freq=...)` or 2) manually convert timestamps to "
-               "regular frequency with `data.convert_frequency(freq=...)`."
-           )
-       for k in ["learner_type", "learner_kwargs"]:
-           if k in kwargs:
-               val = kwargs.pop(k)
-               logger.warning(
-                   f"Passing `{k}` to TimeSeriesPredictor has been deprecated and will be removed in v1.4. "
-                   f"The provided value {val} will be ignored."
-               )
        if len(kwargs) > 0:
            for key in kwargs:
                raise TypeError(f"TimeSeriesPredictor.__init__() got an unexpected keyword argument '{key}'")
@@ -243,7 +228,16 @@ class TimeSeriesPredictor:
    def _trainer(self) -> TimeSeriesTrainer:
        return self._learner.load_trainer()  # noqa
 
-   def _setup_log_to_file(self, log_to_file: bool, log_file_path: Union[str, Path]) -> None:
+   @property
+   def is_fit(self) -> bool:
+       return self._learner.is_fit
+
+   def _assert_is_fit(self, method_name: str) -> None:
+       """Check if predictor is fit and raise AssertionError with informative message if not."""
+       if not self.is_fit:
+           raise AssertionError(f"Predictor is not fit. Call `.fit` before calling `.{method_name}`. ")
+
+   def _setup_log_to_file(self, log_to_file: bool, log_file_path: str | Path) -> None:
        if log_to_file:
            if log_file_path == "auto":
                log_file_path = os.path.join(self.path, "logs", self._predictor_log_file_name)
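
The new ``is_fit`` property also gives callers a public way to guard against using an unfitted or freshly loaded predictor. A minimal sketch (the save path is hypothetical; the public methods changed below call ``_assert_is_fit`` internally)::

    from autogluon.timeseries import TimeSeriesPredictor

    predictor = TimeSeriesPredictor.load("my_predictor/")  # hypothetical path
    if predictor.is_fit:
        forecasts = predictor.predict(train_data)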
@@ -253,7 +247,7 @@
 
    def _to_data_frame(
        self,
-       data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
+       data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
        name: str = "data",
    ) -> TimeSeriesDataFrame:
        if isinstance(data, TimeSeriesDataFrame):
@@ -274,7 +268,7 @@
 
    def _check_and_prepare_data_frame(
        self,
-       data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
+       data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
        name: str = "data",
    ) -> TimeSeriesDataFrame:
        """Ensure that TimeSeriesDataFrame has a sorted index and a valid frequency.
@@ -283,7 +277,7 @@
 
        Parameters
        ----------
-       data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+       data : TimeSeriesDataFrame | pd.DataFrame | Path | str
            Data as a dataframe or path to file storing the data.
        name : str
            Name of the data that will be used in log messages (e.g., 'train_data', 'tuning_data', or 'data').
@@ -326,7 +320,7 @@
        return df
 
    def _check_and_prepare_data_frame_for_evaluation(
-       self, data: TimeSeriesDataFrame, cutoff: Optional[int] = None, name: str = "data"
+       self, data: TimeSeriesDataFrame, cutoff: int | None = None, name: str = "data"
    ) -> TimeSeriesDataFrame:
        """
        Make sure that provided evaluation data includes both historical and future time series values.
@@ -366,36 +360,10 @@
            f"Median time series length is {median_length:.0f} (min={min_length}, max={max_length}). "
        )
 
-   def _reduce_num_val_windows_if_necessary(
-       self,
-       train_data: TimeSeriesDataFrame,
-       original_num_val_windows: int,
-       val_step_size: int,
-   ) -> int:
-       """Adjust num_val_windows based on the length of time series in train_data.
-
-       Chooses num_val_windows such that TS with median length is long enough to perform num_val_windows validations
-       (at least 1, at most `original_num_val_windows`).
-
-       In other words, find largest `num_val_windows` that satisfies
-       median_length >= min_train_length + prediction_length + (num_val_windows - 1) * val_step_size
-       """
-       median_length = train_data.num_timesteps_per_item().median()
-       num_val_windows_for_median_ts = int(
-           (median_length - self._min_train_length - self.prediction_length) // val_step_size + 1
-       )
-       new_num_val_windows = min(original_num_val_windows, max(1, num_val_windows_for_median_ts))
-       if new_num_val_windows < original_num_val_windows:
-           logger.warning(
-               f"Time series in train_data are too short for chosen num_val_windows={original_num_val_windows}. "
-               f"Reducing num_val_windows to {new_num_val_windows}."
-           )
-       return new_num_val_windows
-
    def _filter_useless_train_data(
        self,
        train_data: TimeSeriesDataFrame,
-       num_val_windows: int,
+       num_val_windows: tuple[int, ...],
        val_step_size: int,
    ) -> TimeSeriesDataFrame:
        """Remove time series from train_data that either contain all NaNs or are too short for chosen settings.
@@ -406,7 +374,8 @@
        In other words, this method removes from train_data all time series with only NaN values or length less than
        min_train_length + prediction_length + (num_val_windows - 1) * val_step_size
        """
-       min_length = self._min_train_length + self.prediction_length + (num_val_windows - 1) * val_step_size
+       total_num_val_windows = sum(num_val_windows)
+       min_length = self._min_train_length + self.prediction_length + (total_num_val_windows - 1) * val_step_size
        train_lengths = train_data.num_timesteps_per_item()
        too_short_items = train_lengths.index[train_lengths < min_length]
 
@@ -417,7 +386,9 @@
        )
        train_data = train_data.query("item_id not in @too_short_items")
 
-       all_nan_items = train_data.item_ids[train_data[self.target].isna().groupby(ITEMID, sort=False).all()]
+       all_nan_items = train_data.item_ids[
+           train_data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
+       ]
        if len(all_nan_items) > 0:
            logger.info(f"\tRemoving {len(all_nan_items)} time series consisting of only NaN values from train_data.")
            train_data = train_data.query("item_id not in @all_nan_items")
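
To make the length cutoff above concrete, a worked instance of the ``min_length`` formula (``min_train_length = 5`` is an assumption for illustration; the real value comes from the predictor)::

    min_train_length = 5       # assumed; depends on the predictor
    prediction_length = 2
    num_val_windows = (2, 3)   # two ensemble layers: 5 backtest windows in total
    val_step_size = 2
    min_length = min_train_length + prediction_length + (sum(num_val_windows) - 1) * val_step_size
    assert min_length == 15    # series shorter than 15 steps are dropped from train_data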
@@ -435,27 +406,28 @@
    @apply_presets(get_predictor_presets())
    def fit(
        self,
-       train_data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
-       tuning_data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
-       time_limit: Optional[int] = None,
-       presets: Optional[str] = None,
-       hyperparameters: Optional[Union[str, dict[Union[str, Type], Any]]] = None,
-       hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
-       excluded_model_types: Optional[list[str]] = None,
-       num_val_windows: int = 1,
-       val_step_size: Optional[int] = None,
-       refit_every_n_windows: Optional[int] = 1,
+       train_data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+       tuning_data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+       time_limit: int | None = None,
+       presets: str | None = None,
+       hyperparameters: str | dict[str | Type, Any] | None = None,
+       hyperparameter_tune_kwargs: str | dict | None = None,
+       excluded_model_types: list[str] | None = None,
+       ensemble_hyperparameters: dict[str, Any] | list[dict[str, Any]] | None = None,
+       num_val_windows: int | tuple[int, ...] = 1,
+       val_step_size: int | None = None,
+       refit_every_n_windows: int | None = 1,
        refit_full: bool = False,
        enable_ensemble: bool = True,
        skip_model_selection: bool = False,
-       random_seed: Optional[int] = 123,
-       verbosity: Optional[int] = None,
+       random_seed: int | None = 123,
+       verbosity: int | None = None,
    ) -> "TimeSeriesPredictor":
        """Fit probabilistic forecasting models to the given time series dataset.
 
        Parameters
        ----------
-       train_data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+       train_data : TimeSeriesDataFrame | pd.DataFrame | Path | str
            Training data in the :class:`~autogluon.timeseries.TimeSeriesDataFrame` format.
 
            Time series with length ``<= (num_val_windows + 1) * prediction_length`` will be ignored during training.
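
The new ``ensemble_hyperparameters`` argument and the tuple form of ``num_val_windows`` work together; a minimal sketch of a multi-layer call (``train_data`` is a hypothetical ``TimeSeriesDataFrame``; the ensemble names follow the docstring further down in this hunk sequence)::

    predictor = TimeSeriesPredictor(prediction_length=48).fit(
        train_data,
        num_val_windows=(2, 3),  # 2 windows train layer-1 ensembles, 3 train layer 2
        ensemble_hyperparameters=[
            {"WeightedEnsemble": {"ensemble_size": 5}},  # layer 1
            {"PerformanceWeightedEnsemble": {}},         # layer 2
        ],
    )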
@@ -481,7 +453,7 @@
 
            If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
            If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
-       tuning_data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str], optional
+       tuning_data : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
            Data reserved for model selection and hyperparameter tuning, rather than training individual models. Also
            used to compute the validation scores. Note that only the last ``prediction_length`` time steps of each
            time series are used for computing the validation score.
@@ -623,13 +595,36 @@
                    presets="high_quality",
                    excluded_model_types=["DeepAR"],
                )
-       num_val_windows : int, default = 1
+       ensemble_hyperparameters : dict or list of dict, optional
+           Hyperparameters for ensemble models. Can be a single dict for one ensemble layer, or a list of dicts
+           for multiple ensemble layers (multi-layer stacking).
+
+           For single-layer ensembling (default)::
+
+               predictor.fit(
+                   ...,
+                   ensemble_hyperparameters={"WeightedEnsemble": {"ensemble_size": 10}},
+               )
+
+           For multi-layer ensembling, provide a list where each element configures one ensemble layer::
+
+               predictor.fit(
+                   ...,
+                   num_val_windows=(2, 3),
+                   ensemble_hyperparameters=[
+                       {"WeightedEnsemble": {"ensemble_size": 5}, "SimpleAverageEnsemble": {}},  # Layer 1
+                       {"PerformanceWeightedEnsemble": {}},  # Layer 2
+                   ],
+               )
+
+           When using multi-layer ensembling, ``num_val_windows`` must be a tuple of integers, and
+           ``len(ensemble_hyperparameters)`` must match ``len(num_val_windows)``.
+       num_val_windows : int | tuple[int, ...], default = 1
            Number of backtests done on ``train_data`` for each trained model to estimate the validation performance.
-           If ``num_val_windows > 1`` is provided, this value may be automatically reduced to ensure that the majority
-           of time series in ``train_data`` are long enough for the chosen number of backtests.
+           This parameter is also used to control multi-layer ensembling.
 
-           Increasing this parameter increases the training time roughly by a factor of ``num_val_windows // refit_every_n_windows``.
-           See ``refit_every_n_windows`` and ``val_step_size`` for details.
+           Increasing this parameter increases the training time roughly by a factor of
+           ``num_val_windows // refit_every_n_windows``. See ``refit_every_n_windows`` and ``val_step_size`` for
+           details.
 
           For example, for ``prediction_length=2``, ``num_val_windows=3`` and ``val_step_size=1`` the folds are::
 
@@ -640,12 +635,34 @@
 
           where ``x`` are the train time steps and ``y`` are the validation time steps.
 
-          This argument has no effect if ``tuning_data`` is provided.
+          This parameter can also be used to control how many of the backtesting windows are reserved for training
+          multiple layers of ensemble models. By default, AutoGluon-TimeSeries uses only a single layer of ensembles
+          trained on the backtest windows specified by the ``num_val_windows`` parameter. However, the
+          ``ensemble_hyperparameters`` argument can be used to specify multiple layers of ensembles. In this case,
+          a tuple of integers can be provided in ``num_val_windows`` to control how many of the backtesting windows
+          will be used to train which ensemble layers.
+
+          For example, if ``len(ensemble_hyperparameters) == 2``, a 2-tuple ``num_val_windows=(2, 3)`` is analogous
+          to ``num_val_windows=5``, except the first layer of ensemble models will be trained on the first two
+          backtest windows, and the second layer will be trained on the latter three. Validation scores of all models
+          will be computed on the last three windows.
+
+          If ``len(ensemble_hyperparameters) == 1``, then ``num_val_windows=(5,)`` has the same effect as
+          ``num_val_windows=5``.
+
+          If ``tuning_data`` is provided and ``len(ensemble_hyperparameters) == 1``, then this parameter is ignored.
+          Validation and ensemble training will be performed on ``tuning_data``.
+
+          If ``tuning_data`` is provided and ``len(ensemble_hyperparameters) > 1``, then this method expects that
+          ``len(num_val_windows) > 1``. In this case, the last element of ``num_val_windows`` will be ignored. The
+          last layer of ensemble training will be performed on ``tuning_data``. Validation scores will likewise be
+          computed on ``tuning_data``.
+
       val_step_size : int or None, default = None
          Step size between consecutive validation windows. If set to ``None``, defaults to ``prediction_length``
          provided when creating the predictor.
 
-         This argument has no effect if ``tuning_data`` is provided.
+         If ``tuning_data`` is provided and ``len(ensemble_hyperparameters) == 1``, then this parameter is ignored.
       refit_every_n_windows: int or None, default = 1
          When performing cross validation, each model will be retrained every ``refit_every_n_windows`` validation
          windows, where the number of validation windows is specified by ``num_val_windows``. Note that in the
@@ -673,8 +690,10 @@
 
        """
        time_start = time.time()
-       if self._learner.is_fit:
-           raise AssertionError("Predictor is already fit! To fit additional models create a new `Predictor`.")
+       if self.is_fit:
+           raise AssertionError(
+               "Predictor is already fit! To fit additional models create a new `TimeSeriesPredictor`."
+           )
 
        if verbosity is None:
            verbosity = self.verbosity
@@ -721,39 +740,29 @@
        if val_step_size is None:
            val_step_size = self.prediction_length
 
-       if num_val_windows > 0:
-           num_val_windows = self._reduce_num_val_windows_if_necessary(
-               train_data, original_num_val_windows=num_val_windows, val_step_size=val_step_size
-           )
+       num_val_windows, ensemble_hyperparameters = self._validate_and_normalize_validation_and_ensemble_inputs(
+           num_val_windows=num_val_windows,
+           ensemble_hyperparameters=ensemble_hyperparameters,
+           val_step_size=val_step_size,
+           median_timeseries_length=train_data.num_timesteps_per_item().median(),
+           tuning_data_provided=tuning_data is not None,
+       )
 
        if tuning_data is not None:
            tuning_data = self._check_and_prepare_data_frame(tuning_data, name="tuning_data")
            tuning_data = self._check_and_prepare_data_frame_for_evaluation(tuning_data, name="tuning_data")
            logger.info(f"Provided tuning_data has {self._get_dataset_stats(tuning_data)}")
-           # TODO: Use num_val_windows to perform multi-window backtests on tuning_data
-           if num_val_windows > 0:
-               logger.warning(
-                   "\tSetting num_val_windows = 0 (disabling backtesting on train_data) because tuning_data is provided."
-               )
-               num_val_windows = 0
-
-       if num_val_windows == 0 and tuning_data is None:
-           raise ValueError("Please set num_val_windows >= 1 or provide custom tuning_data")
 
-       if num_val_windows <= 1 and refit_every_n_windows is not None and refit_every_n_windows > 1:
+       if sum(num_val_windows) <= 1 and refit_every_n_windows is not None and refit_every_n_windows > 1:
            logger.warning(
-               f"\trefit_every_n_windows provided as {refit_every_n_windows} but num_val_windows is set to {num_val_windows}."
-               " Refit_every_n_windows will have no effect."
+               f"\trefit_every_n_windows provided as {refit_every_n_windows} but num_val_windows is set to "
+               f"{num_val_windows}. refit_every_n_windows will have no effect."
            )
 
        if not skip_model_selection:
-           train_data = self._filter_useless_train_data(
-               train_data, num_val_windows=num_val_windows, val_step_size=val_step_size
-           )
-
-           val_splitter = ExpandingWindowSplitter(
-               prediction_length=self.prediction_length, num_val_windows=num_val_windows, val_step_size=val_step_size
-           )
+           # When tuning_data is provided, ignore the last element of num_val_windows for filtering purposes
+           filter_num_val_windows = num_val_windows[:-1] if tuning_data is not None else num_val_windows
+           train_data = self._filter_useless_train_data(train_data, filter_num_val_windows, val_step_size)
 
        time_left = None if time_limit is None else time_limit - (time.time() - time_start)
        self._learner.fit(
@@ -762,9 +771,11 @@
            val_data=tuning_data,
            hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
            excluded_model_types=excluded_model_types,
+           ensemble_hyperparameters=ensemble_hyperparameters,
            time_limit=time_left,
            verbosity=verbosity,
-           val_splitter=val_splitter,
+           num_val_windows=num_val_windows,
+           val_step_size=val_step_size,
            refit_every_n_windows=refit_every_n_windows,
            skip_model_selection=skip_model_selection,
            enable_ensemble=enable_ensemble,
@@ -779,23 +790,131 @@
            self.save()
        return self
 
+   def _validate_and_normalize_validation_and_ensemble_inputs(
+       self,
+       num_val_windows: int | tuple[int, ...],
+       ensemble_hyperparameters: dict[str, Any] | list[dict[str, Any]] | None,
+       val_step_size: int,
+       median_timeseries_length: float,
+       tuning_data_provided: bool,
+   ) -> tuple[tuple[int, ...], list[dict[str, Any]] | None]:
+       """Validate and normalize num_val_windows and ensemble_hyperparameters for multi-layer ensembling."""
+       original_num_val_windows = num_val_windows if isinstance(num_val_windows, tuple) else (num_val_windows,)
+
+       if ensemble_hyperparameters is not None:
+           if isinstance(ensemble_hyperparameters, dict):
+               ensemble_hyperparameters = [ensemble_hyperparameters]
+
+           if len(ensemble_hyperparameters) != len(original_num_val_windows):
+               raise ValueError(
+                   f"Length mismatch: num_val_windows has {len(original_num_val_windows)} layers but "
+                   f"ensemble_hyperparameters has {len(ensemble_hyperparameters)} layers. "
+                   f"These must match for multi-layer ensembling."
+               )
+
+       num_val_windows = self._normalize_num_val_windows_input(num_val_windows, tuning_data_provided)
+       num_val_windows = self._reduce_num_val_windows_if_necessary(
+           num_val_windows, val_step_size, median_timeseries_length, tuning_data_provided
+       )
+
+       if ensemble_hyperparameters is not None and len(num_val_windows) < len(ensemble_hyperparameters):
+           logger.warning(
+               f"Time series too short: reducing ensemble layers from {len(ensemble_hyperparameters)} to "
+               f"{len(num_val_windows)}. Only the first {len(num_val_windows)} ensemble layer(s) will be trained."
+           )
+           ensemble_hyperparameters = ensemble_hyperparameters[: len(num_val_windows)]
+
+       return num_val_windows, ensemble_hyperparameters
+
+   def _normalize_num_val_windows_input(
+       self,
+       num_val_windows: int | tuple[int, ...],
+       tuning_data_provided: bool,
+   ) -> tuple[int, ...]:
+       if isinstance(num_val_windows, int):
+           num_val_windows = (num_val_windows,)
+       if not isinstance(num_val_windows, tuple):
+           raise TypeError(f"num_val_windows must be int or tuple[int, ...], got {type(num_val_windows)}")
+       if len(num_val_windows) == 0:
+           raise ValueError("num_val_windows tuple cannot be empty")
+       if tuning_data_provided:
+           num_val_windows = num_val_windows[:-1] + (1,)
+           logger.warning(
+               f"\tTuning data is provided. Setting num_val_windows = {num_val_windows}. Validation scores will"
+               " be computed on a single window of tuning_data."
+           )
+       if not all(isinstance(n, int) and n > 0 for n in num_val_windows):
+           raise ValueError("All elements of num_val_windows must be positive integers.")
+       return num_val_windows
+
+   def _reduce_num_val_windows_if_necessary(
+       self,
+       num_val_windows: tuple[int, ...],
+       val_step_size: int,
+       median_time_series_length: float,
+       tuning_data_provided: bool,
+   ) -> tuple[int, ...]:
+       """Adjust num_val_windows based on the length of time series in train_data.
+
+       Chooses num_val_windows such that TS with median length is long enough to perform num_val_windows validations
+       (at least 1, at most `original_num_val_windows`).
+
+       In other words, find largest `num_val_windows` that satisfies
+       median_length >= min_train_length + prediction_length + (num_val_windows - 1) * val_step_size
+
+       If tuning_data is provided, the last element of `num_val_windows` is ignored when computing the number of
+       requested validation windows.
+       """
+       num_val_windows_for_median_ts = int(
+           (median_time_series_length - self._min_train_length - self.prediction_length) // val_step_size + 1
+       )
+       max_allowed = max(1, num_val_windows_for_median_ts)
+       total_requested = sum(num_val_windows) if not tuning_data_provided else sum(num_val_windows[:-1])
+
+       if max_allowed >= total_requested:
+           return num_val_windows
+
+       logger.warning(
+           f"Time series in train_data are too short for chosen num_val_windows={num_val_windows}. "
+           f"Reducing num_val_windows to {max_allowed} total windows."
+       )
+
+       result = list(num_val_windows)
+
+       # Starting from the last group of windows, reduce number of windows in each group by 1,
+       # until sum(num_val_windows) <= max_allowed is satisfied.
+       for i in range(len(result) - 1, -1, -1):
+           while result[i] > 1 and sum(result) > max_allowed:
+               result[i] -= 1
+           if sum(result) <= max_allowed:
+               break
+
+       # It is possible that the above for loop reduced the number of windows in each group to 1
+       # (i.e. result = [1] * len(num_val_windows)), but still sum(result) > max_allowed. In this
+       # case we set result = [1] * max_allowed
+       if sum(result) > max_allowed:
+           result = [1] * max_allowed
+
+       return tuple(result)
+
    def model_names(self) -> list[str]:
        """Returns the list of model names trained by this predictor object."""
+       self._assert_is_fit("model_names")
        return self._trainer.get_model_names()
 
    def predict(
        self,
-       data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
-       known_covariates: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
-       model: Optional[str] = None,
+       data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+       known_covariates: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+       model: str | None = None,
        use_cache: bool = True,
-       random_seed: Optional[int] = 123,
+       random_seed: int | None = 123,
    ) -> TimeSeriesDataFrame:
        """Return quantile and mean forecasts for the given dataset, starting from the end of each time series.
 
        Parameters
        ----------
-       data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+       data : TimeSeriesDataFrame | pd.DataFrame | Path | str
           Historical time series data for which the forecast needs to be made.
 
           The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
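
To see what the window-reduction strategy added above does in practice, the same logic as a standalone sketch (``max_allowed`` stands in for the value derived from the median series length)::

    def reduce_windows(num_val_windows: tuple[int, ...], max_allowed: int) -> tuple[int, ...]:
        # Shrink groups starting from the last one until the total fits,
        # mirroring _reduce_num_val_windows_if_necessary above.
        result = list(num_val_windows)
        for i in range(len(result) - 1, -1, -1):
            while result[i] > 1 and sum(result) > max_allowed:
                result[i] -= 1
            if sum(result) <= max_allowed:
                break
        if sum(result) > max_allowed:
            result = [1] * max_allowed
        return tuple(result)

    assert reduce_windows((2, 3), max_allowed=3) == (2, 1)  # last layer shrinks first
    assert reduce_windows((2, 3), max_allowed=1) == (1,)    # collapses to a single window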
@@ -803,7 +922,7 @@
 
           If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
           If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
-       known_covariates : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str], optional
+       known_covariates : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
           If ``known_covariates_names`` were specified when creating the predictor, it is necessary to provide the
           values of the known covariates for each time series during the forecast horizon. Specifically:
 
@@ -853,6 +972,7 @@
        B      2020-03-04   17.1
               2020-03-05    8.3
        """
+       self._assert_is_fit("predict")
        # Save original item_id order to return predictions in the same order as input data
        data = self._to_data_frame(data)
        original_item_id_order = data.item_ids
@@ -866,14 +986,209 @@
            use_cache=use_cache,
            random_seed=random_seed,
        )
-       return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=ITEMID))
+       return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID))
+
+   @overload
+   def backtest_predictions(
+       self,
+       data: TimeSeriesDataFrame | None = None,
+       *,
+       model: str | None = None,
+       num_val_windows: int | None = None,
+       val_step_size: int | None = None,
+       use_cache: bool = True,
+   ) -> list[TimeSeriesDataFrame]: ...
+
+   @overload
+   def backtest_predictions(
+       self,
+       data: TimeSeriesDataFrame | None = None,
+       *,
+       model: list[str],
+       num_val_windows: int | None = None,
+       val_step_size: int | None = None,
+       use_cache: bool = True,
+   ) -> dict[str, list[TimeSeriesDataFrame]]: ...
+
+   def backtest_predictions(
+       self,
+       data: TimeSeriesDataFrame | None = None,
+       *,
+       model: str | list[str] | None = None,
+       num_val_windows: int | None = None,
+       val_step_size: int | None = None,
+       use_cache: bool = True,
+   ) -> list[TimeSeriesDataFrame] | dict[str, list[TimeSeriesDataFrame]]:
+       """Return predictions for multiple validation windows.
+
+       When ``data=None``, returns the predictions that were saved during training. Otherwise, generates new
+       predictions by splitting ``data`` into multiple windows using an expanding window strategy.
+
+       The corresponding target values for each window can be obtained using
+       :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_targets`.
+
+       Parameters
+       ----------
+       data : TimeSeriesDataFrame, optional
+           Time series data to generate predictions for. If ``None``, returns the predictions that were saved
+           during training on ``train_data``.
+
+           If provided, all time series in ``data`` must have length at least
+           ``prediction_length + (num_val_windows - 1) * val_step_size + 1``.
+
+           The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+           the predictor.
+       model : str, list[str], or None, default = None
+           Name of the model(s) to generate predictions with. By default, the best model during training
+           (with highest validation score) will be used.
+
+           - If ``str``: Returns predictions for a single model as a list.
+           - If ``list[str]``: Returns predictions for multiple models as a dict mapping model names to lists.
+           - If ``None``: Uses the best model.
+       num_val_windows : int, optional
+           Number of validation windows to generate. If ``None``, uses the ``num_val_windows`` value from training
+           configuration when ``data=None``, otherwise defaults to 1.
+
+           For example, with ``prediction_length=2``, ``num_val_windows=3``, and ``val_step_size=1``, the validation
+           windows are::
+
+               |-------------------|
+               | x x x x x y y - - |
+               | x x x x x x y y - |
+               | x x x x x x x y y |
+
+           where ``x`` denotes training time steps and ``y`` denotes validation time steps for each window.
+       val_step_size : int, optional
+           Number of time steps between the start of consecutive validation windows. If ``None``, defaults to
+           ``prediction_length``.
+       use_cache : bool, default = True
+           If True, will attempt to use cached predictions. If False, cached predictions will be ignored.
+           This argument is ignored if ``cache_predictions`` was set to False when creating the ``TimeSeriesPredictor``.
+
+       Returns
+       -------
+       list[TimeSeriesDataFrame] or dict[str, list[TimeSeriesDataFrame]]
+           Predictions for each validation window.
+
+           - If ``model`` is a ``str`` or ``None``: Returns a list of length ``num_val_windows``, where each element
+             contains the predictions for one validation window.
+           - If ``model`` is a ``list[str]``: Returns a dict mapping each model name to a list of predictions for
+             each validation window.
+
+       Examples
+       --------
+       Make predictions on new data with the best model
+
+       >>> predictor.backtest_predictions(test_data, num_val_windows=2)
+
+       Load validation predictions for all models that were saved during training
+
+       >>> predictor.backtest_predictions(model=predictor.model_names())
+
+       See Also
+       --------
+       backtest_targets
+           Return target values aligned with predictions.
+       evaluate
+           Evaluate forecast accuracy on a hold-out set.
+       predict
+           Generate forecasts for future time steps.
+       """
+       self._assert_is_fit("backtest_predictions")
+       if data is not None:
+           data = self._check_and_prepare_data_frame(data)
+
+       if model is None:
+           model_names = [self.model_best]
+       elif isinstance(model, str):
+           model_names = [model]
+       else:
+           model_names = model
+
+       result = self._learner.backtest_predictions(
+           data=data,
+           model_names=model_names,
+           num_val_windows=num_val_windows,
+           val_step_size=val_step_size,
+           use_cache=use_cache,
+       )
+
+       if isinstance(model, list):
+           return result
+       else:
+           return result[model_names[0]]
+
+   def backtest_targets(
+       self,
+       data: TimeSeriesDataFrame | None = None,
+       *,
+       num_val_windows: int | None = None,
+       val_step_size: int | None = None,
+   ) -> list[TimeSeriesDataFrame]:
+       """Return target values for each validation window.
+
+       Returns the actual target values corresponding to each validation window used in
+       :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`. The returned targets are aligned
+       with the predictions, making it easy to compute custom evaluation metrics or analyze forecast errors.
+
+       Parameters
+       ----------
+       data : TimeSeriesDataFrame, optional
+           Time series data to extract targets from. If ``None``, returns the targets from the validation windows
+           used during training.
+
+           If provided, all time series in ``data`` must have length at least
+           ``prediction_length + (num_val_windows - 1) * val_step_size + 1``.
+
+           The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+           the predictor.
+       num_val_windows : int, optional
+           Number of validation windows to extract targets for. If ``None``, uses the ``num_val_windows`` value from
+           training configuration when ``data=None``, otherwise defaults to 1.
+
+           This should match the ``num_val_windows`` argument passed to
+           :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`.
+       val_step_size : int, optional
+           Number of time steps between the start of consecutive validation windows. If ``None``, defaults to
+           ``prediction_length``.
+
+           This should match the ``val_step_size`` argument passed to
+           :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`.
+
+       Returns
+       -------
+       list[TimeSeriesDataFrame]
+           Target values for each validation window. Returns a list of length ``num_val_windows``,
+           where each element contains the full time series data for one validation window.
+           Each dataframe includes both historical context and the last ``prediction_length`` time steps
+           that represent the target values to compare against predictions.
+
+           The returned targets are aligned with the output of
+           :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`, so ``targets[i]`` corresponds
+           to ``predictions[i]`` for the i-th validation window.
+
+       See Also
+       --------
+       backtest_predictions
+           Return predictions for multiple validation windows.
+       evaluate
+           Evaluate forecast accuracy on a hold-out set.
+       """
+       self._assert_is_fit("backtest_targets")
+       if data is not None:
+           data = self._check_and_prepare_data_frame(data)
+       return self._learner.backtest_targets(
+           data=data,
+           num_val_windows=num_val_windows,
+           val_step_size=val_step_size,
+       )
 
    def evaluate(
        self,
-       data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
-       model: Optional[str] = None,
-       metrics: Optional[Union[str, TimeSeriesScorer, list[Union[str, TimeSeriesScorer]]]] = None,
-       cutoff: Optional[int] = None,
+       data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+       model: str | None = None,
+       metrics: str | TimeSeriesScorer | list[str | TimeSeriesScorer] | None = None,
+       cutoff: int | None = None,
        display: bool = False,
        use_cache: bool = True,
    ) -> dict[str, float]:
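
A sketch of pairing the two new backtest methods to compute a custom per-window error. It assumes a fitted predictor whose predictions carry a ``mean`` column, and uses ``TimeSeriesDataFrame.slice_by_timestep`` to isolate the forecast horizon (``test_data`` is hypothetical)::

    preds = predictor.backtest_predictions(test_data, num_val_windows=2)
    targets = predictor.backtest_targets(test_data, num_val_windows=2)
    for window, (pred, full_series) in enumerate(zip(preds, targets)):
        # targets[i] contains history + horizon; keep only the last prediction_length steps
        y_true = full_series.slice_by_timestep(-predictor.prediction_length, None)[predictor.target]
        mae = (pred["mean"] - y_true).abs().mean()
        print(f"window {window}: MAE = {mae:.3f}")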
@@ -890,7 +1205,7 @@
 
        Parameters
        ----------
-       data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+       data : TimeSeriesDataFrame | pd.DataFrame | Path | str
           The data to evaluate the best model on. If a ``cutoff`` is not provided, the last ``prediction_length``
           time steps of each time series in ``data`` will be held out for prediction and forecast accuracy will
           be calculated on these time steps. When a ``cutoff`` is provided, the ``-cutoff``-th to the
@@ -907,7 +1222,7 @@
       model : str, optional
           Name of the model that you would like to evaluate. By default, the best model during training
           (with highest validation score) will be used.
-       metrics : str, TimeSeriesScorer or list[Union[str, TimeSeriesScorer]], optional
+       metrics : str, TimeSeriesScorer or list[str | TimeSeriesScorer], optional
           Metric or a list of metrics to compute scores with. Defaults to ``self.eval_metric``. Supports both
           metric names as strings and custom metrics based on TimeSeriesScorer.
       cutoff : int, optional
@@ -928,7 +1243,7 @@
           will have their signs flipped to obey this convention. For example, negative MAPE values will be reported.
           To get the ``eval_metric`` score, do ``output[predictor.eval_metric.name]``.
       """
-
+       self._assert_is_fit("evaluate")
       data = self._check_and_prepare_data_frame(data)
       data = self._check_and_prepare_data_frame_for_evaluation(data, cutoff=cutoff)
 
@@ -940,15 +1255,15 @@
 
    def feature_importance(
        self,
-       data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
-       model: Optional[str] = None,
-       metric: Optional[Union[str, TimeSeriesScorer]] = None,
-       features: Optional[list[str]] = None,
-       time_limit: Optional[float] = None,
+       data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+       model: str | None = None,
+       metric: str | TimeSeriesScorer | None = None,
+       features: list[str] | None = None,
+       time_limit: float | None = None,
        method: Literal["naive", "permutation"] = "permutation",
        subsample_size: int = 50,
-       num_iterations: Optional[int] = None,
-       random_seed: Optional[int] = 123,
+       num_iterations: int | None = None,
+       random_seed: int | None = 123,
        relative_scores: bool = False,
        include_confidence_band: bool = True,
        confidence_level: float = 0.99,
@@ -1045,6 +1360,7 @@
           'importance': The estimated feature importance score.
           'stddev': The standard deviation of the feature importance score. If NaN, then not enough ``num_iterations`` were used.
       """
+       self._assert_is_fit("feature_importance")
       if data is not None:
           data = self._check_and_prepare_data_frame(data)
           data = self._check_and_prepare_data_frame_for_evaluation(data)
@@ -1063,7 +1379,7 @@
           include_confidence_band=include_confidence_band,
           confidence_level=confidence_level,
       )
-       return fi_df
+       return fi_df.sort_values("importance", ascending=False)
 
    @classmethod
    def _load_version_file(cls, path: str) -> str:
@@ -1091,7 +1407,7 @@
       return version
 
    @classmethod
-   def load(cls, path: Union[str, Path], require_version_match: bool = True) -> "TimeSeriesPredictor":
+   def load(cls, path: str | Path, require_version_match: bool = True) -> "TimeSeriesPredictor":
       """Load an existing ``TimeSeriesPredictor`` from given ``path``.
 
       .. warning::
@@ -1175,15 +1491,14 @@
    @property
    def model_best(self) -> str:
       """Returns the name of the best model from trainer."""
+       self._assert_is_fit("model_best")
       if self._trainer.model_best is not None:
           models = self._trainer.get_model_names()
           if self._trainer.model_best in models:
               return self._trainer.model_best
       return self._trainer.get_model_best()
 
-   def persist(
-       self, models: Union[Literal["all", "best"], list[str]] = "best", with_ancestors: bool = True
-   ) -> list[str]:
+   def persist(self, models: Literal["all", "best"] | list[str] = "best", with_ancestors: bool = True) -> list[str]:
       """Persist models in memory for reduced inference latency. This is particularly important if the models are being used for online
       inference where low latency is critical. If models are not persisted in memory, they are loaded from disk every time they are
       asked to make predictions. This is especially cumbersome for large deep learning based models which have to be loaded into
@@ -1206,6 +1521,7 @@
       list_of_models : list[str]
           List of persisted model names.
       """
+       self._assert_is_fit("persist")
       return self._learner.persist_trainer(models=models, with_ancestors=with_ancestors)
 
    def unpersist(self) -> list[str]:
@@ -1224,10 +1540,10 @@
 
    def leaderboard(
        self,
-       data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
-       cutoff: Optional[int] = None,
+       data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+       cutoff: int | None = None,
        extra_info: bool = False,
-       extra_metrics: Optional[list[Union[str, TimeSeriesScorer]]] = None,
+       extra_metrics: list[str | TimeSeriesScorer] | None = None,
        display: bool = False,
        use_cache: bool = True,
        **kwargs,
@@ -1252,7 +1568,7 @@
 
       Parameters
       ----------
-       data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str], optional
+       data : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
          dataset used for additional evaluation. Must include both historical and future data (i.e., length of all
          time series in ``data`` must be at least ``prediction_length + 1``, if ``cutoff`` is not provided,
          ``-cutoff + 1`` otherwise).
@@ -1271,7 +1587,7 @@
          If True, the leaderboard will contain an additional column ``hyperparameters`` with the hyperparameters used
          by each model during training. An empty dictionary ``{}`` means that the model was trained with default
          hyperparameters.
-       extra_metrics : list[Union[str, TimeSeriesScorer]], optional
+       extra_metrics : list[str | TimeSeriesScorer], optional
          A list of metrics to calculate scores for and include in the output DataFrame.
 
          Only valid when ``data`` is specified. The scores refer to the scores on ``data`` (same data as used to
@@ -1293,6 +1609,7 @@
          The leaderboard containing information on all models and in order of best model to worst in terms of
          test performance.
       """
+       self._assert_is_fit("leaderboard")
       if "silent" in kwargs:
           # keep `silent` logic for backwards compatibility
           assert isinstance(kwargs["silent"], bool)
@@ -1317,12 +1634,12 @@
           print(leaderboard)
       return leaderboard
 
-   def make_future_data_frame(self, data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]) -> pd.DataFrame:
+   def make_future_data_frame(self, data: TimeSeriesDataFrame | pd.DataFrame | Path | str) -> pd.DataFrame:
       """Generate a dataframe with the ``item_id`` and ``timestamp`` values corresponding to the forecast horizon.
 
       Parameters
      ----------
-       data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+       data : TimeSeriesDataFrame | pd.DataFrame | Path | str
          Historical time series data.
 
      Returns
@@ -1370,6 +1687,7 @@
          Dict containing various detailed information. We do not recommend directly printing this dict as it may
          be very large.
      """
+       self._assert_is_fit("fit_summary")
      # TODO: HPO-specific information currently not reported in fit_summary
      # TODO: Revisit after ray tune integration
 
@@ -1430,6 +1748,7 @@
          ``predictor.predict(data)`` is called will be the refit_full version instead of the original version of the
          model. Has no effect if ``model`` is not the best model.
      """
+       self._assert_is_fit("refit_full")
      logger.warning(
          "\tWARNING: refit_full functionality for TimeSeriesPredictor is experimental "
          "and is not yet supported by all models."
@@ -1482,7 +1801,7 @@
      trainer = self._trainer
      train_data = trainer.load_train_data()
      val_data = trainer.load_val_data()
-       base_model_names = trainer.get_model_names(level=0)
+       base_model_names = trainer.get_model_names(layer=0)
      pred_proba_dict_val: dict[str, list[TimeSeriesDataFrame]] = {
          model_name: trainer._get_model_oof_predictions(model_name)
          for model_name in base_model_names
@@ -1498,7 +1817,7 @@
      )
 
      y_val: list[TimeSeriesDataFrame] = [
-          select_target(df) for df in trainer._get_ensemble_oof_data(train_data=train_data, val_data=val_data)
+          select_target(df) for df in trainer._get_validation_windows(train_data=train_data, val_data=val_data)
      ]
      y_test: TimeSeriesDataFrame = select_target(test_data)
 
@@ -1518,27 +1837,27 @@
 
    def plot(
        self,
-       data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
-       predictions: Optional[TimeSeriesDataFrame] = None,
-       quantile_levels: Optional[list[float]] = None,
-       item_ids: Optional[list[Union[str, int]]] = None,
+       data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+       predictions: TimeSeriesDataFrame | None = None,
+       quantile_levels: list[float] | None = None,
+       item_ids: list[str | int] | None = None,
        max_num_item_ids: int = 8,
-       max_history_length: Optional[int] = None,
-       point_forecast_column: Optional[str] = None,
-       matplotlib_rc_params: Optional[dict] = None,
+       max_history_length: int | None = None,
+       point_forecast_column: str | None = None,
+       matplotlib_rc_params: dict | None = None,
    ):
       """Plot historical time series values and the forecasts.
 
      Parameters
      ----------
-       data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
+       data : TimeSeriesDataFrame | pd.DataFrame | Path | str
          Observed time series data.
      predictions : TimeSeriesDataFrame, optional
          Predictions generated by calling :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict`.
      quantile_levels : list[float], optional
          Quantile levels for which to plot the prediction intervals. Defaults to lowest & highest quantile levels
          available in ``predictions``.
-       item_ids : list[Union[str, int]], optional
+       item_ids : list[str | int], optional
          If provided, plots will only be generated for time series with these item IDs. By default (if set to
          ``None``), item IDs are selected randomly. In either case, plots are generated for at most
          ``max_num_item_ids`` time series.
@@ -1621,7 +1940,7 @@
      for q in quantile_levels:
          ax.fill_between(forecast.index, point_forecast, forecast[str(q)], color="C1", alpha=0.2)
      if len(axes) > len(item_ids):
-          axes[len(item_ids)].set_axis_off()
-      handles, labels = axes[0].get_legend_handles_labels()
+          axes[len(item_ids)].set_axis_off()  # type: ignore
+      handles, labels = axes[0].get_legend_handles_labels()  # type: ignore
      fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.0), ncols=len(handles))
      return fig