autogluon.timeseries 1.4.1b20250907__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic; see the registry listing for details.

Files changed (94)
  1. autogluon/timeseries/configs/hyperparameter_presets.py +2 -2
  2. autogluon/timeseries/configs/predictor_presets.py +22 -0
  3. autogluon/timeseries/dataset/ts_dataframe.py +97 -86
  4. autogluon/timeseries/learner.py +70 -35
  5. autogluon/timeseries/metrics/__init__.py +4 -4
  6. autogluon/timeseries/metrics/abstract.py +8 -8
  7. autogluon/timeseries/metrics/point.py +9 -9
  8. autogluon/timeseries/metrics/quantile.py +5 -5
  9. autogluon/timeseries/metrics/utils.py +4 -4
  10. autogluon/timeseries/models/__init__.py +4 -1
  11. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
  12. autogluon/timeseries/models/abstract/model_trial.py +2 -1
  13. autogluon/timeseries/models/abstract/tunable.py +8 -8
  14. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
  15. autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
  16. autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
  17. autogluon/timeseries/models/chronos/__init__.py +2 -1
  18. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  19. autogluon/timeseries/models/chronos/model.py +125 -87
  20. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +68 -36
  21. autogluon/timeseries/models/ensemble/__init__.py +36 -2
  22. autogluon/timeseries/models/ensemble/abstract.py +14 -46
  23. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  24. autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
  25. autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
  26. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  27. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  28. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
  29. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  31. autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
  32. autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
  33. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  34. autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
  35. autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +25 -22
  36. autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
  37. autogluon/timeseries/models/gluonts/abstract.py +32 -31
  38. autogluon/timeseries/models/gluonts/dataset.py +11 -11
  39. autogluon/timeseries/models/local/__init__.py +0 -7
  40. autogluon/timeseries/models/local/abstract_local_model.py +15 -18
  41. autogluon/timeseries/models/local/naive.py +2 -2
  42. autogluon/timeseries/models/local/npts.py +7 -1
  43. autogluon/timeseries/models/local/statsforecast.py +12 -12
  44. autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
  45. autogluon/timeseries/models/registry.py +3 -4
  46. autogluon/timeseries/models/toto/__init__.py +3 -0
  47. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  48. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  49. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  50. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  51. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  52. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  53. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  56. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  57. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  58. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  59. autogluon/timeseries/models/toto/dataloader.py +108 -0
  60. autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
  61. autogluon/timeseries/models/toto/model.py +249 -0
  62. autogluon/timeseries/predictor.py +475 -156
  63. autogluon/timeseries/regressor.py +27 -30
  64. autogluon/timeseries/splitter.py +3 -27
  65. autogluon/timeseries/trainer/ensemble_composer.py +444 -0
  66. autogluon/timeseries/trainer/model_set_builder.py +9 -9
  67. autogluon/timeseries/trainer/prediction_cache.py +16 -16
  68. autogluon/timeseries/trainer/trainer.py +300 -275
  69. autogluon/timeseries/trainer/utils.py +17 -0
  70. autogluon/timeseries/transforms/covariate_scaler.py +8 -8
  71. autogluon/timeseries/transforms/target_scaler.py +15 -15
  72. autogluon/timeseries/utils/constants.py +10 -0
  73. autogluon/timeseries/utils/datetime/lags.py +1 -3
  74. autogluon/timeseries/utils/datetime/seasonality.py +1 -3
  75. autogluon/timeseries/utils/features.py +31 -14
  76. autogluon/timeseries/utils/forecast.py +6 -7
  77. autogluon/timeseries/utils/timer.py +173 -0
  78. autogluon/timeseries/version.py +1 -1
  79. autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
  80. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +39 -22
  81. autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
  82. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
  83. autogluon/timeseries/evaluator.py +0 -6
  84. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
  85. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  86. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
  87. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
  88. autogluon.timeseries-1.4.1b20250907-py3.9-nspkg.pth +0 -1
  89. autogluon.timeseries-1.4.1b20250907.dist-info/RECORD +0 -75
  90. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
  91. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
  92. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
  93. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
  94. {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
--- a/autogluon/timeseries/configs/hyperparameter_presets.py
+++ b/autogluon/timeseries/configs/hyperparameter_presets.py
@@ -1,7 +1,7 @@
-from typing import Any, Union
+from typing import Any
 
 
-def get_hyperparameter_presets() -> dict[str, dict[str, Union[dict[str, Any], list[dict[str, Any]]]]]:
+def get_hyperparameter_presets() -> dict[str, dict[str, dict[str, Any] | list[dict[str, Any]]]]:
     return {
         "very_light": {
             "Naive": {},
--- a/autogluon/timeseries/configs/predictor_presets.py
+++ b/autogluon/timeseries/configs/predictor_presets.py
@@ -23,6 +23,28 @@ def get_predictor_presets() -> dict[str, Any]:
         high_quality={"hyperparameters": "default"},
         medium_quality={"hyperparameters": "light"},
         fast_training={"hyperparameters": "very_light"},
+        # Chronos-2 models
+        chronos2={
+            "hyperparameters": {"Chronos2": {"model_path": "autogluon/chronos-2"}},
+            "skip_model_selection": True,
+        },
+        chronos2_small={
+            "hyperparameters": {"Chronos2": {"model_path": "autogluon/chronos-2-small"}},
+            "skip_model_selection": True,
+        },
+        chronos2_ensemble={
+            "hyperparameters": {
+                "Chronos2": [
+                    {"model_path": "autogluon/chronos-2", "ag_args": {"name_suffix": "ZeroShot"}},
+                    {
+                        "model_path": "autogluon/chronos-2-small",
+                        "fine_tune": True,
+                        "eval_during_fine_tune": True,
+                        "ag_args": {"name_suffix": "SmallFineTuned"},
+                    },
+                ]
+            },
+        },
         # Chronos-Bolt models
         bolt_tiny={
             "hyperparameters": {"Chronos": {"model_path": "bolt_tiny"}},
--- a/autogluon/timeseries/dataset/ts_dataframe.py
+++ b/autogluon/timeseries/dataset/ts_dataframe.py
@@ -7,7 +7,7 @@ import reprlib
 from collections.abc import Iterable
 from itertools import islice
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional, Type, Union, overload
+from typing import TYPE_CHECKING, Any, Final, Type, overload
 
 import numpy as np
 import pandas as pd
@@ -19,11 +19,6 @@ from autogluon.common.loaders import load_pd
 
 logger = logging.getLogger(__name__)
 
-ITEMID = "item_id"
-TIMESTAMP = "timestamp"
-
-IRREGULAR_TIME_INDEX_FREQSTR = "IRREG"
-
 
 class TimeSeriesDataFrame(pd.DataFrame):
     """A collection of univariate time series, where each row is identified by an (``item_id``, ``timestamp``) pair.
@@ -121,12 +116,16 @@ class TimeSeriesDataFrame(pd.DataFrame):
     index: pd.MultiIndex  # type: ignore
     _metadata = ["_static_features"]
 
+    IRREGULAR_TIME_INDEX_FREQSTR: Final[str] = "IRREG"
+    ITEMID: Final[str] = "item_id"
+    TIMESTAMP: Final[str] = "timestamp"
+
     def __init__(
         self,
-        data: Union[pd.DataFrame, str, Path, Iterable],
-        static_features: Optional[Union[pd.DataFrame, str, Path]] = None,
-        id_column: Optional[str] = None,
-        timestamp_column: Optional[str] = None,
+        data: pd.DataFrame | str | Path | Iterable,
+        static_features: pd.DataFrame | str | Path | None = None,
+        id_column: str | None = None,
+        timestamp_column: str | None = None,
         num_cpus: int = -1,
         *args,
         **kwargs,
@@ -150,7 +149,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         else:
             raise ValueError(f"data must be a pd.DataFrame, Iterable, string or Path (received {type(data)}).")
         super().__init__(data=data, *args, **kwargs)  # type: ignore
-        self._static_features: Optional[pd.DataFrame] = None
+        self._static_features: pd.DataFrame | None = None
         if static_features is not None:
             self.static_features = self._construct_static_features(static_features, id_column=id_column)
 
@@ -169,29 +168,33 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def _construct_tsdf_from_data_frame(
         cls,
         df: pd.DataFrame,
-        id_column: Optional[str] = None,
-        timestamp_column: Optional[str] = None,
+        id_column: str | None = None,
+        timestamp_column: str | None = None,
     ) -> pd.DataFrame:
         df = df.copy()
         if id_column is not None:
             assert id_column in df.columns, f"Column '{id_column}' not found!"
-            if id_column != ITEMID and ITEMID in df.columns:
-                logger.warning(f"Renaming existing column '{ITEMID}' -> '__{ITEMID}' to avoid name collisions.")
-                df.rename(columns={ITEMID: "__" + ITEMID}, inplace=True)
-            df.rename(columns={id_column: ITEMID}, inplace=True)
+            if id_column != cls.ITEMID and cls.ITEMID in df.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.ITEMID}' -> '__{cls.ITEMID}' to avoid name collisions."
+                )
+                df.rename(columns={cls.ITEMID: "__" + cls.ITEMID}, inplace=True)
+            df.rename(columns={id_column: cls.ITEMID}, inplace=True)
 
         if timestamp_column is not None:
             assert timestamp_column in df.columns, f"Column '{timestamp_column}' not found!"
-            if timestamp_column != TIMESTAMP and TIMESTAMP in df.columns:
-                logger.warning(f"Renaming existing column '{TIMESTAMP}' -> '__{TIMESTAMP}' to avoid name collisions.")
-                df.rename(columns={TIMESTAMP: "__" + TIMESTAMP}, inplace=True)
-            df.rename(columns={timestamp_column: TIMESTAMP}, inplace=True)
+            if timestamp_column != cls.TIMESTAMP and cls.TIMESTAMP in df.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.TIMESTAMP}' -> '__{cls.TIMESTAMP}' to avoid name collisions."
+                )
+                df.rename(columns={cls.TIMESTAMP: "__" + cls.TIMESTAMP}, inplace=True)
+            df.rename(columns={timestamp_column: cls.TIMESTAMP}, inplace=True)
 
-        if TIMESTAMP in df.columns:
-            df[TIMESTAMP] = pd.to_datetime(df[TIMESTAMP])
+        if cls.TIMESTAMP in df.columns:
+            df[cls.TIMESTAMP] = pd.to_datetime(df[cls.TIMESTAMP])
 
         cls._validate_data_frame(df)
-        return df.set_index([ITEMID, TIMESTAMP])
+        return df.set_index([cls.ITEMID, cls.TIMESTAMP])
 
     @classmethod
     def _construct_tsdf_from_iterable_dataset(cls, iterable_dataset: Iterable, num_cpus: int = -1) -> pd.DataFrame:
@@ -202,7 +205,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 start_timestamp = start_timestamp.to_timestamp(how="S")
             target = ts["target"]
             datetime_index = tuple(pd.date_range(start_timestamp, periods=len(target), freq=freq))
-            idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[ITEMID, TIMESTAMP])
+            idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[cls.ITEMID, cls.TIMESTAMP])
             return pd.Series(target, name="target", index=idx).to_frame()
 
         cls._validate_iterable(iterable_dataset)
@@ -219,32 +222,34 @@ class TimeSeriesDataFrame(pd.DataFrame):
             raise ValueError(f"data must be a pd.DataFrame, got {type(data)}")
         if not isinstance(data.index, pd.MultiIndex):
             raise ValueError(f"data must have pd.MultiIndex, got {type(data.index)}")
-        if not pd.api.types.is_datetime64_dtype(data.index.dtypes[TIMESTAMP]):
-            raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
-        if not data.index.names == (f"{ITEMID}", f"{TIMESTAMP}"):
-            raise ValueError(f"data must have index names as ('{ITEMID}', '{TIMESTAMP}'), got {data.index.names}")
+        if not pd.api.types.is_datetime64_dtype(data.index.dtypes[cls.TIMESTAMP]):
+            raise ValueError(f"for {cls.TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
+        if not data.index.names == (f"{cls.ITEMID}", f"{cls.TIMESTAMP}"):
+            raise ValueError(
+                f"data must have index names as ('{cls.ITEMID}', '{cls.TIMESTAMP}'), got {data.index.names}"
+            )
         item_id_index = data.index.levels[0]
         if not (pd.api.types.is_integer_dtype(item_id_index) or pd.api.types.is_string_dtype(item_id_index)):
-            raise ValueError(f"all entries in index `{ITEMID}` must be of integer or string dtype")
+            raise ValueError(f"all entries in index `{cls.ITEMID}` must be of integer or string dtype")
 
     @classmethod
     def _validate_data_frame(cls, df: pd.DataFrame):
         """Validate that a pd.DataFrame with ITEMID and TIMESTAMP columns can be converted to TimeSeriesDataFrame"""
         if not isinstance(df, pd.DataFrame):
             raise ValueError(f"data must be a pd.DataFrame, got {type(df)}")
-        if ITEMID not in df.columns:
-            raise ValueError(f"data must have a `{ITEMID}` column")
-        if TIMESTAMP not in df.columns:
-            raise ValueError(f"data must have a `{TIMESTAMP}` column")
-        if df[ITEMID].isnull().any():
-            raise ValueError(f"`{ITEMID}` column can not have nan")
-        if df[TIMESTAMP].isnull().any():
-            raise ValueError(f"`{TIMESTAMP}` column can not have nan")
-        if not pd.api.types.is_datetime64_dtype(df[TIMESTAMP]):
-            raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
-        item_id_column = df[ITEMID]
+        if cls.ITEMID not in df.columns:
+            raise ValueError(f"data must have a `{cls.ITEMID}` column")
+        if cls.TIMESTAMP not in df.columns:
+            raise ValueError(f"data must have a `{cls.TIMESTAMP}` column")
+        if df[cls.ITEMID].isnull().any():
+            raise ValueError(f"`{cls.ITEMID}` column can not have nan")
+        if df[cls.TIMESTAMP].isnull().any():
+            raise ValueError(f"`{cls.TIMESTAMP}` column can not have nan")
+        if not pd.api.types.is_datetime64_dtype(df[cls.TIMESTAMP]):
+            raise ValueError(f"for {cls.TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
+        item_id_column = df[cls.ITEMID]
         if not (pd.api.types.is_integer_dtype(item_id_column) or pd.api.types.is_string_dtype(item_id_column)):
-            raise ValueError(f"all entries in column `{ITEMID}` must be of integer or string dtype")
+            raise ValueError(f"all entries in column `{cls.ITEMID}` must be of integer or string dtype")
 
     @classmethod
     def _validate_iterable(cls, data: Iterable):
@@ -267,9 +272,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def from_data_frame(
         cls,
        df: pd.DataFrame,
-        id_column: Optional[str] = None,
-        timestamp_column: Optional[str] = None,
-        static_features_df: Optional[pd.DataFrame] = None,
+        id_column: str | None = None,
+        timestamp_column: str | None = None,
+        static_features_df: pd.DataFrame | None = None,
     ) -> TimeSeriesDataFrame:
         """Construct a ``TimeSeriesDataFrame`` from a pandas DataFrame.
 
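The from_data_frame signature is otherwise unchanged; only the annotations move to PEP 604 unions. A short usage sketch with hypothetical column names:

    import pandas as pd
    from autogluon.timeseries import TimeSeriesDataFrame

    # Long-format frame with custom id/timestamp column names
    df = pd.DataFrame(
        {
            "sensor": ["A", "A", "B", "B"],
            "time": list(pd.date_range("2024-01-01", periods=2)) * 2,
            "target": [1.0, 2.0, 3.0, 4.0],
        }
    )
    # The columns are renamed to "item_id" / "timestamp" and set as the MultiIndex
    ts_df = TimeSeriesDataFrame.from_data_frame(df, id_column="sensor", timestamp_column="time")
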
@@ -310,10 +315,10 @@ class TimeSeriesDataFrame(pd.DataFrame):
     @classmethod
     def from_path(
         cls,
-        path: Union[str, Path],
-        id_column: Optional[str] = None,
-        timestamp_column: Optional[str] = None,
-        static_features_path: Optional[Union[str, Path]] = None,
+        path: str | Path,
+        id_column: str | None = None,
+        timestamp_column: str | None = None,
+        static_features_path: str | Path | None = None,
     ) -> TimeSeriesDataFrame:
         """Construct a ``TimeSeriesDataFrame`` from a CSV or Parquet file.
 
@@ -386,13 +391,13 @@ class TimeSeriesDataFrame(pd.DataFrame):
     @property
     def item_ids(self) -> pd.Index:
         """List of unique time series IDs contained in the data set."""
-        return self.index.unique(level=ITEMID)
+        return self.index.unique(level=self.ITEMID)
 
     @classmethod
     def _construct_static_features(
         cls,
-        static_features: Union[pd.DataFrame, str, Path],
-        id_column: Optional[str] = None,
+        static_features: pd.DataFrame | str | Path,
+        id_column: str | None = None,
     ) -> pd.DataFrame:
         if isinstance(static_features, (str, Path)):
             static_features = load_pd.load(str(static_features))
@@ -403,10 +408,12 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
         if id_column is not None:
             assert id_column in static_features.columns, f"Column '{id_column}' not found in static_features!"
-            if id_column != ITEMID and ITEMID in static_features.columns:
-                logger.warning(f"Renaming existing column '{ITEMID}' -> '__{ITEMID}' to avoid name collisions.")
-                static_features.rename(columns={ITEMID: "__" + ITEMID}, inplace=True)
-            static_features.rename(columns={id_column: ITEMID}, inplace=True)
+            if id_column != cls.ITEMID and cls.ITEMID in static_features.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.ITEMID}' -> '__{cls.ITEMID}' to avoid name collisions."
+                )
+                static_features.rename(columns={cls.ITEMID: "__" + cls.ITEMID}, inplace=True)
+            static_features.rename(columns={id_column: cls.ITEMID}, inplace=True)
         return static_features
 
     @property
@@ -414,7 +421,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return self._static_features
 
     @static_features.setter
-    def static_features(self, value: Optional[pd.DataFrame]):
+    def static_features(self, value: pd.DataFrame | None):
         # if the current item index is not a multiindex, then we are dealing with a single
         # item slice. this should only happen when the user explicitly requests only a
         # single item or during `slice_by_timestep`. In this case we do not set static features
@@ -431,10 +438,10 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
         # Avoid modifying static features inplace
         value = value.copy()
-        if ITEMID in value.columns and value.index.name != ITEMID:
-            value = value.set_index(ITEMID)
-        if value.index.name != ITEMID:
-            value.index.rename(ITEMID, inplace=True)
+        if self.ITEMID in value.columns and value.index.name != self.ITEMID:
+            value = value.set_index(self.ITEMID)
+        if value.index.name != self.ITEMID:
+            value.index.rename(self.ITEMID, inplace=True)
         missing_item_ids = self.item_ids.difference(value.index)
         if len(missing_item_ids) > 0:
             raise ValueError(
@@ -447,7 +454,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
         self._static_features = value
 
-    def infer_frequency(self, num_items: Optional[int] = None, raise_if_irregular: bool = False) -> str:
+    def infer_frequency(self, num_items: int | None = None, raise_if_irregular: bool = False) -> str:
         """Infer the time series frequency based on the timestamps of the observations.
 
         Parameters
@@ -514,7 +521,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 else:
                     raise ValueError(f"Cannot infer frequency. Multiple frequencies detected: {unique_freqs}")
             else:
-                return IRREGULAR_TIME_INDEX_FREQSTR
+                return self.IRREGULAR_TIME_INDEX_FREQSTR
         else:
             return pd.tseries.frequencies.to_offset(unique_freqs[0]).freqstr
 
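The "IRREG" sentinel is now exposed as TimeSeriesDataFrame.IRREGULAR_TIME_INDEX_FREQSTR. A sketch of the behavior on an irregular index (toy data):

    import pandas as pd
    from autogluon.timeseries import TimeSeriesDataFrame

    df = pd.DataFrame(
        {
            "item_id": ["A"] * 3,
            # One missing day makes the index irregular
            "timestamp": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-04"]),
            "target": [1.0, 2.0, 3.0],
        }
    )
    ts_df = TimeSeriesDataFrame(df)
    assert ts_df.infer_frequency() == TimeSeriesDataFrame.IRREGULAR_TIME_INDEX_FREQSTR  # "IRREG"
    # With raise_if_irregular=True, the same call raises a ValueError instead
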
@@ -526,7 +533,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         values. For reliable results, use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.infer_frequency`.
         """
         inferred_freq = self.infer_frequency(num_items=50)
-        return None if inferred_freq == IRREGULAR_TIME_INDEX_FREQSTR else inferred_freq
+        return None if inferred_freq == self.IRREGULAR_TIME_INDEX_FREQSTR else inferred_freq
 
     @property
     def num_items(self):
@@ -563,7 +570,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return obj
 
     def __finalize__(  # noqa
-        self: TimeSeriesDataFrame, other, method: Optional[str] = None, **kwargs
+        self: TimeSeriesDataFrame, other, method: str | None = None, **kwargs
     ) -> TimeSeriesDataFrame:
         super().__finalize__(other=other, method=method, **kwargs)
         # when finalizing the copy/slice operation, we use the property setter to stay consistent
@@ -595,9 +602,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         after = TimeSeriesDataFrame(data_after, static_features=self.static_features)
         return before, after
 
-    def slice_by_timestep(
-        self, start_index: Optional[int] = None, end_index: Optional[int] = None
-    ) -> TimeSeriesDataFrame:
+    def slice_by_timestep(self, start_index: int | None = None, end_index: int | None = None) -> TimeSeriesDataFrame:
         """Select a subsequence from each time series between start (inclusive) and end (exclusive) indices.
 
         This operation is equivalent to selecting a slice ``[start_index : end_index]`` from each time series, and then
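
slice_by_timestep itself is unchanged apart from the collapsed signature. For reference, it slices each series independently, e.g. (assuming an existing ts_df):

    # Split off the last 3 observations of every series
    context = ts_df.slice_by_timestep(None, -3)  # all but the last 3 steps
    tail = ts_df.slice_by_timestep(-3, None)     # only the last 3 steps
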
@@ -735,7 +740,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
             return self.loc[mask]
         else:
             # Fall back to a slow groupby operation
-            result = self.groupby(level=ITEMID, sort=False, as_index=False).nth(slice(start_index, end_index))
+            result = self.groupby(level=self.ITEMID, sort=False, as_index=False).nth(slice(start_index, end_index))
             result.static_features = self.static_features
             return result
 
@@ -852,12 +857,12 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 "It is highly recommended to call `ts_df.sort_index()` before calling `ts_df.fill_missing_values()`"
             )
 
-        grouped_df = df.groupby(level=ITEMID, sort=False, group_keys=False)
+        grouped_df = df.groupby(level=self.ITEMID, sort=False, group_keys=False)
         if method == "auto":
             filled_df = grouped_df.ffill()
             # If necessary, fill missing values at the start of each time series with bfill
             if filled_df.isna().any(axis=None):
-                filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).bfill()
+                filled_df = filled_df.groupby(level=self.ITEMID, sort=False, group_keys=False).bfill()
         elif method in ["ffill", "pad"]:
             filled_df = grouped_df.ffill()
         elif method in ["bfill", "backfill"]:
@@ -900,8 +905,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return super().sort_index(*args, **kwargs)  # type: ignore
 
     def get_model_inputs_for_scoring(
-        self, prediction_length: int, known_covariates_names: Optional[list[str]] = None
-    ) -> tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
+        self, prediction_length: int, known_covariates_names: list[str] | None = None
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
         """Prepare model inputs necessary to predict the last ``prediction_length`` time steps of each time series in the dataset.
 
         Parameters
@@ -931,8 +936,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def train_test_split(
         self,
         prediction_length: int,
-        end_index: Optional[int] = None,
-        suffix: Optional[str] = None,
+        end_index: int | None = None,
+        suffix: str | None = None,
     ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
         """Generate a train/test split from the given dataset.
 
@@ -977,7 +982,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
     def convert_frequency(
         self,
-        freq: Union[str, pd.DateOffset],
+        freq: str | pd.DateOffset,
         agg_numeric: str = "mean",
         agg_categorical: str = "first",
         num_cpus: int = -1,
@@ -996,7 +1001,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
         Parameters
         ----------
-        freq : Union[str, pd.DateOffset]
+        freq : str | pd.DateOffset
            Frequency to which the data should be converted. See `pandas frequency aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
            for supported values.
        agg_numeric : {"max", "min", "sum", "mean", "median", "first", "last"}, default = "mean"
@@ -1086,8 +1091,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         def resample_chunk(chunk: Iterable[tuple[str, pd.DataFrame]]) -> pd.DataFrame:
             resampled_dfs = []
             for item_id, df in chunk:
-                resampled_df = df.resample(offset, level=TIMESTAMP, **kwargs).agg(aggregation)
-                resampled_dfs.append(pd.concat({item_id: resampled_df}, names=[ITEMID]))
+                resampled_df = df.resample(offset, level=self.TIMESTAMP, **kwargs).agg(aggregation)
+                resampled_dfs.append(pd.concat({item_id: resampled_df}, names=[self.ITEMID]))
             return pd.concat(resampled_dfs)
 
         # Resampling time for 1 item < overhead time for a single parallel job. Therefore, we group items into chunks
@@ -1095,8 +1100,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         df = pd.DataFrame(self)
         # Make sure that timestamp index has dtype 'datetime64[ns]', otherwise index may contain NaT values.
         # See https://github.com/autogluon/autogluon/issues/4917
-        df.index = df.index.set_levels(df.index.levels[1].astype("datetime64[ns]"), level=TIMESTAMP)
-        chunks = split_into_chunks(df.groupby(level=ITEMID, sort=False), chunk_size)
+        df.index = df.index.set_levels(df.index.levels[1].astype("datetime64[ns]"), level=self.TIMESTAMP)
+        chunks = split_into_chunks(df.groupby(level=self.ITEMID, sort=False), chunk_size)
         resampled_chunks = Parallel(n_jobs=num_cpus)(delayed(resample_chunk)(chunk) for chunk in chunks)
         resampled_df = TimeSeriesDataFrame(pd.concat(resampled_chunks))
         resampled_df.static_features = self.static_features
@@ -1123,14 +1128,14 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def reindex(*args, **kwargs) -> Self: ...  # type: ignore
 
     @overload
-    def __new__(cls, data: pd.DataFrame, static_features: Optional[pd.DataFrame] = None) -> Self: ...  # type: ignore
+    def __new__(cls, data: pd.DataFrame, static_features: pd.DataFrame | None = None) -> Self: ...  # type: ignore
     @overload
     def __new__(
         cls,
-        data: Union[pd.DataFrame, str, Path, Iterable],
-        static_features: Optional[Union[pd.DataFrame, str, Path]] = None,
-        id_column: Optional[str] = None,
-        timestamp_column: Optional[str] = None,
+        data: pd.DataFrame | str | Path | Iterable,
+        static_features: pd.DataFrame | str | Path | None = None,
+        id_column: str | None = None,
+        timestamp_column: str | None = None,
         num_cpus: int = -1,
         *args,
         **kwargs,
@@ -1142,3 +1147,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def __getitem__(self, items: list[str]) -> Self: ...  # type: ignore
     @overload
     def __getitem__(self, item: str) -> pd.Series: ...  # type: ignore
+
+
+# TODO: remove with v2.0
+# module-level constants kept for backward compatibility.
+ITEMID = TimeSeriesDataFrame.ITEMID
+TIMESTAMP = TimeSeriesDataFrame.TIMESTAMP
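
The index-name constants now live on the class, with module-level aliases kept until v2.0. The two access patterns, side by side:

    from autogluon.timeseries import TimeSeriesDataFrame
    from autogluon.timeseries.dataset.ts_dataframe import ITEMID  # deprecated alias

    assert ITEMID == TimeSeriesDataFrame.ITEMID == "item_id"
    assert TimeSeriesDataFrame.TIMESTAMP == "timestamp"
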
--- a/autogluon/timeseries/learner.py
+++ b/autogluon/timeseries/learner.py
@@ -1,15 +1,14 @@
 import logging
 import reprlib
 import time
-from typing import Any, Literal, Optional, Type, Union
+from typing import Any, Literal, Type
 
 import pandas as pd
 
 from autogluon.core.learner import AbstractLearner
-from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
-from autogluon.timeseries.splitter import AbstractWindowSplitter
 from autogluon.timeseries.trainer import TimeSeriesTrainer
 from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
 from autogluon.timeseries.utils.forecast import make_future_data_frame
@@ -26,12 +25,12 @@ class TimeSeriesLearner(AbstractLearner):
         self,
         path_context: str,
         target: str = "target",
-        known_covariates_names: Optional[list[str]] = None,
+        known_covariates_names: list[str] | None = None,
         trainer_type: Type[TimeSeriesTrainer] = TimeSeriesTrainer,
-        eval_metric: Union[str, TimeSeriesScorer, None] = None,
+        eval_metric: str | TimeSeriesScorer | None = None,
         prediction_length: int = 1,
         cache_predictions: bool = True,
-        ensemble_model_type: Optional[Type] = None,
+        ensemble_model_type: Type | None = None,
         **kwargs,
     ):
         super().__init__(path_context=path_context)
@@ -42,7 +41,7 @@ class TimeSeriesLearner(AbstractLearner):
         self.prediction_length = prediction_length
         self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
         self.cache_predictions = cache_predictions
-        self.freq: Optional[str] = None
+        self.freq: str | None = None
         self.ensemble_model_type = ensemble_model_type
 
         self.feature_generator = TimeSeriesFeatureGenerator(
@@ -56,13 +55,15 @@ class TimeSeriesLearner(AbstractLearner):
     def fit(
         self,
         train_data: TimeSeriesDataFrame,
-        hyperparameters: Union[str, dict],
-        val_data: Optional[TimeSeriesDataFrame] = None,
-        hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
-        time_limit: Optional[float] = None,
-        val_splitter: Optional[AbstractWindowSplitter] = None,
-        refit_every_n_windows: Optional[int] = 1,
-        random_seed: Optional[int] = None,
+        hyperparameters: str | dict,
+        val_data: TimeSeriesDataFrame | None = None,
+        hyperparameter_tune_kwargs: str | dict | None = None,
+        ensemble_hyperparameters: dict[str, Any] | list[dict[str, Any]] | None = None,
+        time_limit: float | None = None,
+        num_val_windows: tuple[int, ...] = (1,),
+        val_step_size: int | None = None,
+        refit_every_n_windows: int | None = 1,
+        random_seed: int | None = None,
         **kwargs,
     ) -> None:
         self._time_limit = time_limit
@@ -86,7 +87,8 @@ class TimeSeriesLearner(AbstractLearner):
             skip_model_selection=kwargs.get("skip_model_selection", False),
             enable_ensemble=kwargs.get("enable_ensemble", True),
             covariate_metadata=self.feature_generator.covariate_metadata,
-            val_splitter=val_splitter,
+            num_val_windows=num_val_windows,
+            val_step_size=val_step_size,
             refit_every_n_windows=refit_every_n_windows,
             cache_predictions=self.cache_predictions,
             ensemble_model_type=self.ensemble_model_type,
@@ -94,7 +96,7 @@ class TimeSeriesLearner(AbstractLearner):
         )
 
         assert issubclass(self.trainer_type, TimeSeriesTrainer)
-        self.trainer: Optional[TimeSeriesTrainer] = self.trainer_type(**trainer_init_kwargs)
+        self.trainer: TimeSeriesTrainer | None = self.trainer_type(**trainer_init_kwargs)
         self.trainer_path = self.trainer.path
         self.save()
 
@@ -111,6 +113,7 @@ class TimeSeriesLearner(AbstractLearner):
             val_data=val_data,
             hyperparameters=hyperparameters,
             hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
+            ensemble_hyperparameters=ensemble_hyperparameters,
             excluded_model_types=kwargs.get("excluded_model_types"),
             time_limit=time_limit,
             random_seed=random_seed,
@@ -121,9 +124,9 @@ class TimeSeriesLearner(AbstractLearner):
 
     def _align_covariates_with_forecast_index(
         self,
-        known_covariates: Optional[TimeSeriesDataFrame],
+        known_covariates: TimeSeriesDataFrame | None,
         data: TimeSeriesDataFrame,
-    ) -> Optional[TimeSeriesDataFrame]:
+    ) -> TimeSeriesDataFrame | None:
         """Select the relevant item_ids and timestamps from the known_covariates dataframe.
 
         If some of the item_ids or timestamps are missing, an exception is raised.
@@ -162,10 +165,10 @@ class TimeSeriesLearner(AbstractLearner):
     def predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates: Optional[TimeSeriesDataFrame] = None,
-        model: Optional[Union[str, AbstractTimeSeriesModel]] = None,
+        known_covariates: TimeSeriesDataFrame | None = None,
+        model: str | AbstractTimeSeriesModel | None = None,
         use_cache: bool = True,
-        random_seed: Optional[int] = None,
+        random_seed: int | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         data = self.feature_generator.transform(data)
@@ -183,8 +186,8 @@ class TimeSeriesLearner(AbstractLearner):
     def score(
         self,
         data: TimeSeriesDataFrame,
-        model: Optional[Union[str, AbstractTimeSeriesModel]] = None,
-        metric: Union[str, TimeSeriesScorer, None] = None,
+        model: str | AbstractTimeSeriesModel | None = None,
+        metric: str | TimeSeriesScorer | None = None,
         use_cache: bool = True,
     ) -> float:
         data = self.feature_generator.transform(data)
@@ -193,8 +196,8 @@ class TimeSeriesLearner(AbstractLearner):
     def evaluate(
         self,
         data: TimeSeriesDataFrame,
-        model: Optional[str] = None,
-        metrics: Optional[Union[str, TimeSeriesScorer, list[Union[str, TimeSeriesScorer]]]] = None,
+        model: str | None = None,
+        metrics: str | TimeSeriesScorer | list[str | TimeSeriesScorer] | None = None,
         use_cache: bool = True,
     ) -> dict[str, float]:
         data = self.feature_generator.transform(data)
@@ -202,15 +205,15 @@ class TimeSeriesLearner(AbstractLearner):
     def get_feature_importance(
         self,
-        data: Optional[TimeSeriesDataFrame] = None,
-        model: Optional[str] = None,
-        metric: Optional[Union[str, TimeSeriesScorer]] = None,
-        features: Optional[list[str]] = None,
-        time_limit: Optional[float] = None,
+        data: TimeSeriesDataFrame | None = None,
+        model: str | None = None,
+        metric: str | TimeSeriesScorer | None = None,
+        features: list[str] | None = None,
+        time_limit: float | None = None,
         method: Literal["naive", "permutation"] = "permutation",
         subsample_size: int = 50,
-        num_iterations: Optional[int] = None,
-        random_seed: Optional[int] = None,
+        num_iterations: int | None = None,
+        random_seed: int | None = None,
         relative_scores: bool = False,
         include_confidence_band: bool = True,
         confidence_level: float = 0.99,
@@ -271,9 +274,9 @@ class TimeSeriesLearner(AbstractLearner):
 
     def leaderboard(
         self,
-        data: Optional[TimeSeriesDataFrame] = None,
+        data: TimeSeriesDataFrame | None = None,
         extra_info: bool = False,
-        extra_metrics: Optional[list[Union[str, TimeSeriesScorer]]] = None,
+        extra_metrics: list[str | TimeSeriesScorer] | None = None,
         use_cache: bool = True,
     ) -> pd.DataFrame:
         if data is not None:
@@ -300,7 +303,7 @@ class TimeSeriesLearner(AbstractLearner):
         return learner_info
 
     def persist_trainer(
-        self, models: Union[Literal["all", "best"], list[str]] = "all", with_ancestors: bool = False
+        self, models: Literal["all", "best"] | list[str] = "all", with_ancestors: bool = False
     ) -> list[str]:
         """Loads models and trainer in memory so that they don't have to be
         loaded during predictions
@@ -328,3 +331,35 @@ class TimeSeriesLearner(AbstractLearner):
 
     def refit_full(self, model: str = "all") -> dict[str, str]:
         return self.load_trainer().refit_full(model=model)
+
+    def backtest_predictions(
+        self,
+        data: TimeSeriesDataFrame | None,
+        model_names: list[str],
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
+        use_cache: bool = True,
+    ) -> dict[str, list[TimeSeriesDataFrame]]:
+        if data is not None:
+            data = self.feature_generator.transform(data)
+        return self.load_trainer().backtest_predictions(
+            model_names=model_names,
+            data=data,
+            num_val_windows=num_val_windows,
+            val_step_size=val_step_size,
+            use_cache=use_cache,
+        )
+
+    def backtest_targets(
+        self,
+        data: TimeSeriesDataFrame | None,
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
+    ) -> list[TimeSeriesDataFrame]:
+        if data is not None:
+            data = self.feature_generator.transform(data)
+        return self.load_trainer().backtest_targets(
+            data=data,
+            num_val_windows=num_val_windows,
+            val_step_size=val_step_size,
+        )