autogluon.timeseries 1.4.1b20251010__py3-none-any.whl → 1.4.1b20251115__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Files changed (53)
  1. autogluon/timeseries/dataset/ts_dataframe.py +66 -53
  2. autogluon/timeseries/learner.py +5 -4
  3. autogluon/timeseries/metrics/quantile.py +1 -1
  4. autogluon/timeseries/metrics/utils.py +4 -4
  5. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +28 -36
  6. autogluon/timeseries/models/autogluon_tabular/per_step.py +14 -5
  7. autogluon/timeseries/models/autogluon_tabular/transforms.py +9 -7
  8. autogluon/timeseries/models/chronos/model.py +101 -68
  9. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +64 -32
  10. autogluon/timeseries/models/ensemble/__init__.py +29 -2
  11. autogluon/timeseries/models/ensemble/abstract.py +1 -37
  12. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  13. autogluon/timeseries/models/ensemble/array_based/abstract.py +247 -0
  14. autogluon/timeseries/models/ensemble/array_based/models.py +50 -0
  15. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +10 -0
  16. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +87 -0
  17. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +133 -0
  18. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +141 -0
  19. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  20. autogluon/timeseries/models/ensemble/weighted/abstract.py +41 -0
  21. autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +0 -10
  22. autogluon/timeseries/models/gluonts/abstract.py +2 -2
  23. autogluon/timeseries/models/gluonts/dataset.py +2 -2
  24. autogluon/timeseries/models/local/abstract_local_model.py +2 -2
  25. autogluon/timeseries/models/multi_window/multi_window_model.py +1 -1
  26. autogluon/timeseries/models/toto/model.py +5 -3
  27. autogluon/timeseries/predictor.py +10 -26
  28. autogluon/timeseries/regressor.py +9 -7
  29. autogluon/timeseries/splitter.py +1 -25
  30. autogluon/timeseries/trainer/ensemble_composer.py +250 -0
  31. autogluon/timeseries/trainer/trainer.py +124 -193
  32. autogluon/timeseries/trainer/utils.py +18 -0
  33. autogluon/timeseries/transforms/covariate_scaler.py +1 -1
  34. autogluon/timeseries/transforms/target_scaler.py +7 -7
  35. autogluon/timeseries/utils/features.py +9 -5
  36. autogluon/timeseries/utils/forecast.py +5 -5
  37. autogluon/timeseries/version.py +1 -1
  38. autogluon.timeseries-1.4.1b20251115-py3.9-nspkg.pth +1 -0
  39. {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/METADATA +25 -15
  40. {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/RECORD +47 -41
  41. {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/WHEEL +1 -1
  42. autogluon/timeseries/evaluator.py +0 -6
  43. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
  44. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  45. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
  46. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
  47. autogluon.timeseries-1.4.1b20251010-py3.9-nspkg.pth +0 -1
  48. /autogluon/timeseries/models/ensemble/{greedy.py → weighted/greedy.py} +0 -0
  49. {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info/licenses}/LICENSE +0 -0
  50. {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info/licenses}/NOTICE +0 -0
  51. {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/namespace_packages.txt +0 -0
  52. {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/top_level.txt +0 -0
  53. {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/zip-safe +0 -0

autogluon/timeseries/dataset/ts_dataframe.py

@@ -7,7 +7,7 @@ import reprlib
 from collections.abc import Iterable
 from itertools import islice
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional, Type, Union, overload
+from typing import TYPE_CHECKING, Any, Final, Optional, Type, Union, overload

 import numpy as np
 import pandas as pd
@@ -19,11 +19,6 @@ from autogluon.common.loaders import load_pd

 logger = logging.getLogger(__name__)

-ITEMID = "item_id"
-TIMESTAMP = "timestamp"
-
-IRREGULAR_TIME_INDEX_FREQSTR = "IRREG"
-

 class TimeSeriesDataFrame(pd.DataFrame):
     """A collection of univariate time series, where each row is identified by an (``item_id``, ``timestamp``) pair.
@@ -121,6 +116,10 @@ class TimeSeriesDataFrame(pd.DataFrame):
     index: pd.MultiIndex  # type: ignore
     _metadata = ["_static_features"]

+    IRREGULAR_TIME_INDEX_FREQSTR: Final[str] = "IRREG"
+    ITEMID: Final[str] = "item_id"
+    TIMESTAMP: Final[str] = "timestamp"
+
     def __init__(
         self,
         data: Union[pd.DataFrame, str, Path, Iterable],
@@ -175,23 +174,27 @@ class TimeSeriesDataFrame(pd.DataFrame):
         df = df.copy()
         if id_column is not None:
             assert id_column in df.columns, f"Column '{id_column}' not found!"
-            if id_column != ITEMID and ITEMID in df.columns:
-                logger.warning(f"Renaming existing column '{ITEMID}' -> '__{ITEMID}' to avoid name collisions.")
-                df.rename(columns={ITEMID: "__" + ITEMID}, inplace=True)
-            df.rename(columns={id_column: ITEMID}, inplace=True)
+            if id_column != cls.ITEMID and cls.ITEMID in df.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.ITEMID}' -> '__{cls.ITEMID}' to avoid name collisions."
+                )
+                df.rename(columns={cls.ITEMID: "__" + cls.ITEMID}, inplace=True)
+            df.rename(columns={id_column: cls.ITEMID}, inplace=True)

         if timestamp_column is not None:
             assert timestamp_column in df.columns, f"Column '{timestamp_column}' not found!"
-            if timestamp_column != TIMESTAMP and TIMESTAMP in df.columns:
-                logger.warning(f"Renaming existing column '{TIMESTAMP}' -> '__{TIMESTAMP}' to avoid name collisions.")
-                df.rename(columns={TIMESTAMP: "__" + TIMESTAMP}, inplace=True)
-            df.rename(columns={timestamp_column: TIMESTAMP}, inplace=True)
+            if timestamp_column != cls.TIMESTAMP and cls.TIMESTAMP in df.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.TIMESTAMP}' -> '__{cls.TIMESTAMP}' to avoid name collisions."
+                )
+                df.rename(columns={cls.TIMESTAMP: "__" + cls.TIMESTAMP}, inplace=True)
+            df.rename(columns={timestamp_column: cls.TIMESTAMP}, inplace=True)

-        if TIMESTAMP in df.columns:
-            df[TIMESTAMP] = pd.to_datetime(df[TIMESTAMP])
+        if cls.TIMESTAMP in df.columns:
+            df[cls.TIMESTAMP] = pd.to_datetime(df[cls.TIMESTAMP])

         cls._validate_data_frame(df)
-        return df.set_index([ITEMID, TIMESTAMP])
+        return df.set_index([cls.ITEMID, cls.TIMESTAMP])

     @classmethod
     def _construct_tsdf_from_iterable_dataset(cls, iterable_dataset: Iterable, num_cpus: int = -1) -> pd.DataFrame:
@@ -202,7 +205,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
             start_timestamp = start_timestamp.to_timestamp(how="S")
             target = ts["target"]
             datetime_index = tuple(pd.date_range(start_timestamp, periods=len(target), freq=freq))
-            idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[ITEMID, TIMESTAMP])
+            idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[cls.ITEMID, cls.TIMESTAMP])
             return pd.Series(target, name="target", index=idx).to_frame()

         cls._validate_iterable(iterable_dataset)
@@ -219,32 +222,34 @@ class TimeSeriesDataFrame(pd.DataFrame):
             raise ValueError(f"data must be a pd.DataFrame, got {type(data)}")
         if not isinstance(data.index, pd.MultiIndex):
             raise ValueError(f"data must have pd.MultiIndex, got {type(data.index)}")
-        if not pd.api.types.is_datetime64_dtype(data.index.dtypes[TIMESTAMP]):
-            raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
-        if not data.index.names == (f"{ITEMID}", f"{TIMESTAMP}"):
-            raise ValueError(f"data must have index names as ('{ITEMID}', '{TIMESTAMP}'), got {data.index.names}")
+        if not pd.api.types.is_datetime64_dtype(data.index.dtypes[cls.TIMESTAMP]):
+            raise ValueError(f"for {cls.TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
+        if not data.index.names == (f"{cls.ITEMID}", f"{cls.TIMESTAMP}"):
+            raise ValueError(
+                f"data must have index names as ('{cls.ITEMID}', '{cls.TIMESTAMP}'), got {data.index.names}"
+            )
         item_id_index = data.index.levels[0]
         if not (pd.api.types.is_integer_dtype(item_id_index) or pd.api.types.is_string_dtype(item_id_index)):
-            raise ValueError(f"all entries in index `{ITEMID}` must be of integer or string dtype")
+            raise ValueError(f"all entries in index `{cls.ITEMID}` must be of integer or string dtype")

     @classmethod
     def _validate_data_frame(cls, df: pd.DataFrame):
         """Validate that a pd.DataFrame with ITEMID and TIMESTAMP columns can be converted to TimeSeriesDataFrame"""
         if not isinstance(df, pd.DataFrame):
             raise ValueError(f"data must be a pd.DataFrame, got {type(df)}")
-        if ITEMID not in df.columns:
-            raise ValueError(f"data must have a `{ITEMID}` column")
-        if TIMESTAMP not in df.columns:
-            raise ValueError(f"data must have a `{TIMESTAMP}` column")
-        if df[ITEMID].isnull().any():
-            raise ValueError(f"`{ITEMID}` column can not have nan")
-        if df[TIMESTAMP].isnull().any():
-            raise ValueError(f"`{TIMESTAMP}` column can not have nan")
-        if not pd.api.types.is_datetime64_dtype(df[TIMESTAMP]):
-            raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
-        item_id_column = df[ITEMID]
+        if cls.ITEMID not in df.columns:
+            raise ValueError(f"data must have a `{cls.ITEMID}` column")
+        if cls.TIMESTAMP not in df.columns:
+            raise ValueError(f"data must have a `{cls.TIMESTAMP}` column")
+        if df[cls.ITEMID].isnull().any():
+            raise ValueError(f"`{cls.ITEMID}` column can not have nan")
+        if df[cls.TIMESTAMP].isnull().any():
+            raise ValueError(f"`{cls.TIMESTAMP}` column can not have nan")
+        if not pd.api.types.is_datetime64_dtype(df[cls.TIMESTAMP]):
+            raise ValueError(f"for {cls.TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
+        item_id_column = df[cls.ITEMID]
         if not (pd.api.types.is_integer_dtype(item_id_column) or pd.api.types.is_string_dtype(item_id_column)):
-            raise ValueError(f"all entries in column `{ITEMID}` must be of integer or string dtype")
+            raise ValueError(f"all entries in column `{cls.ITEMID}` must be of integer or string dtype")

     @classmethod
     def _validate_iterable(cls, data: Iterable):
@@ -386,7 +391,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
     @property
     def item_ids(self) -> pd.Index:
         """List of unique time series IDs contained in the data set."""
-        return self.index.unique(level=ITEMID)
+        return self.index.unique(level=self.ITEMID)

     @classmethod
     def _construct_static_features(
@@ -403,10 +408,12 @@ class TimeSeriesDataFrame(pd.DataFrame):

         if id_column is not None:
             assert id_column in static_features.columns, f"Column '{id_column}' not found in static_features!"
-            if id_column != ITEMID and ITEMID in static_features.columns:
-                logger.warning(f"Renaming existing column '{ITEMID}' -> '__{ITEMID}' to avoid name collisions.")
-                static_features.rename(columns={ITEMID: "__" + ITEMID}, inplace=True)
-            static_features.rename(columns={id_column: ITEMID}, inplace=True)
+            if id_column != cls.ITEMID and cls.ITEMID in static_features.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.ITEMID}' -> '__{cls.ITEMID}' to avoid name collisions."
+                )
+                static_features.rename(columns={cls.ITEMID: "__" + cls.ITEMID}, inplace=True)
+            static_features.rename(columns={id_column: cls.ITEMID}, inplace=True)
         return static_features

     @property
@@ -431,10 +438,10 @@ class TimeSeriesDataFrame(pd.DataFrame):

         # Avoid modifying static features inplace
         value = value.copy()
-        if ITEMID in value.columns and value.index.name != ITEMID:
-            value = value.set_index(ITEMID)
-        if value.index.name != ITEMID:
-            value.index.rename(ITEMID, inplace=True)
+        if self.ITEMID in value.columns and value.index.name != self.ITEMID:
+            value = value.set_index(self.ITEMID)
+        if value.index.name != self.ITEMID:
+            value.index.rename(self.ITEMID, inplace=True)
         missing_item_ids = self.item_ids.difference(value.index)
         if len(missing_item_ids) > 0:
             raise ValueError(
@@ -514,7 +521,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 else:
                     raise ValueError(f"Cannot infer frequency. Multiple frequencies detected: {unique_freqs}")
             else:
-                return IRREGULAR_TIME_INDEX_FREQSTR
+                return self.IRREGULAR_TIME_INDEX_FREQSTR
         else:
             return pd.tseries.frequencies.to_offset(unique_freqs[0]).freqstr

@@ -526,7 +533,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         values. For reliable results, use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.infer_frequency`.
         """
         inferred_freq = self.infer_frequency(num_items=50)
-        return None if inferred_freq == IRREGULAR_TIME_INDEX_FREQSTR else inferred_freq
+        return None if inferred_freq == self.IRREGULAR_TIME_INDEX_FREQSTR else inferred_freq

     @property
     def num_items(self):
@@ -735,7 +742,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
             return self.loc[mask]
         else:
             # Fall back to a slow groupby operation
-            result = self.groupby(level=ITEMID, sort=False, as_index=False).nth(slice(start_index, end_index))
+            result = self.groupby(level=self.ITEMID, sort=False, as_index=False).nth(slice(start_index, end_index))
             result.static_features = self.static_features
             return result

@@ -852,12 +859,12 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 "It is highly recommended to call `ts_df.sort_index()` before calling `ts_df.fill_missing_values()`"
             )

-        grouped_df = df.groupby(level=ITEMID, sort=False, group_keys=False)
+        grouped_df = df.groupby(level=self.ITEMID, sort=False, group_keys=False)
         if method == "auto":
             filled_df = grouped_df.ffill()
             # If necessary, fill missing values at the start of each time series with bfill
             if filled_df.isna().any(axis=None):
-                filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).bfill()
+                filled_df = filled_df.groupby(level=self.ITEMID, sort=False, group_keys=False).bfill()
         elif method in ["ffill", "pad"]:
             filled_df = grouped_df.ffill()
         elif method in ["bfill", "backfill"]:
@@ -1086,8 +1093,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         def resample_chunk(chunk: Iterable[tuple[str, pd.DataFrame]]) -> pd.DataFrame:
             resampled_dfs = []
             for item_id, df in chunk:
-                resampled_df = df.resample(offset, level=TIMESTAMP, **kwargs).agg(aggregation)
-                resampled_dfs.append(pd.concat({item_id: resampled_df}, names=[ITEMID]))
+                resampled_df = df.resample(offset, level=self.TIMESTAMP, **kwargs).agg(aggregation)
+                resampled_dfs.append(pd.concat({item_id: resampled_df}, names=[self.ITEMID]))
             return pd.concat(resampled_dfs)

         # Resampling time for 1 item < overhead time for a single parallel job. Therefore, we group items into chunks
@@ -1095,8 +1102,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         df = pd.DataFrame(self)
         # Make sure that timestamp index has dtype 'datetime64[ns]', otherwise index may contain NaT values.
         # See https://github.com/autogluon/autogluon/issues/4917
-        df.index = df.index.set_levels(df.index.levels[1].astype("datetime64[ns]"), level=TIMESTAMP)
-        chunks = split_into_chunks(df.groupby(level=ITEMID, sort=False), chunk_size)
+        df.index = df.index.set_levels(df.index.levels[1].astype("datetime64[ns]"), level=self.TIMESTAMP)
+        chunks = split_into_chunks(df.groupby(level=self.ITEMID, sort=False), chunk_size)
         resampled_chunks = Parallel(n_jobs=num_cpus)(delayed(resample_chunk)(chunk) for chunk in chunks)
         resampled_df = TimeSeriesDataFrame(pd.concat(resampled_chunks))
         resampled_df.static_features = self.static_features
@@ -1142,3 +1149,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def __getitem__(self, items: list[str]) -> Self: ...  # type: ignore
     @overload
     def __getitem__(self, item: str) -> pd.Series: ...  # type: ignore
+
+
+# TODO: remove with v2.0
+# module-level constants kept for backward compatibility.
+ITEMID = TimeSeriesDataFrame.ITEMID
+TIMESTAMP = TimeSeriesDataFrame.TIMESTAMP
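
The `ITEMID` and `TIMESTAMP` constants move from module level onto the `TimeSeriesDataFrame` class (typed as `Final`), with module-level aliases kept until v2.0. A minimal sketch of what the rename means for downstream code; the toy DataFrame below is hypothetical, not part of the diff:

```python
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame

# New spelling: the index column names are class attributes.
id_col, ts_col = TimeSeriesDataFrame.ITEMID, TimeSeriesDataFrame.TIMESTAMP

# Old spelling keeps working until v2.0 via the compatibility aliases above.
from autogluon.timeseries.dataset.ts_dataframe import ITEMID
assert ITEMID == TimeSeriesDataFrame.ITEMID == "item_id"

# Hypothetical long-format frame using the expected column names.
df = pd.DataFrame({
    id_col: ["A", "A", "B"],
    ts_col: pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-01"]),
    "target": [1.0, 2.0, 3.0],
})
ts_df = TimeSeriesDataFrame(df)  # sets (item_id, timestamp) as the MultiIndex
print(ts_df.groupby(level=TimeSeriesDataFrame.ITEMID).mean())
```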

autogluon/timeseries/learner.py

@@ -6,10 +6,9 @@ from typing import Any, Literal, Optional, Type, Union
 import pandas as pd

 from autogluon.core.learner import AbstractLearner
-from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
-from autogluon.timeseries.splitter import AbstractWindowSplitter
 from autogluon.timeseries.trainer import TimeSeriesTrainer
 from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
 from autogluon.timeseries.utils.forecast import make_future_data_frame
@@ -60,7 +59,8 @@ class TimeSeriesLearner(AbstractLearner):
         val_data: Optional[TimeSeriesDataFrame] = None,
         hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
         time_limit: Optional[float] = None,
-        val_splitter: Optional[AbstractWindowSplitter] = None,
+        num_val_windows: Optional[int] = None,
+        val_step_size: Optional[int] = None,
         refit_every_n_windows: Optional[int] = 1,
         random_seed: Optional[int] = None,
         **kwargs,
@@ -86,7 +86,8 @@ class TimeSeriesLearner(AbstractLearner):
             skip_model_selection=kwargs.get("skip_model_selection", False),
             enable_ensemble=kwargs.get("enable_ensemble", True),
             covariate_metadata=self.feature_generator.covariate_metadata,
-            val_splitter=val_splitter,
+            num_val_windows=num_val_windows,
+            val_step_size=val_step_size,
             refit_every_n_windows=refit_every_n_windows,
             cache_predictions=self.cache_predictions,
             ensemble_model_type=self.ensemble_model_type,
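
The learner's `fit` now takes the two integers that previously configured the `val_splitter` object. A minimal sketch of the corresponding user-facing call, assuming `TimeSeriesPredictor.fit` forwards these arguments to the learner (`train_data` is a placeholder for an existing `TimeSeriesDataFrame`):

```python
from autogluon.timeseries import TimeSeriesPredictor

predictor = TimeSeriesPredictor(prediction_length=24)
predictor.fit(
    train_data,          # placeholder: a TimeSeriesDataFrame of past observations
    num_val_windows=3,   # number of backtest validation windows
    val_step_size=24,    # time steps between consecutive validation windows
)
```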

autogluon/timeseries/metrics/quantile.py

@@ -3,7 +3,7 @@ from typing import Optional, Sequence
 import numpy as np
 import pandas as pd

-from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
+from autogluon.timeseries.dataset import TimeSeriesDataFrame

 from .abstract import TimeSeriesScorer
 from .utils import in_sample_abs_seasonal_error

autogluon/timeseries/metrics/utils.py

@@ -1,18 +1,18 @@
 import pandas as pd

-from autogluon.timeseries.dataset.ts_dataframe import ITEMID
+from autogluon.timeseries.dataset import TimeSeriesDataFrame


 def _get_seasonal_diffs(*, y_past: pd.Series, seasonal_period: int = 1) -> pd.Series:
-    return y_past.groupby(level=ITEMID, sort=False).diff(seasonal_period).abs()
+    return y_past.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).diff(seasonal_period).abs()


 def in_sample_abs_seasonal_error(*, y_past: pd.Series, seasonal_period: int = 1) -> pd.Series:
     """Compute seasonal naive forecast error (predict value from seasonal_period steps ago) for each time series."""
     seasonal_diffs = _get_seasonal_diffs(y_past=y_past, seasonal_period=seasonal_period)
-    return seasonal_diffs.groupby(level=ITEMID, sort=False).mean().fillna(1.0)
+    return seasonal_diffs.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).mean().fillna(1.0)


 def in_sample_squared_seasonal_error(*, y_past: pd.Series, seasonal_period: int = 1) -> pd.Series:
     seasonal_diffs = _get_seasonal_diffs(y_past=y_past, seasonal_period=seasonal_period)
-    return seasonal_diffs.pow(2.0).groupby(level=ITEMID, sort=False).mean().fillna(1.0)
+    return seasonal_diffs.pow(2.0).groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).mean().fillna(1.0)

autogluon/timeseries/models/autogluon_tabular/mlforecast.py

@@ -13,7 +13,7 @@ import autogluon.core as ag
 from autogluon.core.models import AbstractModel as AbstractTabularModel
 from autogluon.features import AutoMLPipelineFeatureGenerator
 from autogluon.tabular.registry import ag_model_registry
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP, TimeSeriesDataFrame
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics.abstract import TimeSeriesScorer
 from autogluon.timeseries.metrics.utils import in_sample_squared_seasonal_error
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
@@ -120,7 +120,9 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     ) -> tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
         if is_train:
             # All-NaN series are removed; partially-NaN series in train_data are handled inside _generate_train_val_dfs
-            all_nan_items = data.item_ids[data[self.target].isna().groupby(ITEMID, sort=False).all()]
+            all_nan_items = data.item_ids[
+                data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
+            ]
             if len(all_nan_items):
                 data = data.query("item_id not in @all_nan_items")
         else:
@@ -130,31 +132,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             data[self.target] = data[self.target].fillna(value=self._train_target_median)
         return data, known_covariates

-    def _process_deprecated_hyperparameters(self, model_params: dict[str, Any]) -> dict[str, Any]:
-        if "tabular_hyperparameters" in model_params:
-            logger.warning(
-                f"Hyperparameter 'tabular_hyperparameters' for {self.name} is deprecated and will be removed in v1.5. "
-                "Please use 'model_name' to specify the tabular model alias and 'model_hyperparameters' "
-                "to provide the tabular model hyperparameters."
-            )
-            tabular_hyperparameters = model_params.pop("tabular_hyperparameters")
-            if len(tabular_hyperparameters) == 1:
-                # We can automatically convert the hyperparameters if only one model is used
-                model_params["model_name"] = list(tabular_hyperparameters.keys())[0]
-                model_params["model_hyperparameters"] = tabular_hyperparameters[model_params["model_name"]]
-            else:
-                raise ValueError(
-                    f"Provided 'tabular_hyperparameters' {tabular_hyperparameters} cannot be automatically converted "
-                    f"to the new 'model_name' and 'model_hyperparameters' API for {self.name}."
-                )
-        if "tabular_fit_kwargs" in model_params:
-            logger.warning(
-                f"Hyperparameters 'tabular_fit_kwargs' for {self.name} is deprecated and is ignored by the model. "
-                "Please use 'model_name' to specify the tabular model alias and 'model_hyperparameters' "
-                "to provide the tabular model hyperparameters."
-            )
-        return model_params
-
     def _get_default_hyperparameters(self) -> dict[str, Any]:
         return {
             "max_num_items": 20_000,
@@ -298,18 +275,28 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         """
         # TODO: Add support for past_covariates
         selected_columns = self.covariate_metadata.known_covariates.copy()
-        column_name_mapping = {ITEMID: MLF_ITEMID, TIMESTAMP: MLF_TIMESTAMP}
+        column_name_mapping = {TimeSeriesDataFrame.ITEMID: MLF_ITEMID, TimeSeriesDataFrame.TIMESTAMP: MLF_TIMESTAMP}
         if include_target:
             selected_columns += [self.target]
             column_name_mapping[self.target] = MLF_TARGET

         df = pd.DataFrame(data)[selected_columns].reset_index()
         if static_features is not None:
-            df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
+            df = pd.merge(
+                df, static_features, how="left", on=TimeSeriesDataFrame.ITEMID, suffixes=(None, "_static_feat")
+            )

         for col in self._non_boolean_real_covariates:
             # Normalize non-boolean features using mean_abs scaling
-            df[f"__scaled_{col}"] = df[col] / df[col].abs().groupby(df[ITEMID]).mean().reindex(df[ITEMID]).values
+            df[f"__scaled_{col}"] = (
+                df[col]
+                / df[col]
+                .abs()
+                .groupby(df[TimeSeriesDataFrame.ITEMID])
+                .mean()
+                .reindex(df[TimeSeriesDataFrame.ITEMID])
+                .values
+            )

         # Convert float64 to float32 to reduce memory usage
         float64_cols = list(df.select_dtypes(include="float64"))
@@ -338,7 +325,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             if not set(train_data[col].unique()) == set([0, 1]):
                 self._non_boolean_real_covariates.append(col)
         model_params = self.get_hyperparameters()
-        model_params = self._process_deprecated_hyperparameters(model_params)

         mlforecast_init_args = self._get_mlforecast_init_args(train_data, model_params)
         assert self.freq is not None
@@ -612,12 +598,14 @@ class DirectTabularModel(AbstractMLForecastModel):
             predictions, repeated_item_ids=predictions[MLF_ITEMID], past_target=data[self.target]
         )
         predictions_tsdf: TimeSeriesDataFrame = TimeSeriesDataFrame(
-            predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
+            predictions.rename(
+                columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
+            )
         )

         if forecast_for_short_series is not None:
             predictions_tsdf = pd.concat([predictions_tsdf, forecast_for_short_series])  # type: ignore
-        predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=ITEMID)
+        predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)

         return predictions_tsdf

@@ -745,16 +733,20 @@ class RecursiveTabularModel(AbstractMLForecastModel):
             X_df=X_df,
         )
         assert isinstance(raw_predictions, pd.DataFrame)
-        raw_predictions = raw_predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
+        raw_predictions = raw_predictions.rename(
+            columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
+        )

         predictions: TimeSeriesDataFrame = TimeSeriesDataFrame(
             self._add_gaussian_quantiles(
-                raw_predictions, repeated_item_ids=raw_predictions[ITEMID], past_target=data[self.target]
+                raw_predictions,
+                repeated_item_ids=raw_predictions[TimeSeriesDataFrame.ITEMID],
+                past_target=data[self.target],
             )
         )
         if forecast_for_short_series is not None:
             predictions = pd.concat([predictions, forecast_for_short_series])  # type: ignore
-        return predictions.reindex(original_item_id_order, level=ITEMID)
+        return predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)

     def _create_tabular_model(self, model_name: str, model_hyperparameters: dict[str, Any]) -> TabularModel:
         model_class = ag_model_registry.key_to_cls(model_name)

autogluon/timeseries/models/autogluon_tabular/per_step.py

@@ -17,7 +17,6 @@ from autogluon.core.constants import QUANTILE, REGRESSION
 from autogluon.tabular.models import AbstractModel as AbstractTabularModel
 from autogluon.tabular.registry import ag_model_registry
 from autogluon.timeseries import TimeSeriesDataFrame
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.utils.datetime import get_lags_for_frequency, get_time_features_for_frequency
 from autogluon.timeseries.utils.warning_filters import set_loggers_level, warning_filter
@@ -115,7 +114,11 @@ class PerStepTabularModel(AbstractTimeSeriesModel):

     @property
     def _ag_to_nixtla(self) -> dict:
-        return {self.target: MLF_TARGET, ITEMID: MLF_ITEMID, TIMESTAMP: MLF_TIMESTAMP}
+        return {
+            self.target: MLF_TARGET,
+            TimeSeriesDataFrame.ITEMID: MLF_ITEMID,
+            TimeSeriesDataFrame.TIMESTAMP: MLF_TIMESTAMP,
+        }

     def _get_default_hyperparameters(self):
         return {
@@ -246,7 +249,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
                 self._non_boolean_real_covariates.append(col)

         if len(self._non_boolean_real_covariates) > 0:
-            item_ids = data.index.get_level_values(level=ITEMID)
+            item_ids = data.index.get_level_values(level=TimeSeriesDataFrame.ITEMID)
             scale_per_column: dict[str, pd.Series] = {}
             columns_grouped = data[self._non_boolean_real_covariates].abs().groupby(item_ids)
             for col in self._non_boolean_real_covariates:
@@ -277,7 +280,11 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         train_df = train_data.to_data_frame().reset_index()
         if train_data.static_features is not None:
             train_df = pd.merge(
-                left=train_df, right=train_data.static_features, left_on=ITEMID, right_index=True, how="left"
+                left=train_df,
+                right=train_data.static_features,
+                left_on=TimeSeriesDataFrame.ITEMID,
+                right_index=True,
+                how="left",
             )
         train_df = train_df.rename(columns=self._ag_to_nixtla)
         train_df = train_df.assign(**{MLF_TARGET: train_df[MLF_TARGET].fillna(float("inf"))})
@@ -462,7 +469,9 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         full_df = full_df.slice_by_timestep(-(self._max_ts_length + self.prediction_length), None)
         full_df = full_df.to_data_frame().reset_index()
         if data.static_features is not None:
-            full_df = pd.merge(full_df, data.static_features, left_on=ITEMID, right_index=True, how="left")
+            full_df = pd.merge(
+                full_df, data.static_features, left_on=TimeSeriesDataFrame.ITEMID, right_index=True, how="left"
+            )

         full_df = (
             full_df.rename(columns=self._ag_to_nixtla)

autogluon/timeseries/models/autogluon_tabular/transforms.py

@@ -8,11 +8,7 @@ from mlforecast.target_transforms import (
     _BaseGroupedArrayTargetTransform,
 )

-from autogluon.timeseries.dataset.ts_dataframe import (
-    ITEMID,
-    TIMESTAMP,
-    TimeSeriesDataFrame,
-)
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.transforms.target_scaler import TargetScaler, get_target_scaler

 from .utils import MLF_ITEMID, MLF_TIMESTAMP
@@ -26,11 +22,17 @@ class MLForecastScaler(BaseTargetTransform):

     def _df_to_tsdf(self, df: pd.DataFrame) -> TimeSeriesDataFrame:
         return TimeSeriesDataFrame(
-            df.rename(columns={self.id_col: ITEMID, self.time_col: TIMESTAMP}).set_index([ITEMID, TIMESTAMP])
+            df.rename(
+                columns={self.id_col: TimeSeriesDataFrame.ITEMID, self.time_col: TimeSeriesDataFrame.TIMESTAMP}
+            ).set_index([TimeSeriesDataFrame.ITEMID, TimeSeriesDataFrame.TIMESTAMP])
         )

     def _tsdf_to_df(self, ts_df: TimeSeriesDataFrame) -> pd.DataFrame:
-        return pd.DataFrame(ts_df).reset_index().rename(columns={ITEMID: self.id_col, TIMESTAMP: self.time_col})
+        return (
+            pd.DataFrame(ts_df)
+            .reset_index()
+            .rename(columns={TimeSeriesDataFrame.ITEMID: self.id_col, TimeSeriesDataFrame.TIMESTAMP: self.time_col})
+        )

     def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:  # type: ignore
         self.ag_scaler = get_target_scaler(name=self.scaler_type, target=self.target_col)