autogluon.timeseries 1.2.1b20250224__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Files changed (108)
  1. autogluon/timeseries/configs/__init__.py +3 -2
  2. autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
  3. autogluon/timeseries/configs/predictor_presets.py +106 -0
  4. autogluon/timeseries/dataset/ts_dataframe.py +256 -141
  5. autogluon/timeseries/learner.py +86 -52
  6. autogluon/timeseries/metrics/__init__.py +42 -8
  7. autogluon/timeseries/metrics/abstract.py +89 -19
  8. autogluon/timeseries/metrics/point.py +142 -53
  9. autogluon/timeseries/metrics/quantile.py +46 -21
  10. autogluon/timeseries/metrics/utils.py +4 -4
  11. autogluon/timeseries/models/__init__.py +8 -2
  12. autogluon/timeseries/models/abstract/__init__.py +2 -2
  13. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +361 -592
  14. autogluon/timeseries/models/abstract/model_trial.py +2 -1
  15. autogluon/timeseries/models/abstract/tunable.py +189 -0
  16. autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
  17. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +282 -194
  18. autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
  19. autogluon/timeseries/models/autogluon_tabular/transforms.py +25 -18
  20. autogluon/timeseries/models/chronos/__init__.py +2 -1
  21. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  22. autogluon/timeseries/models/chronos/model.py +219 -138
  23. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +81 -50
  24. autogluon/timeseries/models/ensemble/__init__.py +37 -2
  25. autogluon/timeseries/models/ensemble/abstract.py +107 -0
  26. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  27. autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
  28. autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
  29. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  31. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
  32. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  33. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  34. autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
  35. autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
  36. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  37. autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
  38. autogluon/timeseries/models/ensemble/weighted/basic.py +91 -0
  39. autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
  40. autogluon/timeseries/models/gluonts/__init__.py +1 -1
  41. autogluon/timeseries/models/gluonts/{abstract_gluonts.py → abstract.py} +148 -208
  42. autogluon/timeseries/models/gluonts/dataset.py +109 -0
  43. autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +38 -22
  44. autogluon/timeseries/models/local/__init__.py +0 -7
  45. autogluon/timeseries/models/local/abstract_local_model.py +71 -74
  46. autogluon/timeseries/models/local/naive.py +13 -9
  47. autogluon/timeseries/models/local/npts.py +9 -2
  48. autogluon/timeseries/models/local/statsforecast.py +52 -36
  49. autogluon/timeseries/models/multi_window/multi_window_model.py +65 -45
  50. autogluon/timeseries/models/registry.py +64 -0
  51. autogluon/timeseries/models/toto/__init__.py +3 -0
  52. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  53. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  56. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  57. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  58. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  59. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  60. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  61. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  62. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  63. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  64. autogluon/timeseries/models/toto/dataloader.py +108 -0
  65. autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
  66. autogluon/timeseries/models/toto/model.py +249 -0
  67. autogluon/timeseries/predictor.py +685 -297
  68. autogluon/timeseries/regressor.py +94 -44
  69. autogluon/timeseries/splitter.py +8 -32
  70. autogluon/timeseries/trainer/__init__.py +3 -0
  71. autogluon/timeseries/trainer/ensemble_composer.py +444 -0
  72. autogluon/timeseries/trainer/model_set_builder.py +256 -0
  73. autogluon/timeseries/trainer/prediction_cache.py +149 -0
  74. autogluon/timeseries/{trainer.py → trainer/trainer.py} +387 -390
  75. autogluon/timeseries/trainer/utils.py +17 -0
  76. autogluon/timeseries/transforms/__init__.py +2 -13
  77. autogluon/timeseries/transforms/covariate_scaler.py +34 -40
  78. autogluon/timeseries/transforms/target_scaler.py +37 -20
  79. autogluon/timeseries/utils/constants.py +10 -0
  80. autogluon/timeseries/utils/datetime/lags.py +3 -5
  81. autogluon/timeseries/utils/datetime/seasonality.py +1 -3
  82. autogluon/timeseries/utils/datetime/time_features.py +2 -2
  83. autogluon/timeseries/utils/features.py +70 -47
  84. autogluon/timeseries/utils/forecast.py +19 -14
  85. autogluon/timeseries/utils/timer.py +173 -0
  86. autogluon/timeseries/utils/warning_filters.py +4 -2
  87. autogluon/timeseries/version.py +1 -1
  88. autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
  89. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +49 -36
  90. autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
  91. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
  92. autogluon/timeseries/configs/presets_configs.py +0 -79
  93. autogluon/timeseries/evaluator.py +0 -6
  94. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -11
  95. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  96. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -585
  97. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -518
  98. autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -78
  99. autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
  100. autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  101. autogluon/timeseries/models/presets.py +0 -360
  102. autogluon.timeseries-1.2.1b20250224-py3.9-nspkg.pth +0 -1
  103. autogluon.timeseries-1.2.1b20250224.dist-info/RECORD +0 -68
  104. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
  105. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
  106. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
  107. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
  108. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
@@ -7,27 +7,23 @@ import reprlib
 from collections.abc import Iterable
 from itertools import islice
 from pathlib import Path
-from pprint import pformat
-from typing import Any, List, Optional, Tuple, Type, Union
+from typing import TYPE_CHECKING, Any, Final, Type, overload
 
+import numpy as np
 import pandas as pd
 from joblib.parallel import Parallel, delayed
 from pandas.core.internals import ArrayManager, BlockManager  # type: ignore
+from typing_extensions import Self
 
 from autogluon.common.loaders import load_pd
 
 logger = logging.getLogger(__name__)
 
-ITEMID = "item_id"
-TIMESTAMP = "timestamp"
-
-IRREGULAR_TIME_INDEX_FREQSTR = "IRREG"
-
 
 class TimeSeriesDataFrame(pd.DataFrame):
     """A collection of univariate time series, where each row is identified by an (``item_id``, ``timestamp``) pair.
 
-    For example, a time series data frame could represent the daily sales of a collection of products, where each
+    For example, a time series dataframe could represent the daily sales of a collection of products, where each
     ``item_id`` corresponds to a product and ``timestamp`` corresponds to the day of the record.
 
     Parameters
@@ -77,7 +73,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         You can also use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.from_iterable_dataset` for loading data in such format.
 
     static_features : pd.DataFrame, str or pathlib.Path, optional
-        An optional data frame describing the metadata of each individual time series that does not change with time.
+        An optional dataframe describing the metadata of each individual time series that does not change with time.
         Can take real-valued or categorical values. For example, if ``TimeSeriesDataFrame`` contains sales of various
         products, static features may refer to time-independent features like color or brand.
 
@@ -117,15 +113,19 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
     """
 
-    index: pd.MultiIndex
+    index: pd.MultiIndex  # type: ignore
     _metadata = ["_static_features"]
 
+    IRREGULAR_TIME_INDEX_FREQSTR: Final[str] = "IRREG"
+    ITEMID: Final[str] = "item_id"
+    TIMESTAMP: Final[str] = "timestamp"
+
     def __init__(
         self,
-        data: Union[pd.DataFrame, str, Path, Iterable],
-        static_features: Optional[Union[pd.DataFrame, str, Path]] = None,
-        id_column: Optional[str] = None,
-        timestamp_column: Optional[str] = None,
+        data: pd.DataFrame | str | Path | Iterable,
+        static_features: pd.DataFrame | str | Path | None = None,
+        id_column: str | None = None,
+        timestamp_column: str | None = None,
         num_cpus: int = -1,
         *args,
         **kwargs,
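Reviewer note: the module-level `ITEMID`, `TIMESTAMP`, and `IRREGULAR_TIME_INDEX_FREQSTR` constants removed in the first hunk reappear here as class-level `Final` attributes, with module-level aliases retained at the end of the file. A minimal sketch of the new access pattern (the legacy import path is taken from the compatibility aliases in the last hunk):

```python
from autogluon.timeseries import TimeSeriesDataFrame

# Preferred access after this change: constants live on the class.
print(TimeSeriesDataFrame.ITEMID)      # "item_id"
print(TimeSeriesDataFrame.TIMESTAMP)   # "timestamp"

# Legacy module-level imports still resolve via the backward-compat aliases:
from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP
```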
@@ -149,7 +149,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         else:
             raise ValueError(f"data must be a pd.DataFrame, Iterable, string or Path (received {type(data)}).")
         super().__init__(data=data, *args, **kwargs)  # type: ignore
-        self._static_features: Optional[pd.DataFrame] = None
+        self._static_features: pd.DataFrame | None = None
         if static_features is not None:
             self.static_features = self._construct_static_features(static_features, id_column=id_column)
 
@@ -168,29 +168,33 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def _construct_tsdf_from_data_frame(
         cls,
         df: pd.DataFrame,
-        id_column: Optional[str] = None,
-        timestamp_column: Optional[str] = None,
+        id_column: str | None = None,
+        timestamp_column: str | None = None,
     ) -> pd.DataFrame:
         df = df.copy()
         if id_column is not None:
             assert id_column in df.columns, f"Column '{id_column}' not found!"
-            if id_column != ITEMID and ITEMID in df.columns:
-                logger.warning(f"Renaming existing column '{ITEMID}' -> '__{ITEMID}' to avoid name collisions.")
-                df.rename(columns={ITEMID: "__" + ITEMID}, inplace=True)
-            df.rename(columns={id_column: ITEMID}, inplace=True)
+            if id_column != cls.ITEMID and cls.ITEMID in df.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.ITEMID}' -> '__{cls.ITEMID}' to avoid name collisions."
+                )
+                df.rename(columns={cls.ITEMID: "__" + cls.ITEMID}, inplace=True)
+            df.rename(columns={id_column: cls.ITEMID}, inplace=True)
 
         if timestamp_column is not None:
             assert timestamp_column in df.columns, f"Column '{timestamp_column}' not found!"
-            if timestamp_column != TIMESTAMP and TIMESTAMP in df.columns:
-                logger.warning(f"Renaming existing column '{TIMESTAMP}' -> '__{TIMESTAMP}' to avoid name collisions.")
-                df.rename(columns={TIMESTAMP: "__" + TIMESTAMP}, inplace=True)
-            df.rename(columns={timestamp_column: TIMESTAMP}, inplace=True)
+            if timestamp_column != cls.TIMESTAMP and cls.TIMESTAMP in df.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.TIMESTAMP}' -> '__{cls.TIMESTAMP}' to avoid name collisions."
+                )
+                df.rename(columns={cls.TIMESTAMP: "__" + cls.TIMESTAMP}, inplace=True)
+            df.rename(columns={timestamp_column: cls.TIMESTAMP}, inplace=True)
 
-        if TIMESTAMP in df.columns:
-            df[TIMESTAMP] = pd.to_datetime(df[TIMESTAMP])
+        if cls.TIMESTAMP in df.columns:
+            df[cls.TIMESTAMP] = pd.to_datetime(df[cls.TIMESTAMP])
 
         cls._validate_data_frame(df)
-        return df.set_index([ITEMID, TIMESTAMP])
+        return df.set_index([cls.ITEMID, cls.TIMESTAMP])
 
     @classmethod
     def _construct_tsdf_from_iterable_dataset(cls, iterable_dataset: Iterable, num_cpus: int = -1) -> pd.DataFrame:
@@ -201,7 +205,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 start_timestamp = start_timestamp.to_timestamp(how="S")
             target = ts["target"]
             datetime_index = tuple(pd.date_range(start_timestamp, periods=len(target), freq=freq))
-            idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[ITEMID, TIMESTAMP])
+            idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[cls.ITEMID, cls.TIMESTAMP])
             return pd.Series(target, name="target", index=idx).to_frame()
 
         cls._validate_iterable(iterable_dataset)
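Reviewer note: the loader above consumes GluonTS-style entries where `"start"` is a `pd.Period` (which also fixes the frequency) and `"target"` is the array of observations. A minimal sketch with hypothetical data:

```python
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame

# Two hypothetical items; each entry's "start" Period determines the frequency.
iterable_dataset = [
    {"start": pd.Period("2024-01-01", freq="D"), "target": [1.0, 2.0, 3.0]},
    {"start": pd.Period("2024-01-01", freq="D"), "target": [4.0, 5.0]},
]
ts_df = TimeSeriesDataFrame.from_iterable_dataset(iterable_dataset)
# Each entry becomes one item_id under a regular (item_id, timestamp) MultiIndex.
```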
@@ -218,32 +222,34 @@ class TimeSeriesDataFrame(pd.DataFrame):
             raise ValueError(f"data must be a pd.DataFrame, got {type(data)}")
         if not isinstance(data.index, pd.MultiIndex):
             raise ValueError(f"data must have pd.MultiIndex, got {type(data.index)}")
-        if not pd.api.types.is_datetime64_dtype(data.index.dtypes[TIMESTAMP]):
-            raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
-        if not data.index.names == (f"{ITEMID}", f"{TIMESTAMP}"):
-            raise ValueError(f"data must have index names as ('{ITEMID}', '{TIMESTAMP}'), got {data.index.names}")
-        item_id_index = data.index.get_level_values(level=ITEMID)
+        if not pd.api.types.is_datetime64_dtype(data.index.dtypes[cls.TIMESTAMP]):
+            raise ValueError(f"for {cls.TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
+        if not data.index.names == (f"{cls.ITEMID}", f"{cls.TIMESTAMP}"):
+            raise ValueError(
+                f"data must have index names as ('{cls.ITEMID}', '{cls.TIMESTAMP}'), got {data.index.names}"
+            )
+        item_id_index = data.index.levels[0]
         if not (pd.api.types.is_integer_dtype(item_id_index) or pd.api.types.is_string_dtype(item_id_index)):
-            raise ValueError(f"all entries in index `{ITEMID}` must be of integer or string dtype")
+            raise ValueError(f"all entries in index `{cls.ITEMID}` must be of integer or string dtype")
 
     @classmethod
     def _validate_data_frame(cls, df: pd.DataFrame):
         """Validate that a pd.DataFrame with ITEMID and TIMESTAMP columns can be converted to TimeSeriesDataFrame"""
         if not isinstance(df, pd.DataFrame):
             raise ValueError(f"data must be a pd.DataFrame, got {type(df)}")
-        if ITEMID not in df.columns:
-            raise ValueError(f"data must have a `{ITEMID}` column")
-        if TIMESTAMP not in df.columns:
-            raise ValueError(f"data must have a `{TIMESTAMP}` column")
-        if df[ITEMID].isnull().any():
-            raise ValueError(f"`{ITEMID}` column can not have nan")
-        if df[TIMESTAMP].isnull().any():
-            raise ValueError(f"`{TIMESTAMP}` column can not have nan")
-        if not pd.api.types.is_datetime64_dtype(df[TIMESTAMP]):
-            raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
-        item_id_column = df[ITEMID]
+        if cls.ITEMID not in df.columns:
+            raise ValueError(f"data must have a `{cls.ITEMID}` column")
+        if cls.TIMESTAMP not in df.columns:
+            raise ValueError(f"data must have a `{cls.TIMESTAMP}` column")
+        if df[cls.ITEMID].isnull().any():
+            raise ValueError(f"`{cls.ITEMID}` column can not have nan")
+        if df[cls.TIMESTAMP].isnull().any():
+            raise ValueError(f"`{cls.TIMESTAMP}` column can not have nan")
+        if not pd.api.types.is_datetime64_dtype(df[cls.TIMESTAMP]):
+            raise ValueError(f"for {cls.TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
+        item_id_column = df[cls.ITEMID]
         if not (pd.api.types.is_integer_dtype(item_id_column) or pd.api.types.is_string_dtype(item_id_column)):
-            raise ValueError(f"all entries in column `{ITEMID}` must be of integer or string dtype")
+            raise ValueError(f"all entries in column `{cls.ITEMID}` must be of integer or string dtype")
 
     @classmethod
     def _validate_iterable(cls, data: Iterable):
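Reviewer note: the validators above spell out the long-format contract: an `item_id` column of integer or string dtype without NaN, and a `timestamp` column of dtype `datetime64` without NaN. A minimal input (hypothetical data) that passes validation:

```python
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame

df = pd.DataFrame(
    {
        "item_id": ["A", "A", "B", "B"],  # string dtype, no NaN
        "timestamp": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-01", "2024-01-02"]),
        "target": [10.0, 11.0, 20.0, 21.0],
    }
)
ts_df = TimeSeriesDataFrame.from_data_frame(df)
```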
@@ -266,9 +272,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def from_data_frame(
         cls,
         df: pd.DataFrame,
-        id_column: Optional[str] = None,
-        timestamp_column: Optional[str] = None,
-        static_features_df: Optional[pd.DataFrame] = None,
+        id_column: str | None = None,
+        timestamp_column: str | None = None,
+        static_features_df: pd.DataFrame | None = None,
     ) -> TimeSeriesDataFrame:
         """Construct a ``TimeSeriesDataFrame`` from a pandas DataFrame.
 
@@ -302,17 +308,17 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
         ts_df: TimeSeriesDataFrame
-            A data frame in TimeSeriesDataFrame format.
+            A dataframe in TimeSeriesDataFrame format.
         """
         return cls(df, static_features=static_features_df, id_column=id_column, timestamp_column=timestamp_column)
 
     @classmethod
     def from_path(
         cls,
-        path: Union[str, Path],
-        id_column: Optional[str] = None,
-        timestamp_column: Optional[str] = None,
-        static_features_path: Optional[Union[str, Path]] = None,
+        path: str | Path,
+        id_column: str | None = None,
+        timestamp_column: str | None = None,
+        static_features_path: str | Path | None = None,
     ) -> TimeSeriesDataFrame:
         """Construct a ``TimeSeriesDataFrame`` from a CSV or Parquet file.
 
@@ -349,7 +355,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
         ts_df: TimeSeriesDataFrame
-            A data frame in TimeSeriesDataFrame format.
+            A dataframe in TimeSeriesDataFrame format.
         """
         return cls(path, static_features=static_features_path, id_column=id_column, timestamp_column=timestamp_column)
 
@@ -378,20 +384,20 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
         ts_df: TimeSeriesDataFrame
-            A data frame in TimeSeriesDataFrame format.
+            A dataframe in TimeSeriesDataFrame format.
         """
         return cls(iterable_dataset, num_cpus=num_cpus)
 
     @property
     def item_ids(self) -> pd.Index:
         """List of unique time series IDs contained in the data set."""
-        return self.index.unique(level=ITEMID)
+        return self.index.unique(level=self.ITEMID)
 
     @classmethod
     def _construct_static_features(
         cls,
-        static_features: Union[pd.DataFrame, str, Path],
-        id_column: Optional[str] = None,
+        static_features: pd.DataFrame | str | Path,
+        id_column: str | None = None,
     ) -> pd.DataFrame:
         if isinstance(static_features, (str, Path)):
             static_features = load_pd.load(str(static_features))
@@ -402,10 +408,12 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
         if id_column is not None:
             assert id_column in static_features.columns, f"Column '{id_column}' not found in static_features!"
-            if id_column != ITEMID and ITEMID in static_features.columns:
-                logger.warning(f"Renaming existing column '{ITEMID}' -> '__{ITEMID}' to avoid name collisions.")
-                static_features.rename(columns={ITEMID: "__" + ITEMID}, inplace=True)
-            static_features.rename(columns={id_column: ITEMID}, inplace=True)
+            if id_column != cls.ITEMID and cls.ITEMID in static_features.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.ITEMID}' -> '__{cls.ITEMID}' to avoid name collisions."
+                )
+                static_features.rename(columns={cls.ITEMID: "__" + cls.ITEMID}, inplace=True)
+            static_features.rename(columns={id_column: cls.ITEMID}, inplace=True)
         return static_features
 
     @property
@@ -413,7 +421,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return self._static_features
 
     @static_features.setter
-    def static_features(self, value: Optional[pd.DataFrame]):
+    def static_features(self, value: pd.DataFrame | None):
         # if the current item index is not a multiindex, then we are dealing with a single
         # item slice. this should only happen when the user explicitly requests only a
         # single item or during `slice_by_timestep`. In this case we do not set static features
@@ -430,10 +438,10 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
         # Avoid modifying static features inplace
         value = value.copy()
-        if ITEMID in value.columns and value.index.name != ITEMID:
-            value = value.set_index(ITEMID)
-        if value.index.name != ITEMID:
-            value.index.rename(ITEMID, inplace=True)
+        if self.ITEMID in value.columns and value.index.name != self.ITEMID:
+            value = value.set_index(self.ITEMID)
+        if value.index.name != self.ITEMID:
+            value.index.rename(self.ITEMID, inplace=True)
         missing_item_ids = self.item_ids.difference(value.index)
         if len(missing_item_ids) > 0:
             raise ValueError(
@@ -446,7 +454,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
         self._static_features = value
 
-    def infer_frequency(self, num_items: Optional[int] = None, raise_if_irregular: bool = False) -> str:
+    def infer_frequency(self, num_items: int | None = None, raise_if_irregular: bool = False) -> str:
         """Infer the time series frequency based on the timestamps of the observations.
 
         Parameters
@@ -455,7 +463,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
             Number of items (individual time series) randomly selected to infer the frequency. Lower values speed up
             the method, but increase the chance that some items with invalid frequency are missed by subsampling.
 
-            If set to `None`, all items will be used for inferring the frequency.
+            If set to ``None``, all items will be used for inferring the frequency.
         raise_if_irregular : bool, default = False
             If True, an exception will be raised if some items have an irregular frequency, or if different items have
             different frequencies.
@@ -466,61 +474,66 @@ class TimeSeriesDataFrame(pd.DataFrame):
             If all time series have a regular frequency, returns a pandas-compatible `frequency alias <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
 
             If some items have an irregular frequency or if different items have different frequencies, returns string
-            `IRREG`.
+            ``IRREG``.
         """
+        ts_df = self
+        if num_items is not None and ts_df.num_items > num_items:
+            items_subset = ts_df.item_ids.to_series().sample(n=num_items, random_state=123)
+            ts_df = ts_df.loc[items_subset]
+
+        if not ts_df.index.is_monotonic_increasing:
+            ts_df = ts_df.sort_index()
+
+        indptr = ts_df.get_indptr()
+        item_ids = ts_df.item_ids
+        timestamps = ts_df.index.get_level_values(level=1)
+        candidate_freq = ts_df.index.levels[1].freq
+
+        frequencies = []
+        irregular_items = []
+        for i in range(len(indptr) - 1):
+            start, end = indptr[i], indptr[i + 1]
+            item_timestamps = timestamps[start:end]
+            inferred_freq = item_timestamps.inferred_freq
 
-        df = pd.DataFrame(self)
-        if num_items is not None:
-            all_item_ids = self.item_ids
-            if len(all_item_ids) > num_items:
-                items_subset = all_item_ids.to_series().sample(n=num_items, random_state=123)
-                df = df.loc[items_subset]
-
-        candidate_freq = df.index.levels[1].freq
-        index_df = df.index.to_frame(index=False)
-
-        def get_freq(series: pd.Series) -> Optional[str]:
-            dt_index = pd.DatetimeIndex(series)
-            inferred_freq = dt_index.inferred_freq
             # Fallback option: maybe original index has a `freq` attribute that pandas fails to infer (e.g., 'SME')
             if inferred_freq is None and candidate_freq is not None:
                 try:
                     # If this line does not raise an exception, then candidate_freq is a compatible frequency
-                    dt_index.freq = candidate_freq
+                    item_timestamps.freq = candidate_freq
                 except ValueError:
                     inferred_freq = None
                 else:
-                    inferred_freq = candidate_freq
-            return inferred_freq
+                    inferred_freq = candidate_freq.freqstr
+
+            if inferred_freq is None:
+                irregular_items.append(item_ids[i])
+            else:
+                frequencies.append(inferred_freq)
 
-        freq_for_each_item = index_df.groupby(ITEMID, sort=False).agg(get_freq)[TIMESTAMP]
-        freq = freq_for_each_item.iloc[0]
-        if len(set(freq_for_each_item)) > 1 or freq is None:
+        unique_freqs = list(set(frequencies))
+        if len(unique_freqs) != 1 or len(irregular_items) > 0:
             if raise_if_irregular:
-                items_with_irregular_freq = freq_for_each_item[pd.isnull(freq_for_each_item)]
-                if len(items_with_irregular_freq) > 0:
+                if irregular_items:
                     raise ValueError(
-                        "Cannot infer frequency. Items with irregular frequency: "
-                        f"{pformat(items_with_irregular_freq.index.tolist())}"
+                        f"Cannot infer frequency. Items with irregular frequency: {reprlib.repr(irregular_items)}"
                     )
                 else:
-                    raise ValueError(
-                        "Cannot infer frequency. Multiple frequencies detected in the dataset: "
-                        f"{freq_for_each_item.unique().tolist()}"
-                    )
-            return IRREGULAR_TIME_INDEX_FREQSTR
+                    raise ValueError(f"Cannot infer frequency. Multiple frequencies detected: {unique_freqs}")
+            else:
+                return self.IRREGULAR_TIME_INDEX_FREQSTR
         else:
-            return pd.tseries.frequencies.to_offset(freq).freqstr
+            return pd.tseries.frequencies.to_offset(unique_freqs[0]).freqstr
 
     @property
     def freq(self):
-        """Inferred pandas-compatible frequency of the timestamps in the data frame.
+        """Inferred pandas-compatible frequency of the timestamps in the dataframe.
 
         Computed using a random subset of the time series for speed. This may sometimes result in incorrectly inferred
         values. For reliable results, use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.infer_frequency`.
         """
         inferred_freq = self.infer_frequency(num_items=50)
-        return None if inferred_freq == IRREGULAR_TIME_INDEX_FREQSTR else inferred_freq
+        return None if inferred_freq == self.IRREGULAR_TIME_INDEX_FREQSTR else inferred_freq
 
     @property
     def num_items(self):
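Reviewer note: the rewrite replaces a per-group `agg` over a nested `get_freq` with a single loop over `get_indptr()` boundaries, checking each item's `DatetimeIndex.inferred_freq`. A minimal sketch of the observable behavior, with hypothetical data:

```python
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame

# Two items with regular daily timestamps -> a single frequency is inferred.
regular = TimeSeriesDataFrame.from_data_frame(
    pd.DataFrame(
        {
            "item_id": ["A"] * 4 + ["B"] * 4,
            "timestamp": list(pd.date_range("2024-01-01", periods=4, freq="D")) * 2,
            "target": range(8),
        }
    )
)
print(regular.infer_frequency())  # "D"

# Dropping a middle observation makes item "A" irregular.
irregular = regular.drop(index=[regular.index[1]])
print(irregular.infer_frequency())  # "IRREG"
# irregular.infer_frequency(raise_if_irregular=True)  # would raise ValueError
```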
@@ -528,8 +541,13 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return len(self.item_ids)
 
     def num_timesteps_per_item(self) -> pd.Series:
-        """Length of each time series in the dataframe."""
-        return self.groupby(level=ITEMID, sort=False).size()
+        """Number of observations in each time series in the dataframe.
+
+        Returns a ``pandas.Series`` with ``item_id`` as index and number of observations per item as values.
+        """
+        counts = pd.Series(self.index.codes[0]).value_counts(sort=False)
+        counts.index = self.index.levels[0][counts.index]
+        return counts
 
     def copy(self: TimeSeriesDataFrame, deep: bool = True) -> TimeSeriesDataFrame:
         """Make a copy of the TimeSeriesDataFrame.
@@ -552,7 +570,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return obj
 
     def __finalize__(  # noqa
-        self: TimeSeriesDataFrame, other, method: Optional[str] = None, **kwargs
+        self: TimeSeriesDataFrame, other, method: str | None = None, **kwargs
     ) -> TimeSeriesDataFrame:
         super().__finalize__(other=other, method=method, **kwargs)
         # when finalizing the copy/slice operation, we use the property setter to stay consistent
@@ -561,13 +579,13 @@ class TimeSeriesDataFrame(pd.DataFrame):
             self.static_features = other._static_features
         return self
 
-    def split_by_time(self, cutoff_time: pd.Timestamp) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
+    def split_by_time(self, cutoff_time: pd.Timestamp) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
         """Split dataframe to two different ``TimeSeriesDataFrame`` s before and after a certain ``cutoff_time``.
 
         Parameters
         ----------
         cutoff_time: pd.Timestamp
-            The time to split the current data frame into two data frames.
+            The time to split the current dataframe into two dataframes.
 
         Returns
         -------
@@ -584,15 +602,14 @@ class TimeSeriesDataFrame(pd.DataFrame):
         after = TimeSeriesDataFrame(data_after, static_features=self.static_features)
         return before, after
 
-    def slice_by_timestep(
-        self, start_index: Optional[int] = None, end_index: Optional[int] = None
-    ) -> TimeSeriesDataFrame:
+    def slice_by_timestep(self, start_index: int | None = None, end_index: int | None = None) -> TimeSeriesDataFrame:
         """Select a subsequence from each time series between start (inclusive) and end (exclusive) indices.
 
         This operation is equivalent to selecting a slice ``[start_index : end_index]`` from each time series, and then
         combining these slices into a new ``TimeSeriesDataFrame``. See examples below.
 
-        Returns a copy of the original data. This is useful for constructing holdout sets for validation.
+        It is recommended to sort the index with ``ts_df.sort_index()`` before calling this method to take advantage of
+        a fast optimized algorithm.
 
         Parameters
         ----------
@@ -679,10 +696,53 @@ class TimeSeriesDataFrame(pd.DataFrame):
         if end_index is not None and not isinstance(end_index, int):
             raise ValueError(f"end_index must be of type int or None (got {type(end_index)})")
 
-        time_step_slice = slice(start_index, end_index)
-        result = self.groupby(level=ITEMID, sort=False, as_index=False).nth(time_step_slice)
-        result.static_features = self.static_features
-        return result
+        if start_index is None and end_index is None:
+            # Return a copy to avoid in-place modification.
+            # self.copy() is much faster than self.loc[ones(len(self), dtype=bool)]
+            return self.copy()
+
+        if self.index.is_monotonic_increasing:
+            # Use a fast optimized algorithm if the index is sorted
+            indptr = self.get_indptr()
+            lengths = np.diff(indptr)
+            starts = indptr[:-1]
+
+            slice_start = (
+                np.zeros_like(lengths)
+                if start_index is None
+                else np.clip(np.where(start_index >= 0, start_index, lengths + start_index), 0, lengths)
+            )
+            slice_end = (
+                lengths.copy()
+                if end_index is None
+                else np.clip(np.where(end_index >= 0, end_index, lengths + end_index), 0, lengths)
+            )
+
+            # Filter out invalid slices where start >= end
+            valid_slices = slice_start < slice_end
+            if not np.any(valid_slices):
+                # Return empty dataframe with same structure
+                return self.loc[np.zeros(len(self), dtype=bool)]
+
+            starts = starts[valid_slices]
+            slice_start = slice_start[valid_slices]
+            slice_end = slice_end[valid_slices]
+
+            # We put 1 at the slice_start index for each item and -1 at the slice_end index for each item.
+            # After we apply cumsum we get the indicator mask selecting values between slice_start and slice_end
+            # cumsum([0, 0, 1, 0, 0, -1, 0]) -> [0, 0, 1, 1, 1, 0, 0]
+            # We need array of size len(self) + 1 in case events[starts + slice_end] tries to access position len(self)
+            events = np.zeros(len(self) + 1, dtype=np.int8)
+            events[starts + slice_start] += 1
+            events[starts + slice_end] -= 1
+            mask = np.cumsum(events)[:-1].astype(bool)
+            # loc[mask] returns a view of the original data - modifying it will produce a SettingWithCopyWarning
+            return self.loc[mask]
+        else:
+            # Fall back to a slow groupby operation
+            result = self.groupby(level=self.ITEMID, sort=False, as_index=False).nth(slice(start_index, end_index))
+            result.static_features = self.static_features
+            return result
 
     def slice_by_time(self, start_time: pd.Timestamp, end_time: pd.Timestamp) -> TimeSeriesDataFrame:
         """Select a subsequence from each time series between start (inclusive) and end (exclusive) timestamps.
@@ -711,7 +771,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
     @classmethod
     def from_pickle(cls, filepath_or_buffer: Any) -> TimeSeriesDataFrame:
-        """Convenience method to read pickled time series data frames. If the read pickle
+        """Convenience method to read pickled time series dataframes. If the read pickle
         file refers to a plain pandas DataFrame, it will be cast to a TimeSeriesDataFrame.
 
         Parameters
@@ -722,7 +782,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
         ts_df : TimeSeriesDataFrame
-            The pickled time series data frame.
+            The pickled time series dataframe.
         """
         try:
             data = pd.read_pickle(filepath_or_buffer)
@@ -733,16 +793,21 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> TimeSeriesDataFrame:
         """Fill missing values represented by NaN.
 
+        .. note::
+            This method assumes that the index of the TimeSeriesDataFrame is sorted by [item_id, timestamp].
+
+            If the index is not sorted, this method will log a warning and may produce an incorrect result.
+
         Parameters
         ----------
         method : str, default = "auto"
             Method used to impute missing values.
 
-            - "auto" - first forward fill (to fill the in-between and trailing NaNs), then backward fill (to fill the leading NaNs)
-            - "ffill" or "pad" - propagate last valid observation forward. Note: missing values at the start of the time series are not filled.
-            - "bfill" or "backfill" - use next valid observation to fill gap. Note: this may result in information leakage; missing values at the end of the time series are not filled.
-            - "constant" - replace NaNs with the given constant ``value``.
-            - "interpolate" - fill NaN values using linear interpolation. Note: this may result in information leakage.
+            - ``"auto"`` - first forward fill (to fill the in-between and trailing NaNs), then backward fill (to fill the leading NaNs)
+            - ``"ffill"`` or ``"pad"`` - propagate last valid observation forward. Note: missing values at the start of the time series are not filled.
+            - ``"bfill"`` or ``"backfill"`` - use next valid observation to fill gap. Note: this may result in information leakage; missing values at the end of the time series are not filled.
+            - ``"constant"`` - replace NaNs with the given constant ``value``.
+            - ``"interpolate"`` - fill NaN values using linear interpolation. Note: this may result in information leakage.
         value : float, default = 0.0
             Value used by the "constant" imputation method.
 
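Reviewer note: a minimal sketch of the documented fill behaviors, with a hypothetical single-item series containing a leading and an in-between NaN:

```python
import numpy as np
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame

ts_df = TimeSeriesDataFrame.from_data_frame(
    pd.DataFrame(
        {
            "item_id": ["A"] * 4,
            "timestamp": pd.date_range("2024-01-01", periods=4, freq="D"),
            "target": [np.nan, 1.0, np.nan, 3.0],
        }
    )
).sort_index()  # the method assumes a sorted index

ts_df.fill_missing_values()                              # "auto": ffill then bfill -> [1.0, 1.0, 1.0, 3.0]
ts_df.fill_missing_values(method="ffill")                # leading NaN stays -> [nan, 1.0, 1.0, 3.0]
ts_df.fill_missing_values(method="constant", value=0.0)  # -> [0.0, 1.0, 0.0, 3.0]
```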
@@ -792,12 +857,12 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 "It is highly recommended to call `ts_df.sort_index()` before calling `ts_df.fill_missing_values()`"
             )
 
-        grouped_df = df.groupby(level=ITEMID, sort=False, group_keys=False)
+        grouped_df = df.groupby(level=self.ITEMID, sort=False, group_keys=False)
         if method == "auto":
             filled_df = grouped_df.ffill()
             # If necessary, fill missing values at the start of each time series with bfill
             if filled_df.isna().any(axis=None):
-                filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).bfill()
+                filled_df = filled_df.groupby(level=self.ITEMID, sort=False, group_keys=False).bfill()
         elif method in ["ffill", "pad"]:
             filled_df = grouped_df.ffill()
         elif method in ["bfill", "backfill"]:
@@ -840,17 +905,17 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return super().sort_index(*args, **kwargs)  # type: ignore
 
     def get_model_inputs_for_scoring(
-        self, prediction_length: int, known_covariates_names: Optional[List[str]] = None
-    ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
+        self, prediction_length: int, known_covariates_names: list[str] | None = None
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
         """Prepare model inputs necessary to predict the last ``prediction_length`` time steps of each time series in the dataset.
 
         Parameters
         ----------
         prediction_length : int
             The forecast horizon, i.e., How many time steps into the future must be predicted.
-        known_covariates_names : List[str], optional
+        known_covariates_names : list[str], optional
             Names of the dataframe columns that contain covariates known in the future.
-            See :attr:`known_covariates_names` of :class:`~autogluon.timeseries.TimeSeriesPredictor` for more details.
+            See ``known_covariates_names`` of :class:`~autogluon.timeseries.TimeSeriesPredictor` for more details.
 
         Returns
         -------
@@ -871,12 +936,16 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def train_test_split(
         self,
         prediction_length: int,
-        end_index: Optional[int] = None,
-        suffix: Optional[str] = None,
-    ) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
+        end_index: int | None = None,
+        suffix: str | None = None,
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
         """Generate a train/test split from the given dataset.
+
         This method can be used to generate splits for multi-window backtesting.
 
+        .. note::
+            This method automatically sorts the TimeSeriesDataFrame by [item_id, timestamp].
+
         Parameters
         ----------
         prediction_length : int
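Reviewer note: if I read the splitter correctly, the train frame drops the last `prediction_length` steps of each item while the test frame keeps the full series (context plus horizon). A hedged sketch with hypothetical data:

```python
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame

ts_df = TimeSeriesDataFrame.from_data_frame(
    pd.DataFrame(
        {
            "item_id": ["A"] * 5,
            "timestamp": pd.date_range("2024-01-01", periods=5, freq="D"),
            "target": [1.0, 2.0, 3.0, 4.0, 5.0],
        }
    )
)
train_data, test_data = ts_df.train_test_split(prediction_length=2)
# train_data: first 3 observations per item; test_data: all 5 observations,
# so the final 2 steps can be scored against forecasts made from the context.
```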
@@ -913,14 +982,14 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
     def convert_frequency(
         self,
-        freq: Union[str, pd.DateOffset],
+        freq: str | pd.DateOffset,
         agg_numeric: str = "mean",
         agg_categorical: str = "first",
         num_cpus: int = -1,
         chunk_size: int = 100,
         **kwargs,
     ) -> TimeSeriesDataFrame:
-        """Convert each time series in the data frame to the given frequency.
+        """Convert each time series in the dataframe to the given frequency.
 
         This method is useful for two purposes:
 
@@ -930,10 +999,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Standard ``df.groupby(...).resample(...)`` can be extremely slow for large datasets, so we parallelize this
         operation across multiple CPU cores.
 
-
         Parameters
         ----------
-        freq : Union[str, pd.DateOffset]
+        freq : str | pd.DateOffset
             Frequency to which the data should be converted. See `pandas frequency aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
             for supported values.
         agg_numeric : {"max", "min", "sum", "mean", "median", "first", "last"}, default = "mean"
@@ -1020,21 +1088,68 @@ class TimeSeriesDataFrame(pd.DataFrame):
             iterable = iter(iterable)
             return iter(lambda: tuple(islice(iterable, size)), ())
 
-        def resample_chunk(chunk: Iterable[Tuple[str, pd.DataFrame]]) -> pd.DataFrame:
+        def resample_chunk(chunk: Iterable[tuple[str, pd.DataFrame]]) -> pd.DataFrame:
             resampled_dfs = []
             for item_id, df in chunk:
-                resampled_df = df.resample(offset, level=TIMESTAMP, **kwargs).agg(aggregation)
-                resampled_dfs.append(pd.concat({item_id: resampled_df}, names=[ITEMID]))
+                resampled_df = df.resample(offset, level=self.TIMESTAMP, **kwargs).agg(aggregation)
+                resampled_dfs.append(pd.concat({item_id: resampled_df}, names=[self.ITEMID]))
             return pd.concat(resampled_dfs)
 
         # Resampling time for 1 item < overhead time for a single parallel job. Therefore, we group items into chunks
         # so that the speedup from parallelization isn't dominated by the communication costs.
-        chunks = split_into_chunks(pd.DataFrame(self).groupby(level=ITEMID, sort=False), chunk_size)
+        df = pd.DataFrame(self)
+        # Make sure that timestamp index has dtype 'datetime64[ns]', otherwise index may contain NaT values.
+        # See https://github.com/autogluon/autogluon/issues/4917
+        df.index = df.index.set_levels(df.index.levels[1].astype("datetime64[ns]"), level=self.TIMESTAMP)
+        chunks = split_into_chunks(df.groupby(level=self.ITEMID, sort=False), chunk_size)
         resampled_chunks = Parallel(n_jobs=num_cpus)(delayed(resample_chunk)(chunk) for chunk in chunks)
         resampled_df = TimeSeriesDataFrame(pd.concat(resampled_chunks))
         resampled_df.static_features = self.static_features
         return resampled_df
 
     def to_data_frame(self) -> pd.DataFrame:
-        """Convert `TimeSeriesDataFrame` to a `pandas.DataFrame`"""
+        """Convert ``TimeSeriesDataFrame`` to a ``pandas.DataFrame``"""
         return pd.DataFrame(self)
+
+    def get_indptr(self) -> np.ndarray:
+        """[Advanced] Get a numpy array of shape [num_items + 1] that points to the start and end of each time series.
+
+        This method assumes that the TimeSeriesDataFrame is sorted by [item_id, timestamp].
+        """
+        return np.concatenate([[0], np.cumsum(self.num_timesteps_per_item().to_numpy())]).astype(np.int32)
+
+    # inline typing stubs for various overridden methods
+    if TYPE_CHECKING:
+
+        def query(  # type: ignore
+            self, expr: str, *, inplace: bool = False, **kwargs
+        ) -> Self: ...
+
+        def reindex(*args, **kwargs) -> Self: ...  # type: ignore
+
+        @overload
+        def __new__(cls, data: pd.DataFrame, static_features: pd.DataFrame | None = None) -> Self: ...  # type: ignore
+        @overload
+        def __new__(
+            cls,
+            data: pd.DataFrame | str | Path | Iterable,
+            static_features: pd.DataFrame | str | Path | None = None,
+            id_column: str | None = None,
+            timestamp_column: str | None = None,
+            num_cpus: int = -1,
+            *args,
+            **kwargs,
+        ) -> Self:
+            """This overload is needed since in pandas, during type checking, the default constructor resolves to __new__"""
+            ...
+
+        @overload
+        def __getitem__(self, items: list[str]) -> Self: ...  # type: ignore
+        @overload
+        def __getitem__(self, item: str) -> pd.Series: ...  # type: ignore
+
+
+# TODO: remove with v2.0
+# module-level constants kept for backward compatibility.
+ITEMID = TimeSeriesDataFrame.ITEMID
+TIMESTAMP = TimeSeriesDataFrame.TIMESTAMP
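Reviewer note: `get_indptr` returns CSR-style row boundaries, which both `infer_frequency` and `slice_by_timestep` now build on. A minimal sketch with hypothetical data:

```python
import numpy as np
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame

ts_df = TimeSeriesDataFrame.from_data_frame(
    pd.DataFrame(
        {
            "item_id": ["A", "A", "A", "B", "B"],
            "timestamp": pd.to_datetime(
                ["2024-01-01", "2024-01-02", "2024-01-03", "2024-01-01", "2024-01-02"]
            ),
            "target": [1.0, 2.0, 3.0, 4.0, 5.0],
        }
    )
).sort_index()  # get_indptr assumes a sorted [item_id, timestamp] index

indptr = ts_df.get_indptr()  # array([0, 3, 5], dtype=int32): item rows [0, 3) and [3, 5)
lengths = np.diff(indptr)    # array([3, 2]): observations per item
```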