autogluon.timeseries 1.3.2b20250624__py3-none-any.whl → 1.3.2b20250625__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (19)
  1. autogluon/timeseries/dataset/ts_dataframe.py +117 -48
  2. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +1 -1
  3. autogluon/timeseries/models/chronos/pipeline/utils.py +1 -2
  4. autogluon/timeseries/models/gluonts/abstract_gluonts.py +3 -6
  5. autogluon/timeseries/models/local/abstract_local_model.py +9 -5
  6. autogluon/timeseries/models/local/naive.py +1 -1
  7. autogluon/timeseries/predictor.py +5 -5
  8. autogluon/timeseries/utils/features.py +1 -1
  9. autogluon/timeseries/utils/forecast.py +2 -1
  10. autogluon/timeseries/version.py +1 -1
  11. {autogluon.timeseries-1.3.2b20250624.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/METADATA +5 -5
  12. {autogluon.timeseries-1.3.2b20250624.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/RECORD +19 -19
  13. /autogluon.timeseries-1.3.2b20250624-py3.9-nspkg.pth → /autogluon.timeseries-1.3.2b20250625-py3.9-nspkg.pth +0 -0
  14. {autogluon.timeseries-1.3.2b20250624.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/LICENSE +0 -0
  15. {autogluon.timeseries-1.3.2b20250624.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/NOTICE +0 -0
  16. {autogluon.timeseries-1.3.2b20250624.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/WHEEL +0 -0
  17. {autogluon.timeseries-1.3.2b20250624.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/namespace_packages.txt +0 -0
  18. {autogluon.timeseries-1.3.2b20250624.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/top_level.txt +0 -0
  19. {autogluon.timeseries-1.3.2b20250624.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/zip-safe +0 -0
@@ -7,9 +7,9 @@ import reprlib
7
7
  from collections.abc import Iterable
8
8
  from itertools import islice
9
9
  from pathlib import Path
10
- from pprint import pformat
11
10
  from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type, Union, overload
12
11
 
12
+ import numpy as np
13
13
  import pandas as pd
14
14
  from joblib.parallel import Parallel, delayed
15
15
  from pandas.core.internals import ArrayManager, BlockManager # type: ignore
@@ -28,7 +28,7 @@ IRREGULAR_TIME_INDEX_FREQSTR = "IRREG"
28
28
  class TimeSeriesDataFrame(pd.DataFrame):
29
29
  """A collection of univariate time series, where each row is identified by an (``item_id``, ``timestamp``) pair.
30
30
 
31
- For example, a time series data frame could represent the daily sales of a collection of products, where each
31
+ For example, a time series dataframe could represent the daily sales of a collection of products, where each
32
32
  ``item_id`` corresponds to a product and ``timestamp`` corresponds to the day of the record.
33
33
 
34
34
  Parameters
@@ -78,7 +78,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
78
78
  You can also use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.from_iterable_dataset` for loading data in such format.
79
79
 
80
80
  static_features : pd.DataFrame, str or pathlib.Path, optional
81
- An optional data frame describing the metadata of each individual time series that does not change with time.
81
+ An optional dataframe describing the metadata of each individual time series that does not change with time.
82
82
  Can take real-valued or categorical values. For example, if ``TimeSeriesDataFrame`` contains sales of various
83
83
  products, static features may refer to time-independent features like color or brand.
84
84
 
@@ -223,7 +223,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
223
223
  raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
224
224
  if not data.index.names == (f"{ITEMID}", f"{TIMESTAMP}"):
225
225
  raise ValueError(f"data must have index names as ('{ITEMID}', '{TIMESTAMP}'), got {data.index.names}")
226
- item_id_index = data.index.get_level_values(level=ITEMID)
226
+ item_id_index = data.index.levels[0]
227
227
  if not (pd.api.types.is_integer_dtype(item_id_index) or pd.api.types.is_string_dtype(item_id_index)):
228
228
  raise ValueError(f"all entries in index `{ITEMID}` must be of integer or string dtype")
229
229
 
@@ -303,7 +303,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
303
303
  Returns
304
304
  -------
305
305
  ts_df: TimeSeriesDataFrame
306
- A data frame in TimeSeriesDataFrame format.
306
+ A dataframe in TimeSeriesDataFrame format.
307
307
  """
308
308
  return cls(df, static_features=static_features_df, id_column=id_column, timestamp_column=timestamp_column)
309
309
 
@@ -350,7 +350,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
350
350
  Returns
351
351
  -------
352
352
  ts_df: TimeSeriesDataFrame
353
- A data frame in TimeSeriesDataFrame format.
353
+ A dataframe in TimeSeriesDataFrame format.
354
354
  """
355
355
  return cls(path, static_features=static_features_path, id_column=id_column, timestamp_column=timestamp_column)
356
356
 
@@ -379,7 +379,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
379
379
  Returns
380
380
  -------
381
381
  ts_df: TimeSeriesDataFrame
382
- A data frame in TimeSeriesDataFrame format.
382
+ A dataframe in TimeSeriesDataFrame format.
383
383
  """
384
384
  return cls(iterable_dataset, num_cpus=num_cpus)
385
385
 
@@ -469,53 +469,58 @@ class TimeSeriesDataFrame(pd.DataFrame):
469
469
  If some items have an irregular frequency or if different items have different frequencies, returns string
470
470
  `IRREG`.
471
471
  """
472
+ ts_df = self
473
+ if num_items is not None and ts_df.num_items > num_items:
474
+ items_subset = ts_df.item_ids.to_series().sample(n=num_items, random_state=123)
475
+ ts_df = ts_df.loc[items_subset]
476
+
477
+ if not ts_df.index.is_monotonic_increasing:
478
+ ts_df = ts_df.sort_index()
479
+
480
+ indptr = ts_df.get_indptr()
481
+ item_ids = ts_df.item_ids
482
+ timestamps = ts_df.index.get_level_values(level=1)
483
+ candidate_freq = ts_df.index.levels[1].freq
484
+
485
+ frequencies = []
486
+ irregular_items = []
487
+ for i in range(len(indptr) - 1):
488
+ start, end = indptr[i], indptr[i + 1]
489
+ item_timestamps = timestamps[start:end]
490
+ inferred_freq = item_timestamps.inferred_freq
472
491
 
473
- df = pd.DataFrame(self)
474
- if num_items is not None:
475
- all_item_ids = self.item_ids
476
- if len(all_item_ids) > num_items:
477
- items_subset = all_item_ids.to_series().sample(n=num_items, random_state=123)
478
- df = df.loc[items_subset]
479
-
480
- candidate_freq = df.index.levels[1].freq
481
- index_df = df.index.to_frame(index=False)
482
-
483
- def get_freq(series: pd.Series) -> Optional[str]:
484
- dt_index = pd.DatetimeIndex(series)
485
- inferred_freq = dt_index.inferred_freq
486
492
  # Fallback option: maybe original index has a `freq` attribute that pandas fails to infer (e.g., 'SME')
487
493
  if inferred_freq is None and candidate_freq is not None:
488
494
  try:
489
495
  # If this line does not raise an exception, then candidate_freq is a compatible frequency
490
- dt_index.freq = candidate_freq
496
+ item_timestamps.freq = candidate_freq
491
497
  except ValueError:
492
498
  inferred_freq = None
493
499
  else:
494
500
  inferred_freq = candidate_freq.freqstr
495
- return inferred_freq
496
501
 
497
- freq_for_each_item = index_df.groupby(ITEMID, sort=False).agg(get_freq)[TIMESTAMP]
498
- freq = freq_for_each_item.iloc[0]
499
- if len(set(freq_for_each_item)) > 1 or freq is None:
502
+ if inferred_freq is None:
503
+ irregular_items.append(item_ids[i])
504
+ else:
505
+ frequencies.append(inferred_freq)
506
+
507
+ unique_freqs = list(set(frequencies))
508
+ if len(unique_freqs) != 1 or len(irregular_items) > 0:
500
509
  if raise_if_irregular:
501
- items_with_irregular_freq = freq_for_each_item[pd.isnull(freq_for_each_item)]
502
- if len(items_with_irregular_freq) > 0:
510
+ if irregular_items:
503
511
  raise ValueError(
504
- "Cannot infer frequency. Items with irregular frequency: "
505
- f"{pformat(items_with_irregular_freq.index.tolist())}"
512
+ f"Cannot infer frequency. Items with irregular frequency: {reprlib.repr(irregular_items)}"
506
513
  )
507
514
  else:
508
- raise ValueError(
509
- "Cannot infer frequency. Multiple frequencies detected in the dataset: "
510
- f"{freq_for_each_item.unique().tolist()}"
511
- )
512
- return IRREGULAR_TIME_INDEX_FREQSTR
515
+ raise ValueError(f"Cannot infer frequency. Multiple frequencies detected: {unique_freqs}")
516
+ else:
517
+ return IRREGULAR_TIME_INDEX_FREQSTR
513
518
  else:
514
- return pd.tseries.frequencies.to_offset(freq).freqstr
519
+ return pd.tseries.frequencies.to_offset(unique_freqs[0]).freqstr
515
520
 
516
521
  @property
517
522
  def freq(self):
518
- """Inferred pandas-compatible frequency of the timestamps in the data frame.
523
+ """Inferred pandas-compatible frequency of the timestamps in the dataframe.
519
524
 
520
525
  Computed using a random subset of the time series for speed. This may sometimes result in incorrectly inferred
521
526
  values. For reliable results, use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.infer_frequency`.
@@ -529,8 +534,13 @@ class TimeSeriesDataFrame(pd.DataFrame):
529
534
  return len(self.item_ids)
530
535
 
531
536
  def num_timesteps_per_item(self) -> pd.Series:
532
- """Length of each time series in the dataframe."""
533
- return self.groupby(level=ITEMID, sort=False).size()
537
+ """Number of observations in each time series in the dataframe.
538
+
539
+ Returns a `pandas.Series` with item_id as index and number of observations per item as values.
540
+ """
541
+ counts = pd.Series(self.index.codes[0]).value_counts(sort=False)
542
+ counts.index = self.index.levels[0][counts.index]
543
+ return counts
534
544
 
535
545
  def copy(self: TimeSeriesDataFrame, deep: bool = True) -> TimeSeriesDataFrame:
536
546
  """Make a copy of the TimeSeriesDataFrame.
@@ -568,7 +578,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
568
578
  Parameters
569
579
  ----------
570
580
  cutoff_time: pd.Timestamp
571
- The time to split the current data frame into two data frames.
581
+ The time to split the current dataframe into two dataframes.
572
582
 
573
583
  Returns
574
584
  -------
@@ -593,7 +603,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
593
603
  This operation is equivalent to selecting a slice ``[start_index : end_index]`` from each time series, and then
594
604
  combining these slices into a new ``TimeSeriesDataFrame``. See examples below.
595
605
 
596
- Returns a copy of the original data. This is useful for constructing holdout sets for validation.
606
+ It is recommended to sort the index with `ts_df.sort_index()` before calling this method to take advantage of
607
+ a fast optimized algorithm.
597
608
 
598
609
  Parameters
599
610
  ----------
@@ -680,10 +691,53 @@ class TimeSeriesDataFrame(pd.DataFrame):
680
691
  if end_index is not None and not isinstance(end_index, int):
681
692
  raise ValueError(f"end_index must be of type int or None (got {type(end_index)})")
682
693
 
683
- time_step_slice = slice(start_index, end_index)
684
- result = self.groupby(level=ITEMID, sort=False, as_index=False).nth(time_step_slice)
685
- result.static_features = self.static_features
686
- return result
694
+ if start_index is None and end_index is None:
695
+ # Return a copy to avoid in-place modification.
696
+ # self.copy() is much faster than self.loc[ones(len(self), dtype=bool)]
697
+ return self.copy()
698
+
699
+ if self.index.is_monotonic_increasing:
700
+ # Use a fast optimized algorithm if the index is sorted
701
+ indptr = self.get_indptr()
702
+ lengths = np.diff(indptr)
703
+ starts = indptr[:-1]
704
+
705
+ slice_start = (
706
+ np.zeros_like(lengths)
707
+ if start_index is None
708
+ else np.clip(np.where(start_index >= 0, start_index, lengths + start_index), 0, lengths)
709
+ )
710
+ slice_end = (
711
+ lengths.copy()
712
+ if end_index is None
713
+ else np.clip(np.where(end_index >= 0, end_index, lengths + end_index), 0, lengths)
714
+ )
715
+
716
+ # Filter out invalid slices where start >= end
717
+ valid_slices = slice_start < slice_end
718
+ if not np.any(valid_slices):
719
+ # Return empty dataframe with same structure
720
+ return self.loc[np.zeros(len(self), dtype=bool)]
721
+
722
+ starts = starts[valid_slices]
723
+ slice_start = slice_start[valid_slices]
724
+ slice_end = slice_end[valid_slices]
725
+
726
+ # We put 1 at the slice_start index for each item and -1 at the slice_end index for each item.
727
+ # After we apply cumsum we get the indicator mask selecting values between slice_start and slice_end
728
+ # cumsum([0, 0, 1, 0, 0, -1, 0]) -> [0, 0, 1, 1, 1, 0, 0]
729
+ # We need array of size len(self) + 1 in case events[starts + slice_end] tries to access position len(self)
730
+ events = np.zeros(len(self) + 1, dtype=np.int8)
731
+ events[starts + slice_start] += 1
732
+ events[starts + slice_end] -= 1
733
+ mask = np.cumsum(events)[:-1].astype(bool)
734
+ # loc[mask] returns a view of the original data - modifying it will produce a SettingWithCopyWarning
735
+ return self.loc[mask]
736
+ else:
737
+ # Fall back to a slow groupby operation
738
+ result = self.groupby(level=ITEMID, sort=False, as_index=False).nth(slice(start_index, end_index))
739
+ result.static_features = self.static_features
740
+ return result
687
741
 
688
742
  def slice_by_time(self, start_time: pd.Timestamp, end_time: pd.Timestamp) -> TimeSeriesDataFrame:
689
743
  """Select a subsequence from each time series between start (inclusive) and end (exclusive) timestamps.
@@ -712,7 +766,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
712
766
 
713
767
  @classmethod
714
768
  def from_pickle(cls, filepath_or_buffer: Any) -> TimeSeriesDataFrame:
715
- """Convenience method to read pickled time series data frames. If the read pickle
769
+ """Convenience method to read pickled time series dataframes. If the read pickle
716
770
  file refers to a plain pandas DataFrame, it will be cast to a TimeSeriesDataFrame.
717
771
 
718
772
  Parameters
@@ -723,7 +777,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
723
777
  Returns
724
778
  -------
725
779
  ts_df : TimeSeriesDataFrame
726
- The pickled time series data frame.
780
+ The pickled time series dataframe.
727
781
  """
728
782
  try:
729
783
  data = pd.read_pickle(filepath_or_buffer)
@@ -734,6 +788,11 @@ class TimeSeriesDataFrame(pd.DataFrame):
734
788
  def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> TimeSeriesDataFrame:
735
789
  """Fill missing values represented by NaN.
736
790
 
791
+ .. note::
792
+ This method assumes that the index of the TimeSeriesDataFrame is sorted by [item_id, timestamp].
793
+
794
+ If the index is not sorted, this method will log a warning and may produce an incorrect result.
795
+
737
796
  Parameters
738
797
  ----------
739
798
  method : str, default = "auto"
@@ -876,8 +935,12 @@ class TimeSeriesDataFrame(pd.DataFrame):
876
935
  suffix: Optional[str] = None,
877
936
  ) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
878
937
  """Generate a train/test split from the given dataset.
938
+
879
939
  This method can be used to generate splits for multi-window backtesting.
880
940
 
941
+ .. note::
942
+ This method automatically sorts the TimeSeriesDataFrame by [item_id, timestamp].
943
+
881
944
  Parameters
882
945
  ----------
883
946
  prediction_length : int
@@ -921,7 +984,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
921
984
  chunk_size: int = 100,
922
985
  **kwargs,
923
986
  ) -> TimeSeriesDataFrame:
924
- """Convert each time series in the data frame to the given frequency.
987
+ """Convert each time series in the dataframe to the given frequency.
925
988
 
926
989
  This method is useful for two purposes:
927
990
 
@@ -931,7 +994,6 @@ class TimeSeriesDataFrame(pd.DataFrame):
931
994
  Standard ``df.groupby(...).resample(...)`` can be extremely slow for large datasets, so we parallelize this
932
995
  operation across multiple CPU cores.
933
996
 
934
-
935
997
  Parameters
936
998
  ----------
937
999
  freq : Union[str, pd.DateOffset]
@@ -1044,6 +1106,13 @@ class TimeSeriesDataFrame(pd.DataFrame):
1044
1106
  """Convert `TimeSeriesDataFrame` to a `pandas.DataFrame`"""
1045
1107
  return pd.DataFrame(self)
1046
1108
 
1109
+ def get_indptr(self) -> np.ndarray:
1110
+ """[Advanced] Get a numpy array of shape [num_items + 1] that points to the start and end of each time series.
1111
+
1112
+ This method assumes that the TimeSeriesDataFrame is sorted by [item_id, timestamp].
1113
+ """
1114
+ return np.concatenate([[0], np.cumsum(self.num_timesteps_per_item().to_numpy())]).astype(np.int32)
1115
+
1047
1116
  # inline typing stubs for various overridden methods
1048
1117
  if TYPE_CHECKING:
1049
1118
 
@@ -570,7 +570,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
570
570
  Returns
571
571
  -------
572
572
  predictions: TimeSeriesDataFrame
573
- pandas data frames with a timestamp index, where each input item from the input
573
+ pandas dataframes with a timestamp index, where each input item from the input
574
574
  data is given as a separate forecast item in the dictionary, keyed by the `item_id`s
575
575
  of input items.
576
576
  """
@@ -255,8 +255,7 @@ class ChronosInferenceDataset:
255
255
  self.target_array = target_df[target_column].to_numpy(dtype=np.float32)
256
256
 
257
257
  # store pointer to start:end of each time series
258
- cum_sizes = target_df.num_timesteps_per_item().values.cumsum()
259
- self.indptr = np.append(0, cum_sizes).astype(np.int32)
258
+ self.indptr = target_df.get_indptr()
260
259
 
261
260
  def __len__(self):
262
261
  return len(self.indptr) - 1 # noqa
@@ -70,12 +70,9 @@ class SimpleGluonTSDataset(GluonTSDataset):
70
70
  self.prediction_length = prediction_length
71
71
 
72
72
  # Replace inefficient groupby ITEMID with indptr that stores start:end of each time series
73
- item_id_index = target_df.index.get_level_values(ITEMID)
74
- indices_sizes = item_id_index.value_counts(sort=False)
75
- self.item_ids = indices_sizes.index # shape [num_items]
76
- cum_sizes = indices_sizes.to_numpy().cumsum()
77
- self.indptr = np.append(0, cum_sizes).astype(np.int32)
78
- self.start_timestamps = target_df.reset_index(TIMESTAMP).groupby(level=ITEMID, sort=False).first()[TIMESTAMP]
73
+ self.item_ids = target_df.item_ids
74
+ self.indptr = target_df.get_indptr()
75
+ self.start_timestamps = target_df.index[self.indptr[:-1]].to_frame(index=False)[TIMESTAMP]
79
76
  assert len(self.item_ids) == len(self.start_timestamps)
80
77
 
81
78
  @staticmethod
@@ -136,9 +136,12 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
136
136
  self._dummy_forecast = self._get_dummy_forecast(train_data)
137
137
  return self
138
138
 
139
- def _get_dummy_forecast(self, train_data: TimeSeriesDataFrame) -> pd.DataFrame:
139
+ def _get_dummy_forecast(self, train_data: TimeSeriesDataFrame, max_num_rows: int = 20_000) -> pd.DataFrame:
140
140
  agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
141
- stats_marginal = train_data[self.target].agg(agg_functions)
141
+ target_series = train_data[self.target]
142
+ if len(target_series) > max_num_rows:
143
+ target_series = target_series.sample(max_num_rows, replace=True)
144
+ stats_marginal = target_series.agg(agg_functions)
142
145
  stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
143
146
  return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
144
147
 
@@ -150,10 +153,11 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
150
153
  max_ts_length = model_params["max_ts_length"]
151
154
  if max_ts_length is not None:
152
155
  logger.debug(f"Shortening all time series to at most {max_ts_length}")
153
- data = data.groupby(level=ITEMID, sort=False).tail(max_ts_length)
156
+ data = data.slice_by_timestep(-max_ts_length, None)
154
157
 
155
- df = pd.DataFrame(data).reset_index(level=ITEMID)
156
- all_series = (ts for _, ts in df.groupby(by=ITEMID, as_index=False, sort=False)[self.target])
158
+ indptr = data.get_indptr()
159
+ target_series = data[self.target].droplevel(level=ITEMID)
160
+ all_series = (target_series[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1))
157
161
 
158
162
  # timeout ensures that no individual job takes longer than time_limit
159
163
  # TODO: a job started late may still exceed time_limit - how to prevent that?
@@ -70,7 +70,7 @@ class SeasonalNaiveModel(AbstractLocalModel):
70
70
 
71
71
  def _predict_with_local_model(
72
72
  self,
73
- time_series: np.ndarray,
73
+ time_series: pd.Series,
74
74
  local_model_args: dict,
75
75
  ) -> pd.DataFrame:
76
76
  return seasonal_naive_forecast(
@@ -108,7 +108,7 @@ class TimeSeriesPredictor:
108
108
 
109
109
  If ``known_covariates_names`` are provided, then:
110
110
 
111
- - :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`, :meth:`~autogluon.timeseries.TimeSeriesPredictor.evaluate`, and :meth:`~autogluon.timeseries.TimeSeriesPredictor.leaderboard` will expect a data frame with columns listed in ``known_covariates_names`` (in addition to the ``target`` column).
111
+ - :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`, :meth:`~autogluon.timeseries.TimeSeriesPredictor.evaluate`, and :meth:`~autogluon.timeseries.TimeSeriesPredictor.leaderboard` will expect a dataframe with columns listed in ``known_covariates_names`` (in addition to the ``target`` column).
112
112
  - :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict` will expect an additional keyword argument ``known_covariates`` containing the future values of the known covariates in ``TimeSeriesDataFrame`` format.
113
113
 
114
114
  quantile_levels : List[float], optional
@@ -284,7 +284,7 @@ class TimeSeriesPredictor:
284
284
  Parameters
285
285
  ----------
286
286
  data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
287
- Data as a data frame or path to file storing the data.
287
+ Data as a dataframe or path to file storing the data.
288
288
  name : str
289
289
  Name of the data that will be used in log messages (e.g., 'train_data', 'tuning_data', or 'data').
290
290
 
@@ -809,7 +809,7 @@ class TimeSeriesPredictor:
809
809
  - Must include ``timestamp`` values for the full forecast horizon (i.e., ``prediction_length`` time steps) following the end of each series in the input ``data``.
810
810
 
811
811
  You can use :meth:`autogluon.timeseries.TimeSeriesPredictor.make_future_data_frame` to generate a template
812
- containing the required ``item_id`` and ``timestamp`` combinations for the `known_covariates` data frame.
812
+ containing the required ``item_id`` and ``timestamp`` combinations for the `known_covariates` dataframe.
813
813
 
814
814
  See example below.
815
815
  model : str, optional
@@ -1230,7 +1230,7 @@ class TimeSeriesPredictor:
1230
1230
  **kwargs,
1231
1231
  ) -> pd.DataFrame:
1232
1232
  """Return a leaderboard showing the performance of every trained model, the output is a
1233
- pandas data frame with columns:
1233
+ pandas dataframe with columns:
1234
1234
 
1235
1235
  * ``model``: The name of the model.
1236
1236
  * ``score_test``: The test score of the model on ``data``, if provided. Computed according to ``eval_metric``.
@@ -1315,7 +1315,7 @@ class TimeSeriesPredictor:
1315
1315
  return leaderboard
1316
1316
 
1317
1317
  def make_future_data_frame(self, data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]) -> pd.DataFrame:
1318
- """Generate a data frame with the `item_id` and `timestamp` values corresponding to the forecast horizon.
1318
+ """Generate a dataframe with the `item_id` and `timestamp` values corresponding to the forecast horizon.
1319
1319
 
1320
1320
  Parameters
1321
1321
  ----------
@@ -406,7 +406,7 @@ class AbstractFeatureImportanceTransform:
406
406
  if feature_name not in self.covariate_metadata.all_features:
407
407
  raise ValueError(f"Target feature {feature_name} not found in covariate metadata")
408
408
 
409
- # feature transform works on a shallow copy of the main time series data frame
409
+ # feature transform works on a shallow copy of the main time series dataframe
410
410
  # but a deep copy of the static features.
411
411
  data = data.copy(deep=False)
412
412
 
@@ -35,7 +35,8 @@ def make_future_data_frame(
35
35
 
36
36
  Returns a pandas.DataFrame, with columns "item_id" and "timestamp" corresponding to the forecast horizon.
37
37
  """
38
- last = ts_dataframe.reset_index()[[ITEMID, TIMESTAMP]].groupby(by=ITEMID, sort=False, as_index=False).last()
38
+ indptr = ts_dataframe.get_indptr()
39
+ last = ts_dataframe.index[indptr[1:] - 1].to_frame(index=False)
39
40
  item_ids = np.repeat(last[ITEMID].to_numpy(), prediction_length)
40
41
 
41
42
  if freq is None:
@@ -1,4 +1,4 @@
1
1
  """This is the autogluon version file."""
2
2
 
3
- __version__ = "1.3.2b20250624"
3
+ __version__ = "1.3.2b20250625"
4
4
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 1.3.2b20250624
3
+ Version: 1.3.2b20250625
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -55,10 +55,10 @@ Requires-Dist: fugue>=0.9.0
55
55
  Requires-Dist: tqdm<5,>=4.38
56
56
  Requires-Dist: orjson~=3.9
57
57
  Requires-Dist: tensorboard<3,>=2.9
58
- Requires-Dist: autogluon.core[raytune]==1.3.2b20250624
59
- Requires-Dist: autogluon.common==1.3.2b20250624
60
- Requires-Dist: autogluon.features==1.3.2b20250624
61
- Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.3.2b20250624
58
+ Requires-Dist: autogluon.core[raytune]==1.3.2b20250625
59
+ Requires-Dist: autogluon.common==1.3.2b20250625
60
+ Requires-Dist: autogluon.features==1.3.2b20250625
61
+ Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.3.2b20250625
62
62
  Provides-Extra: all
63
63
  Provides-Extra: chronos-onnx
64
64
  Requires-Dist: optimum[onnxruntime]<1.23,>=1.17; extra == "chronos-onnx"
@@ -1,16 +1,16 @@
1
- autogluon.timeseries-1.3.2b20250624-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
1
+ autogluon.timeseries-1.3.2b20250625-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
2
2
  autogluon/timeseries/__init__.py,sha256=_CrLLc1fkjen7UzWoO0Os8WZoHOgvZbHKy46I8v_4k4,304
3
3
  autogluon/timeseries/evaluator.py,sha256=l642tYfTHsl8WVIq_vV6qhgAFVFr9UuZD7gLra3A_Kc,250
4
4
  autogluon/timeseries/learner.py,sha256=pIn4YSOk0aqCWyBpIlwnAsFnG4h7PLXk8guFH3wFS-w,13923
5
- autogluon/timeseries/predictor.py,sha256=Dz-LJVU5sjlFCOqHTeYPt77DuGavdAXB0DkclpM55rY,88173
5
+ autogluon/timeseries/predictor.py,sha256=u4d7-xMs669g5xxqIYuvEyGQ0P6Y8IoToiyg9zUZoy4,88168
6
6
  autogluon/timeseries/regressor.py,sha256=ozlhO-wce6YEtSMj0bfMgfNVeblfU3rI6ITuIk_WAFo,11868
7
7
  autogluon/timeseries/splitter.py,sha256=yzPca9p2bWV-_VJAptUyyzQsxu-uixAdpMoGQtDzMD4,3205
8
8
  autogluon/timeseries/trainer.py,sha256=4T7y58P3RImDbRZn-Og2qSQtOLpEocwdHi_tl1yt0Sc,58021
9
- autogluon/timeseries/version.py,sha256=UEJ7h9MzyXYPRR-OeEpNfrN3hoqD3VlyD4RcaxwT2k4,91
9
+ autogluon/timeseries/version.py,sha256=DDz6ZOzsA3FxARUjxCymDQnnmx13nbzyfMZvMdJfNvQ,91
10
10
  autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
11
11
  autogluon/timeseries/configs/presets_configs.py,sha256=cLat8ecLlWrI-SC5KLBDCX2SbVXaucemy2pjxJAtSY0,2543
12
12
  autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
13
- autogluon/timeseries/dataset/ts_dataframe.py,sha256=W3VE65lFyWmqMQ3XHN4Jhrqf_dO1EOLneNL2QDvVxeY,48120
13
+ autogluon/timeseries/dataset/ts_dataframe.py,sha256=pvL85NCrwcIYr7lxFzY2NZ57yUL82nl6Ypdm1z3ho04,51193
14
14
  autogluon/timeseries/metrics/__init__.py,sha256=wfqEf2AiaqCcFGXVGhpNrbo1XBQFmJCS8gRa8Qk2L50,3602
15
15
  autogluon/timeseries/metrics/abstract.py,sha256=BpHVmzkzM6EN63NQrDRkApIeAyrpT6Y9LZiPEygaxvE,11829
16
16
  autogluon/timeseries/metrics/point.py,sha256=xllyGh11otbmUVHyIaceROPR3qyllWPQ9xlSmIGI3EI,18306
@@ -19,7 +19,7 @@ autogluon/timeseries/metrics/utils.py,sha256=HuDe1BNe8yJU4f_DKM913nNrUueoRaw6zhx
19
19
  autogluon/timeseries/models/__init__.py,sha256=MYD9JJ-wUDE5B6jW6E6LU2eXQ6vflfQBvqQJkdzJa3A,1189
20
20
  autogluon/timeseries/models/presets.py,sha256=HEACiRpnY6dcff7W44gnM0x1KRgr2bNf5D6zcaHgHxo,12201
21
21
  autogluon/timeseries/models/abstract/__init__.py,sha256=Htfkjjc3vo92RvyM8rIlQ0PLWt3jcrCKZES07UvCMV0,146
22
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=94TG7tsdfENP41QATr4IeMofaFt8ySjrrrH4MxZZ3Xc,32104
22
+ autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=v4qgHYGmktNz-rY-qCgZjm1n1aOQ6F_OOZIpBsw46hc,32103
23
23
  autogluon/timeseries/models/abstract/model_trial.py,sha256=ENPg_7nsdxIvaNM0o0UShZ3x8jFlRmwRc5m0fGPC0TM,3720
24
24
  autogluon/timeseries/models/abstract/tunable.py,sha256=SFl4vjkb6BfFFaRPVdftnnLYlIyCThutLHxiiAlV6tY,7168
25
25
  autogluon/timeseries/models/autogluon_tabular/__init__.py,sha256=r9i6jWcyeLHYClkcMSKRVsfrkBUMxpDrTATNTBc_qgQ,136
@@ -32,18 +32,18 @@ autogluon/timeseries/models/chronos/pipeline/__init__.py,sha256=bkTR0LSKIxAaKFOr
32
32
  autogluon/timeseries/models/chronos/pipeline/base.py,sha256=14OAKHmio6LmO4mVom2mPGB0CvIrOjMGJzb-MVSAq-s,5596
33
33
  autogluon/timeseries/models/chronos/pipeline/chronos.py,sha256=uFJLsSb2WQiSrmDZ0g2mO-lhTFUlq7vplGRBXZ9_VBk,22591
34
34
  autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py,sha256=kNIDesojKB3rbEK9jM8st4k7ZeaT6tz1znf4PsRDv2Q,20066
35
- autogluon/timeseries/models/chronos/pipeline/utils.py,sha256=dtDX5Pyu95bGv7qmqgfUc1iYowWPY84dnGN0uyqyHyQ,13131
35
+ autogluon/timeseries/models/chronos/pipeline/utils.py,sha256=KHvhmyLUircxjnCRWwXlgFePGwOMhD20YY55TviROuI,13042
36
36
  autogluon/timeseries/models/ensemble/__init__.py,sha256=x2Y6dWk15XugTEWNUKq8U5z6nIjelo3UjpI-TfS13OE,159
37
37
  autogluon/timeseries/models/ensemble/abstract.py,sha256=ie-BKD4JIkQQoKqtf6sYI5Aix7dSgywFsSdeGPxoElk,5821
38
38
  autogluon/timeseries/models/ensemble/basic.py,sha256=BRPWg_Wgfb87iInFSoTRE75BRHaovRR5HFRvzxET_wU,3423
39
39
  autogluon/timeseries/models/ensemble/greedy.py,sha256=fKVLtnaJZ03zrfr9yqxvyA5IdiMtFL6TQidqw0BoqkU,7220
40
40
  autogluon/timeseries/models/gluonts/__init__.py,sha256=asC1PTj4j9xMbilvk1IT1julnpeoKbv5ZNuAR6-DFgA,361
41
- autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=35T8rty6sPGiaSFNpiVNmeseo1_qpn664UcWo92W5eI,32906
41
+ autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=5VOdRLk5YW79_9af66yxCczgrTBApzNhciRFhAzaIIQ,32665
42
42
  autogluon/timeseries/models/gluonts/torch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
43
  autogluon/timeseries/models/gluonts/torch/models.py,sha256=f7IicZzLAN2v_9y3Pxt9G6f48xIzmDjb1U5k44hS3O0,25760
44
44
  autogluon/timeseries/models/local/__init__.py,sha256=e2UImoJhmj70E148IIObv90C_bHxgyLNk6YsS4p7pfs,701
45
- autogluon/timeseries/models/local/abstract_local_model.py,sha256=VP-yP5Rx93Kb6ine1d5lBe6fRbeZcGG3hBn-AoL4OqQ,11841
46
- autogluon/timeseries/models/local/naive.py,sha256=BhXxL52-_i4Xynx-spfZMkRejofFPpknggS35_aQSwc,7253
45
+ autogluon/timeseries/models/local/abstract_local_model.py,sha256=0apyzut7Vs3jElsR1YipMqRQrskgrZu6kJFs-k4DB0g,12053
46
+ autogluon/timeseries/models/local/naive.py,sha256=SMdA2Tu-o7gfOLhOoh5m1oe85F3LXn9ulTzRXFhLH20,7252
47
47
  autogluon/timeseries/models/local/npts.py,sha256=Bp74doKnfpGE8ywP4FWOCI_RwRMsmgocYDfGtq764DA,4143
48
48
  autogluon/timeseries/models/local/statsforecast.py,sha256=s3Byp7WAUy0Rnfl1qYMSIm44MKD9t8E732xuNLk_aao,32615
49
49
  autogluon/timeseries/models/multi_window/__init__.py,sha256=Bq7AT2Jxdd4WNqmjTdzeqgNiwn1NCyWp4tBIWaM-zfI,60
@@ -52,19 +52,19 @@ autogluon/timeseries/transforms/__init__.py,sha256=fKlT4pkJ_8Gl7IUTc3uSDzt2Xow5i
52
52
  autogluon/timeseries/transforms/covariate_scaler.py,sha256=G56PTHKqCFKiXRKLkLun7mN3-T09jxN-5oI1ISADJdQ,7042
53
53
  autogluon/timeseries/transforms/target_scaler.py,sha256=BeT1aP51Wq9EidxC0dVg6dHvampKafpG1uKu4ZaaJPs,6050
54
54
  autogluon/timeseries/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- autogluon/timeseries/utils/features.py,sha256=7gyRkuD2sMwJivF6O_bW7kADJBnCbBM055CnwkoU94I,22658
56
- autogluon/timeseries/utils/forecast.py,sha256=vd0Y5YsHU6awu4E7xyDXQGe21P1aB26gwFsA3m09mKw,2197
55
+ autogluon/timeseries/utils/features.py,sha256=OeMvwVX4D2kwoFjuj0RZYZ7MgcbaeBjV97Ud1aUdvNc,22657
56
+ autogluon/timeseries/utils/forecast.py,sha256=yK1_eNtRUPYGs0R-VWMO4c81LrTGF57ih3yzsXVHyGY,2191
57
57
  autogluon/timeseries/utils/warning_filters.py,sha256=tHvhj9y7c3MP6JrjAedc7UiFFw0_mKYziDQupw8NhiQ,2538
58
58
  autogluon/timeseries/utils/datetime/__init__.py,sha256=bTMR8jLh1LW55vHjbOr1zvWRMF_PqbvxpS-cUcNIDWI,173
59
59
  autogluon/timeseries/utils/datetime/base.py,sha256=3NdsH3NDq4cVAOSoy3XpaNixyNlbjy4DJ_YYOGuu9x4,1341
60
60
  autogluon/timeseries/utils/datetime/lags.py,sha256=gQDk5_zmsY5DUWDUpSaCKYkQ9nHKKY-LsywJQRAoYSk,5988
61
61
  autogluon/timeseries/utils/datetime/seasonality.py,sha256=YK_2k8hvYIMW-sJPnjGWRtCnvIOthwA2hATB3nwVoD4,834
62
62
  autogluon/timeseries/utils/datetime/time_features.py,sha256=MjLi3zQ00uWWJtXH9oGX2GJkTbvjdSiuabSa4kcVuxE,2672
63
- autogluon.timeseries-1.3.2b20250624.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
64
- autogluon.timeseries-1.3.2b20250624.dist-info/METADATA,sha256=QLbYt4MsNYmLGSGUbIPfk2wetYO9dBAcYwRoGNGyRPA,12737
65
- autogluon.timeseries-1.3.2b20250624.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
66
- autogluon.timeseries-1.3.2b20250624.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
67
- autogluon.timeseries-1.3.2b20250624.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
68
- autogluon.timeseries-1.3.2b20250624.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
69
- autogluon.timeseries-1.3.2b20250624.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
70
- autogluon.timeseries-1.3.2b20250624.dist-info/RECORD,,
63
+ autogluon.timeseries-1.3.2b20250625.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
64
+ autogluon.timeseries-1.3.2b20250625.dist-info/METADATA,sha256=gvvE1mhWi_xgzM8oQuK-vaJKa6aq31ux76h9xRjkyvk,12737
65
+ autogluon.timeseries-1.3.2b20250625.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
66
+ autogluon.timeseries-1.3.2b20250625.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
67
+ autogluon.timeseries-1.3.2b20250625.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
68
+ autogluon.timeseries-1.3.2b20250625.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
69
+ autogluon.timeseries-1.3.2b20250625.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
70
+ autogluon.timeseries-1.3.2b20250625.dist-info/RECORD,,