autogluon.timeseries 1.3.2b20250623__py3-none-any.whl → 1.3.2b20250625__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/dataset/ts_dataframe.py +117 -48
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +1 -1
- autogluon/timeseries/models/chronos/pipeline/utils.py +1 -2
- autogluon/timeseries/models/gluonts/abstract_gluonts.py +3 -6
- autogluon/timeseries/models/local/abstract_local_model.py +9 -5
- autogluon/timeseries/models/local/naive.py +1 -1
- autogluon/timeseries/predictor.py +5 -5
- autogluon/timeseries/utils/features.py +1 -1
- autogluon/timeseries/utils/forecast.py +2 -1
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.3.2b20250623.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/METADATA +5 -5
- {autogluon.timeseries-1.3.2b20250623.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/RECORD +19 -19
- /autogluon.timeseries-1.3.2b20250623-py3.9-nspkg.pth → /autogluon.timeseries-1.3.2b20250625-py3.9-nspkg.pth +0 -0
- {autogluon.timeseries-1.3.2b20250623.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.3.2b20250623.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.3.2b20250623.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.3.2b20250623.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.3.2b20250623.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.3.2b20250623.dist-info → autogluon.timeseries-1.3.2b20250625.dist-info}/zip-safe +0 -0
autogluon/timeseries/dataset/ts_dataframe.py CHANGED

@@ -7,9 +7,9 @@ import reprlib
 from collections.abc import Iterable
 from itertools import islice
 from pathlib import Path
-from pprint import pformat
 from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type, Union, overload
 
+import numpy as np
 import pandas as pd
 from joblib.parallel import Parallel, delayed
 from pandas.core.internals import ArrayManager, BlockManager  # type: ignore

@@ -28,7 +28,7 @@ IRREGULAR_TIME_INDEX_FREQSTR = "IRREG"
 class TimeSeriesDataFrame(pd.DataFrame):
     """A collection of univariate time series, where each row is identified by an (``item_id``, ``timestamp``) pair.
 
-    For example, a time series
+    For example, a time series dataframe could represent the daily sales of a collection of products, where each
     ``item_id`` corresponds to a product and ``timestamp`` corresponds to the day of the record.
 
     Parameters

@@ -78,7 +78,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         You can also use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.from_iterable_dataset` for loading data in such format.
 
     static_features : pd.DataFrame, str or pathlib.Path, optional
-        An optional
+        An optional dataframe describing the metadata of each individual time series that does not change with time.
         Can take real-valued or categorical values. For example, if ``TimeSeriesDataFrame`` contains sales of various
         products, static features may refer to time-independent features like color or brand.
 

@@ -223,7 +223,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
             raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
         if not data.index.names == (f"{ITEMID}", f"{TIMESTAMP}"):
             raise ValueError(f"data must have index names as ('{ITEMID}', '{TIMESTAMP}'), got {data.index.names}")
-        item_id_index = data.index.
+        item_id_index = data.index.levels[0]
         if not (pd.api.types.is_integer_dtype(item_id_index) or pd.api.types.is_string_dtype(item_id_index)):
             raise ValueError(f"all entries in index `{ITEMID}` must be of integer or string dtype")
 

@@ -303,7 +303,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
         ts_df: TimeSeriesDataFrame
-            A
+            A dataframe in TimeSeriesDataFrame format.
         """
         return cls(df, static_features=static_features_df, id_column=id_column, timestamp_column=timestamp_column)
 

@@ -350,7 +350,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
         ts_df: TimeSeriesDataFrame
-            A
+            A dataframe in TimeSeriesDataFrame format.
         """
         return cls(path, static_features=static_features_path, id_column=id_column, timestamp_column=timestamp_column)
 

@@ -379,7 +379,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
         ts_df: TimeSeriesDataFrame
-            A
+            A dataframe in TimeSeriesDataFrame format.
         """
         return cls(iterable_dataset, num_cpus=num_cpus)
 
@@ -469,53 +469,58 @@ class TimeSeriesDataFrame(pd.DataFrame):
         If some items have an irregular frequency or if different items have different frequencies, returns string
         `IRREG`.
         """
+        ts_df = self
+        if num_items is not None and ts_df.num_items > num_items:
+            items_subset = ts_df.item_ids.to_series().sample(n=num_items, random_state=123)
+            ts_df = ts_df.loc[items_subset]
+
+        if not ts_df.index.is_monotonic_increasing:
+            ts_df = ts_df.sort_index()
+
+        indptr = ts_df.get_indptr()
+        item_ids = ts_df.item_ids
+        timestamps = ts_df.index.get_level_values(level=1)
+        candidate_freq = ts_df.index.levels[1].freq
+
+        frequencies = []
+        irregular_items = []
+        for i in range(len(indptr) - 1):
+            start, end = indptr[i], indptr[i + 1]
+            item_timestamps = timestamps[start:end]
+            inferred_freq = item_timestamps.inferred_freq
 
-        df = pd.DataFrame(self)
-        if num_items is not None:
-            all_item_ids = self.item_ids
-            if len(all_item_ids) > num_items:
-                items_subset = all_item_ids.to_series().sample(n=num_items, random_state=123)
-                df = df.loc[items_subset]
-
-        candidate_freq = df.index.levels[1].freq
-        index_df = df.index.to_frame(index=False)
-
-        def get_freq(series: pd.Series) -> Optional[str]:
-            dt_index = pd.DatetimeIndex(series)
-            inferred_freq = dt_index.inferred_freq
             # Fallback option: maybe original index has a `freq` attribute that pandas fails to infer (e.g., 'SME')
             if inferred_freq is None and candidate_freq is not None:
                 try:
                     # If this line does not raise an exception, then candidate_freq is a compatible frequency
-
+                    item_timestamps.freq = candidate_freq
                 except ValueError:
                     inferred_freq = None
                 else:
                     inferred_freq = candidate_freq.freqstr
-            return inferred_freq
 
-
-
-
+            if inferred_freq is None:
+                irregular_items.append(item_ids[i])
+            else:
+                frequencies.append(inferred_freq)
+
+        unique_freqs = list(set(frequencies))
+        if len(unique_freqs) != 1 or len(irregular_items) > 0:
             if raise_if_irregular:
-
-                if len(items_with_irregular_freq) > 0:
+                if irregular_items:
                     raise ValueError(
-                        "Cannot infer frequency. Items with irregular frequency: "
-                        f"{pformat(items_with_irregular_freq.index.tolist())}"
+                        f"Cannot infer frequency. Items with irregular frequency: {reprlib.repr(irregular_items)}"
                     )
                 else:
-                    raise ValueError(
-
-
-                    )
-            return IRREGULAR_TIME_INDEX_FREQSTR
+                    raise ValueError(f"Cannot infer frequency. Multiple frequencies detected: {unique_freqs}")
+            else:
+                return IRREGULAR_TIME_INDEX_FREQSTR
         else:
-            return pd.tseries.frequencies.to_offset(
+            return pd.tseries.frequencies.to_offset(unique_freqs[0]).freqstr
 
     @property
     def freq(self):
-        """Inferred pandas-compatible frequency of the timestamps in the
+        """Inferred pandas-compatible frequency of the timestamps in the dataframe.
 
         Computed using a random subset of the time series for speed. This may sometimes result in incorrectly inferred
         values. For reliable results, use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.infer_frequency`.
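The per-item loop added above relies on two pandas behaviors: `DatetimeIndex.inferred_freq` returns `None` when the spacing is irregular, and assigning a candidate offset to `DatetimeIndex.freq` raises `ValueError` if the index does not conform to it. A minimal sketch of that fallback logic on toy timestamps (the dates are invented for illustration, not taken from the library):

import pandas as pd

regular = pd.DatetimeIndex(["2024-01-01", "2024-01-02", "2024-01-03"])
irregular = pd.DatetimeIndex(["2024-01-01", "2024-01-02", "2024-01-05"])

print(regular.inferred_freq)    # "D"
print(irregular.inferred_freq)  # None

# Fallback: check whether a candidate offset is compatible with the index
candidate = pd.tseries.frequencies.to_offset("D")
try:
    irregular.freq = candidate  # raises ValueError because the spacing is not daily
    freq = candidate.freqstr
except ValueError:
    freq = None
print(freq)  # None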
@@ -529,8 +534,13 @@ class TimeSeriesDataFrame(pd.DataFrame):
         return len(self.item_ids)
 
     def num_timesteps_per_item(self) -> pd.Series:
-        """
-
+        """Number of observations in each time series in the dataframe.
+
+        Returns a `pandas.Series` with item_id as index and number of observations per item as values.
+        """
+        counts = pd.Series(self.index.codes[0]).value_counts(sort=False)
+        counts.index = self.index.levels[0][counts.index]
+        return counts
 
     def copy(self: TimeSeriesDataFrame, deep: bool = True) -> TimeSeriesDataFrame:
         """Make a copy of the TimeSeriesDataFrame.
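The new `num_timesteps_per_item` body counts rows per item directly from the MultiIndex codes and then maps the integer codes back to the item labels. A small self-contained illustration of the same trick (the index values are made up for the example):

import pandas as pd

index = pd.MultiIndex.from_tuples(
    [("A", "2024-01-01"), ("A", "2024-01-02"), ("A", "2024-01-03"), ("B", "2024-01-01"), ("B", "2024-01-02")],
    names=["item_id", "timestamp"],
)

# index.codes[0] holds the integer code of each row's item_id
counts = pd.Series(index.codes[0]).value_counts(sort=False)
# map the integer codes back to the original item_id labels
counts.index = index.levels[0][counts.index]
print(counts.to_dict())  # {'A': 3, 'B': 2}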
@@ -568,7 +578,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Parameters
         ----------
         cutoff_time: pd.Timestamp
-            The time to split the current
+            The time to split the current dataframe into two dataframes.
 
         Returns
         -------

@@ -593,7 +603,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         This operation is equivalent to selecting a slice ``[start_index : end_index]`` from each time series, and then
         combining these slices into a new ``TimeSeriesDataFrame``. See examples below.
 
-
+        It is recommended to sort the index with `ts_df.sort_index()` before calling this method to take advantage of
+        a fast optimized algorithm.
 
         Parameters
         ----------

@@ -680,10 +691,53 @@ class TimeSeriesDataFrame(pd.DataFrame):
         if end_index is not None and not isinstance(end_index, int):
             raise ValueError(f"end_index must be of type int or None (got {type(end_index)})")
 
-
-
-
-
+        if start_index is None and end_index is None:
+            # Return a copy to avoid in-place modification.
+            # self.copy() is much faster than self.loc[ones(len(self), dtype=bool)]
+            return self.copy()
+
+        if self.index.is_monotonic_increasing:
+            # Use a fast optimized algorithm if the index is sorted
+            indptr = self.get_indptr()
+            lengths = np.diff(indptr)
+            starts = indptr[:-1]
+
+            slice_start = (
+                np.zeros_like(lengths)
+                if start_index is None
+                else np.clip(np.where(start_index >= 0, start_index, lengths + start_index), 0, lengths)
+            )
+            slice_end = (
+                lengths.copy()
+                if end_index is None
+                else np.clip(np.where(end_index >= 0, end_index, lengths + end_index), 0, lengths)
+            )
+
+            # Filter out invalid slices where start >= end
+            valid_slices = slice_start < slice_end
+            if not np.any(valid_slices):
+                # Return empty dataframe with same structure
+                return self.loc[np.zeros(len(self), dtype=bool)]
+
+            starts = starts[valid_slices]
+            slice_start = slice_start[valid_slices]
+            slice_end = slice_end[valid_slices]
+
+            # We put 1 at the slice_start index for each item and -1 at the slice_end index for each item.
+            # After we apply cumsum we get the indicator mask selecting values between slice_start and slice_end
+            # cumsum([0, 0, 1, 0, 0, -1, 0]) -> [0, 0, 1, 1, 1, 0, 0]
+            # We need array of size len(self) + 1 in case events[starts + slice_end] tries to access position len(self)
+            events = np.zeros(len(self) + 1, dtype=np.int8)
+            events[starts + slice_start] += 1
+            events[starts + slice_end] -= 1
+            mask = np.cumsum(events)[:-1].astype(bool)
+            # loc[mask] returns a view of the original data - modifying it will produce a SettingWithCopyWarning
+            return self.loc[mask]
+        else:
+            # Fall back to a slow groupby operation
+            result = self.groupby(level=ITEMID, sort=False, as_index=False).nth(slice(start_index, end_index))
+            result.static_features = self.static_features
+            return result
 
     def slice_by_time(self, start_time: pd.Timestamp, end_time: pd.Timestamp) -> TimeSeriesDataFrame:
         """Select a subsequence from each time series between start (inclusive) and end (exclusive) timestamps.
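The vectorized branch added above avoids a groupby by marking the start (+1) and end (-1) of each selected slice in an "events" array and taking a cumulative sum to obtain a boolean row mask. The same idea in isolation, on one flat array (array contents, item lengths, and the [-2:] slice are arbitrary here):

import numpy as np

values = np.arange(10)          # rows of a sorted dataframe with two items
indptr = np.array([0, 6, 10])   # item 0 -> rows 0:6, item 1 -> rows 6:10
lengths = np.diff(indptr)       # [6, 4]
starts = indptr[:-1]            # [0, 6]

# keep the last 2 rows of each item, i.e. slice [-2:] per item
slice_start = np.clip(lengths - 2, 0, lengths)  # [4, 2]
slice_end = lengths.copy()                      # [6, 4]

events = np.zeros(len(values) + 1, dtype=np.int8)
events[starts + slice_start] += 1   # +1 where each selected slice begins
events[starts + slice_end] -= 1     # -1 one past where each selected slice ends
mask = np.cumsum(events)[:-1].astype(bool)

print(values[mask])  # [4 5 8 9]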
@@ -712,7 +766,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
     @classmethod
     def from_pickle(cls, filepath_or_buffer: Any) -> TimeSeriesDataFrame:
-        """Convenience method to read pickled time series
+        """Convenience method to read pickled time series dataframes. If the read pickle
         file refers to a plain pandas DataFrame, it will be cast to a TimeSeriesDataFrame.
 
         Parameters

@@ -723,7 +777,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Returns
         -------
         ts_df : TimeSeriesDataFrame
-            The pickled time series
+            The pickled time series dataframe.
         """
         try:
             data = pd.read_pickle(filepath_or_buffer)

@@ -734,6 +788,11 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> TimeSeriesDataFrame:
         """Fill missing values represented by NaN.
 
+        .. note::
+            This method assumes that the index of the TimeSeriesDataFrame is sorted by [item_id, timestamp].
+
+            If the index is not sorted, this method will log a warning and may produce an incorrect result.
+
         Parameters
         ----------
         method : str, default = "auto"

@@ -876,8 +935,12 @@ class TimeSeriesDataFrame(pd.DataFrame):
         suffix: Optional[str] = None,
     ) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
         """Generate a train/test split from the given dataset.
+
         This method can be used to generate splits for multi-window backtesting.
 
+        .. note::
+            This method automatically sorts the TimeSeriesDataFrame by [item_id, timestamp].
+
         Parameters
         ----------
         prediction_length : int

@@ -921,7 +984,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         chunk_size: int = 100,
         **kwargs,
     ) -> TimeSeriesDataFrame:
-        """Convert each time series in the
+        """Convert each time series in the dataframe to the given frequency.
 
         This method is useful for two purposes:
 

@@ -931,7 +994,6 @@ class TimeSeriesDataFrame(pd.DataFrame):
         Standard ``df.groupby(...).resample(...)`` can be extremely slow for large datasets, so we parallelize this
         operation across multiple CPU cores.
 
-
         Parameters
         ----------
         freq : Union[str, pd.DateOffset]

@@ -1044,6 +1106,13 @@ class TimeSeriesDataFrame(pd.DataFrame):
         """Convert `TimeSeriesDataFrame` to a `pandas.DataFrame`"""
         return pd.DataFrame(self)
 
+    def get_indptr(self) -> np.ndarray:
+        """[Advanced] Get a numpy array of shape [num_items + 1] that points to the start and end of each time series.
+
+        This method assumes that the TimeSeriesDataFrame is sorted by [item_id, timestamp].
+        """
+        return np.concatenate([[0], np.cumsum(self.num_timesteps_per_item().to_numpy())]).astype(np.int32)
+
     # inline typing stubs for various overridden methods
     if TYPE_CHECKING:
 
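The new `get_indptr` helper, reused throughout this diff, is a CSR-style index pointer: a cumulative sum of per-item lengths with a leading zero. A hypothetical illustration of how such a pointer is built and used to slice a flat array into one chunk per item (the lengths and target values are invented):

import numpy as np

# lengths of three sorted time series (invented numbers)
num_timesteps_per_item = np.array([4, 2, 3])
indptr = np.concatenate([[0], np.cumsum(num_timesteps_per_item)]).astype(np.int32)
print(indptr)  # [0 4 6 9]

# slice a flat array of target values into one chunk per item
target = np.arange(9, dtype=float)
per_item = [target[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1)]
print([chunk.tolist() for chunk in per_item])  # [[0.0, 1.0, 2.0, 3.0], [4.0, 5.0], [6.0, 7.0, 8.0]]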
autogluon/timeseries/models/abstract/abstract_timeseries_model.py CHANGED

@@ -570,7 +570,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
         Returns
         -------
         predictions: TimeSeriesDataFrame
-            pandas
+            pandas dataframes with a timestamp index, where each input item from the input
             data is given as a separate forecast item in the dictionary, keyed by the `item_id`s
             of input items.
         """

autogluon/timeseries/models/chronos/pipeline/utils.py CHANGED

@@ -255,8 +255,7 @@ class ChronosInferenceDataset:
         self.target_array = target_df[target_column].to_numpy(dtype=np.float32)
 
         # store pointer to start:end of each time series
-
-        self.indptr = np.append(0, cum_sizes).astype(np.int32)
+        self.indptr = target_df.get_indptr()
 
     def __len__(self):
         return len(self.indptr) - 1  # noqa

autogluon/timeseries/models/gluonts/abstract_gluonts.py CHANGED

@@ -70,12 +70,9 @@ class SimpleGluonTSDataset(GluonTSDataset):
         self.prediction_length = prediction_length
 
         # Replace inefficient groupby ITEMID with indptr that stores start:end of each time series
-
-
-        self.
-        cum_sizes = indices_sizes.to_numpy().cumsum()
-        self.indptr = np.append(0, cum_sizes).astype(np.int32)
-        self.start_timestamps = target_df.reset_index(TIMESTAMP).groupby(level=ITEMID, sort=False).first()[TIMESTAMP]
+        self.item_ids = target_df.item_ids
+        self.indptr = target_df.get_indptr()
+        self.start_timestamps = target_df.index[self.indptr[:-1]].to_frame(index=False)[TIMESTAMP]
         assert len(self.item_ids) == len(self.start_timestamps)
 
     @staticmethod

autogluon/timeseries/models/local/abstract_local_model.py CHANGED

@@ -136,9 +136,12 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         self._dummy_forecast = self._get_dummy_forecast(train_data)
         return self
 
-    def _get_dummy_forecast(self, train_data: TimeSeriesDataFrame) -> pd.DataFrame:
+    def _get_dummy_forecast(self, train_data: TimeSeriesDataFrame, max_num_rows: int = 20_000) -> pd.DataFrame:
         agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
-
+        target_series = train_data[self.target]
+        if len(target_series) > max_num_rows:
+            target_series = target_series.sample(max_num_rows, replace=True)
+        stats_marginal = target_series.agg(agg_functions)
         stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
         return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
 

@@ -150,10 +153,11 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         max_ts_length = model_params["max_ts_length"]
         if max_ts_length is not None:
             logger.debug(f"Shortening all time series to at most {max_ts_length}")
-            data = data.
+            data = data.slice_by_timestep(-max_ts_length, None)
 
-
-
+        indptr = data.get_indptr()
+        target_series = data[self.target].droplevel(level=ITEMID)
+        all_series = (target_series[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1))
 
         # timeout ensures that no individual job takes longer than time_limit
         # TODO: a job started late may still exceed time_limit - how to prevent that?
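The `_get_dummy_forecast` change caps the number of rows used for the marginal statistics: with more than `max_num_rows` observations, the target is subsampled with replacement before aggregating, and the resulting row of statistics is tiled over the forecast horizon. A rough sketch of that pattern, with a simple local quantile helper standing in for the library's `get_quantile_function` (the helper, the random data, and the quantile levels are illustrative only):

import numpy as np
import pandas as pd

def quantile_fn(q: float):
    # illustrative stand-in for a named quantile aggregator
    def agg(series: pd.Series) -> float:
        return series.quantile(q)
    agg.__name__ = f"{q}"
    return agg

target_series = pd.Series(np.random.default_rng(0).normal(size=100_000))
max_num_rows = 20_000
if len(target_series) > max_num_rows:
    target_series = target_series.sample(max_num_rows, replace=True)

agg_functions = ["mean"] + [quantile_fn(q) for q in (0.1, 0.5, 0.9)]
stats_marginal = target_series.agg(agg_functions)

prediction_length = 5
dummy_forecast = pd.DataFrame(np.tile(stats_marginal.values, [prediction_length, 1]), columns=stats_marginal.index)
print(dummy_forecast.shape)  # (5, 4): one row per forecast step, one column per statistic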
autogluon/timeseries/predictor.py CHANGED

@@ -108,7 +108,7 @@ class TimeSeriesPredictor:
 
         If ``known_covariates_names`` are provided, then:
 
-        - :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`, :meth:`~autogluon.timeseries.TimeSeriesPredictor.evaluate`, and :meth:`~autogluon.timeseries.TimeSeriesPredictor.leaderboard` will expect a
+        - :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`, :meth:`~autogluon.timeseries.TimeSeriesPredictor.evaluate`, and :meth:`~autogluon.timeseries.TimeSeriesPredictor.leaderboard` will expect a dataframe with columns listed in ``known_covariates_names`` (in addition to the ``target`` column).
         - :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict` will expect an additional keyword argument ``known_covariates`` containing the future values of the known covariates in ``TimeSeriesDataFrame`` format.
 
     quantile_levels : List[float], optional

@@ -284,7 +284,7 @@ class TimeSeriesPredictor:
         Parameters
         ----------
         data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
-            Data as a
+            Data as a dataframe or path to file storing the data.
         name : str
             Name of the data that will be used in log messages (e.g., 'train_data', 'tuning_data', or 'data').
 

@@ -809,7 +809,7 @@ class TimeSeriesPredictor:
            - Must include ``timestamp`` values for the full forecast horizon (i.e., ``prediction_length`` time steps) following the end of each series in the input ``data``.
 
            You can use :meth:`autogluon.timeseries.TimeSeriesPredictor.make_future_data_frame` to generate a template
-           containing the required ``item_id`` and ``timestamp`` combinations for the `known_covariates`
+           containing the required ``item_id`` and ``timestamp`` combinations for the `known_covariates` dataframe.
 
            See example below.
         model : str, optional

@@ -1230,7 +1230,7 @@ class TimeSeriesPredictor:
         **kwargs,
     ) -> pd.DataFrame:
         """Return a leaderboard showing the performance of every trained model, the output is a
-        pandas
+        pandas dataframe with columns:
 
         * ``model``: The name of the model.
         * ``score_test``: The test score of the model on ``data``, if provided. Computed according to ``eval_metric``.

@@ -1315,7 +1315,7 @@ class TimeSeriesPredictor:
         return leaderboard
 
     def make_future_data_frame(self, data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]) -> pd.DataFrame:
-        """Generate a
+        """Generate a dataframe with the `item_id` and `timestamp` values corresponding to the forecast horizon.
 
         Parameters
         ----------

autogluon/timeseries/utils/features.py CHANGED

@@ -406,7 +406,7 @@ class AbstractFeatureImportanceTransform:
         if feature_name not in self.covariate_metadata.all_features:
             raise ValueError(f"Target feature {feature_name} not found in covariate metadata")
 
-        # feature transform works on a shallow copy of the main time series
+        # feature transform works on a shallow copy of the main time series dataframe
         # but a deep copy of the static features.
         data = data.copy(deep=False)
 

autogluon/timeseries/utils/forecast.py CHANGED

@@ -35,7 +35,8 @@ def make_future_data_frame(
 
     Returns a pandas.DataFrame, with columns "item_id" and "timestamp" corresponding to the forecast horizon.
     """
-
+    indptr = ts_dataframe.get_indptr()
+    last = ts_dataframe.index[indptr[1:] - 1].to_frame(index=False)
    item_ids = np.repeat(last[ITEMID].to_numpy(), prediction_length)
 
     if freq is None:
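The updated helper picks the last (item_id, timestamp) row of each sorted series via the index pointer and repeats each item_id `prediction_length` times; the future timestamps are then generated from each last observed timestamp and the frequency. A simplified sketch of that construction (the column names, toy data, and the `pd.date_range`-based timestamp generation are assumptions for illustration, not the library's exact implementation):

import numpy as np
import pandas as pd

prediction_length = 3
freq = pd.tseries.frequencies.to_offset("D")

# last observed timestamp of each item (toy data)
last = pd.DataFrame(
    {"item_id": ["A", "B"], "timestamp": pd.to_datetime(["2024-01-05", "2024-02-10"])}
)

item_ids = np.repeat(last["item_id"].to_numpy(), prediction_length)
timestamps = np.concatenate(
    [pd.date_range(start=ts + freq, periods=prediction_length, freq=freq) for ts in last["timestamp"]]
)
future = pd.DataFrame({"item_id": item_ids, "timestamp": timestamps})
print(future)  # 6 rows: 3 future daily timestamps for each of the 2 items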
autogluon/timeseries/version.py CHANGED

autogluon.timeseries-1.3.2b20250625.dist-info/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 1.3.2b20250623
+Version: 1.3.2b20250625
 Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community

@@ -55,10 +55,10 @@ Requires-Dist: fugue>=0.9.0
 Requires-Dist: tqdm<5,>=4.38
 Requires-Dist: orjson~=3.9
 Requires-Dist: tensorboard<3,>=2.9
-Requires-Dist: autogluon.core[raytune]==1.3.2b20250623
-Requires-Dist: autogluon.common==1.3.2b20250623
-Requires-Dist: autogluon.features==1.3.2b20250623
-Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.3.2b20250623
+Requires-Dist: autogluon.core[raytune]==1.3.2b20250625
+Requires-Dist: autogluon.common==1.3.2b20250625
+Requires-Dist: autogluon.features==1.3.2b20250625
+Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.3.2b20250625
 Provides-Extra: all
 Provides-Extra: chronos-onnx
 Requires-Dist: optimum[onnxruntime]<1.23,>=1.17; extra == "chronos-onnx"

autogluon.timeseries-1.3.2b20250625.dist-info/RECORD CHANGED

@@ -1,16 +1,16 @@
-autogluon.timeseries-1.3.
+autogluon.timeseries-1.3.2b20250625-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
 autogluon/timeseries/__init__.py,sha256=_CrLLc1fkjen7UzWoO0Os8WZoHOgvZbHKy46I8v_4k4,304
 autogluon/timeseries/evaluator.py,sha256=l642tYfTHsl8WVIq_vV6qhgAFVFr9UuZD7gLra3A_Kc,250
 autogluon/timeseries/learner.py,sha256=pIn4YSOk0aqCWyBpIlwnAsFnG4h7PLXk8guFH3wFS-w,13923
-autogluon/timeseries/predictor.py,sha256=
+autogluon/timeseries/predictor.py,sha256=u4d7-xMs669g5xxqIYuvEyGQ0P6Y8IoToiyg9zUZoy4,88168
 autogluon/timeseries/regressor.py,sha256=ozlhO-wce6YEtSMj0bfMgfNVeblfU3rI6ITuIk_WAFo,11868
 autogluon/timeseries/splitter.py,sha256=yzPca9p2bWV-_VJAptUyyzQsxu-uixAdpMoGQtDzMD4,3205
 autogluon/timeseries/trainer.py,sha256=4T7y58P3RImDbRZn-Og2qSQtOLpEocwdHi_tl1yt0Sc,58021
-autogluon/timeseries/version.py,sha256=
+autogluon/timeseries/version.py,sha256=DDz6ZOzsA3FxARUjxCymDQnnmx13nbzyfMZvMdJfNvQ,91
 autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
 autogluon/timeseries/configs/presets_configs.py,sha256=cLat8ecLlWrI-SC5KLBDCX2SbVXaucemy2pjxJAtSY0,2543
 autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
-autogluon/timeseries/dataset/ts_dataframe.py,sha256=
+autogluon/timeseries/dataset/ts_dataframe.py,sha256=pvL85NCrwcIYr7lxFzY2NZ57yUL82nl6Ypdm1z3ho04,51193
 autogluon/timeseries/metrics/__init__.py,sha256=wfqEf2AiaqCcFGXVGhpNrbo1XBQFmJCS8gRa8Qk2L50,3602
 autogluon/timeseries/metrics/abstract.py,sha256=BpHVmzkzM6EN63NQrDRkApIeAyrpT6Y9LZiPEygaxvE,11829
 autogluon/timeseries/metrics/point.py,sha256=xllyGh11otbmUVHyIaceROPR3qyllWPQ9xlSmIGI3EI,18306

@@ -19,7 +19,7 @@ autogluon/timeseries/metrics/utils.py,sha256=HuDe1BNe8yJU4f_DKM913nNrUueoRaw6zhx
 autogluon/timeseries/models/__init__.py,sha256=MYD9JJ-wUDE5B6jW6E6LU2eXQ6vflfQBvqQJkdzJa3A,1189
 autogluon/timeseries/models/presets.py,sha256=HEACiRpnY6dcff7W44gnM0x1KRgr2bNf5D6zcaHgHxo,12201
 autogluon/timeseries/models/abstract/__init__.py,sha256=Htfkjjc3vo92RvyM8rIlQ0PLWt3jcrCKZES07UvCMV0,146
-autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=
+autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=v4qgHYGmktNz-rY-qCgZjm1n1aOQ6F_OOZIpBsw46hc,32103
 autogluon/timeseries/models/abstract/model_trial.py,sha256=ENPg_7nsdxIvaNM0o0UShZ3x8jFlRmwRc5m0fGPC0TM,3720
 autogluon/timeseries/models/abstract/tunable.py,sha256=SFl4vjkb6BfFFaRPVdftnnLYlIyCThutLHxiiAlV6tY,7168
 autogluon/timeseries/models/autogluon_tabular/__init__.py,sha256=r9i6jWcyeLHYClkcMSKRVsfrkBUMxpDrTATNTBc_qgQ,136

@@ -32,18 +32,18 @@ autogluon/timeseries/models/chronos/pipeline/__init__.py,sha256=bkTR0LSKIxAaKFOr
 autogluon/timeseries/models/chronos/pipeline/base.py,sha256=14OAKHmio6LmO4mVom2mPGB0CvIrOjMGJzb-MVSAq-s,5596
 autogluon/timeseries/models/chronos/pipeline/chronos.py,sha256=uFJLsSb2WQiSrmDZ0g2mO-lhTFUlq7vplGRBXZ9_VBk,22591
 autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py,sha256=kNIDesojKB3rbEK9jM8st4k7ZeaT6tz1znf4PsRDv2Q,20066
-autogluon/timeseries/models/chronos/pipeline/utils.py,sha256=
+autogluon/timeseries/models/chronos/pipeline/utils.py,sha256=KHvhmyLUircxjnCRWwXlgFePGwOMhD20YY55TviROuI,13042
 autogluon/timeseries/models/ensemble/__init__.py,sha256=x2Y6dWk15XugTEWNUKq8U5z6nIjelo3UjpI-TfS13OE,159
 autogluon/timeseries/models/ensemble/abstract.py,sha256=ie-BKD4JIkQQoKqtf6sYI5Aix7dSgywFsSdeGPxoElk,5821
 autogluon/timeseries/models/ensemble/basic.py,sha256=BRPWg_Wgfb87iInFSoTRE75BRHaovRR5HFRvzxET_wU,3423
 autogluon/timeseries/models/ensemble/greedy.py,sha256=fKVLtnaJZ03zrfr9yqxvyA5IdiMtFL6TQidqw0BoqkU,7220
 autogluon/timeseries/models/gluonts/__init__.py,sha256=asC1PTj4j9xMbilvk1IT1julnpeoKbv5ZNuAR6-DFgA,361
-autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=
+autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=5VOdRLk5YW79_9af66yxCczgrTBApzNhciRFhAzaIIQ,32665
 autogluon/timeseries/models/gluonts/torch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autogluon/timeseries/models/gluonts/torch/models.py,sha256=f7IicZzLAN2v_9y3Pxt9G6f48xIzmDjb1U5k44hS3O0,25760
 autogluon/timeseries/models/local/__init__.py,sha256=e2UImoJhmj70E148IIObv90C_bHxgyLNk6YsS4p7pfs,701
-autogluon/timeseries/models/local/abstract_local_model.py,sha256=
-autogluon/timeseries/models/local/naive.py,sha256=
+autogluon/timeseries/models/local/abstract_local_model.py,sha256=0apyzut7Vs3jElsR1YipMqRQrskgrZu6kJFs-k4DB0g,12053
+autogluon/timeseries/models/local/naive.py,sha256=SMdA2Tu-o7gfOLhOoh5m1oe85F3LXn9ulTzRXFhLH20,7252
 autogluon/timeseries/models/local/npts.py,sha256=Bp74doKnfpGE8ywP4FWOCI_RwRMsmgocYDfGtq764DA,4143
 autogluon/timeseries/models/local/statsforecast.py,sha256=s3Byp7WAUy0Rnfl1qYMSIm44MKD9t8E732xuNLk_aao,32615
 autogluon/timeseries/models/multi_window/__init__.py,sha256=Bq7AT2Jxdd4WNqmjTdzeqgNiwn1NCyWp4tBIWaM-zfI,60

@@ -52,19 +52,19 @@ autogluon/timeseries/transforms/__init__.py,sha256=fKlT4pkJ_8Gl7IUTc3uSDzt2Xow5i
 autogluon/timeseries/transforms/covariate_scaler.py,sha256=G56PTHKqCFKiXRKLkLun7mN3-T09jxN-5oI1ISADJdQ,7042
 autogluon/timeseries/transforms/target_scaler.py,sha256=BeT1aP51Wq9EidxC0dVg6dHvampKafpG1uKu4ZaaJPs,6050
 autogluon/timeseries/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autogluon/timeseries/utils/features.py,sha256=
-autogluon/timeseries/utils/forecast.py,sha256=
+autogluon/timeseries/utils/features.py,sha256=OeMvwVX4D2kwoFjuj0RZYZ7MgcbaeBjV97Ud1aUdvNc,22657
+autogluon/timeseries/utils/forecast.py,sha256=yK1_eNtRUPYGs0R-VWMO4c81LrTGF57ih3yzsXVHyGY,2191
 autogluon/timeseries/utils/warning_filters.py,sha256=tHvhj9y7c3MP6JrjAedc7UiFFw0_mKYziDQupw8NhiQ,2538
 autogluon/timeseries/utils/datetime/__init__.py,sha256=bTMR8jLh1LW55vHjbOr1zvWRMF_PqbvxpS-cUcNIDWI,173
 autogluon/timeseries/utils/datetime/base.py,sha256=3NdsH3NDq4cVAOSoy3XpaNixyNlbjy4DJ_YYOGuu9x4,1341
 autogluon/timeseries/utils/datetime/lags.py,sha256=gQDk5_zmsY5DUWDUpSaCKYkQ9nHKKY-LsywJQRAoYSk,5988
 autogluon/timeseries/utils/datetime/seasonality.py,sha256=YK_2k8hvYIMW-sJPnjGWRtCnvIOthwA2hATB3nwVoD4,834
 autogluon/timeseries/utils/datetime/time_features.py,sha256=MjLi3zQ00uWWJtXH9oGX2GJkTbvjdSiuabSa4kcVuxE,2672
-autogluon.timeseries-1.3.
-autogluon.timeseries-1.3.
-autogluon.timeseries-1.3.
-autogluon.timeseries-1.3.
-autogluon.timeseries-1.3.
-autogluon.timeseries-1.3.
-autogluon.timeseries-1.3.
-autogluon.timeseries-1.3.
+autogluon.timeseries-1.3.2b20250625.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
+autogluon.timeseries-1.3.2b20250625.dist-info/METADATA,sha256=gvvE1mhWi_xgzM8oQuK-vaJKa6aq31ux76h9xRjkyvk,12737
+autogluon.timeseries-1.3.2b20250625.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
+autogluon.timeseries-1.3.2b20250625.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+autogluon.timeseries-1.3.2b20250625.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-1.3.2b20250625.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-1.3.2b20250625.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+autogluon.timeseries-1.3.2b20250625.dist-info/RECORD,,