autogluon.timeseries 1.4.1b20251010__py3-none-any.whl → 1.4.1b20251115__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.timeseries might be problematic.
- autogluon/timeseries/dataset/ts_dataframe.py +66 -53
- autogluon/timeseries/learner.py +5 -4
- autogluon/timeseries/metrics/quantile.py +1 -1
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +28 -36
- autogluon/timeseries/models/autogluon_tabular/per_step.py +14 -5
- autogluon/timeseries/models/autogluon_tabular/transforms.py +9 -7
- autogluon/timeseries/models/chronos/model.py +101 -68
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +64 -32
- autogluon/timeseries/models/ensemble/__init__.py +29 -2
- autogluon/timeseries/models/ensemble/abstract.py +1 -37
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +247 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +50 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +10 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +87 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +133 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +141 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +41 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +0 -10
- autogluon/timeseries/models/gluonts/abstract.py +2 -2
- autogluon/timeseries/models/gluonts/dataset.py +2 -2
- autogluon/timeseries/models/local/abstract_local_model.py +2 -2
- autogluon/timeseries/models/multi_window/multi_window_model.py +1 -1
- autogluon/timeseries/models/toto/model.py +5 -3
- autogluon/timeseries/predictor.py +10 -26
- autogluon/timeseries/regressor.py +9 -7
- autogluon/timeseries/splitter.py +1 -25
- autogluon/timeseries/trainer/ensemble_composer.py +250 -0
- autogluon/timeseries/trainer/trainer.py +124 -193
- autogluon/timeseries/trainer/utils.py +18 -0
- autogluon/timeseries/transforms/covariate_scaler.py +1 -1
- autogluon/timeseries/transforms/target_scaler.py +7 -7
- autogluon/timeseries/utils/features.py +9 -5
- autogluon/timeseries/utils/forecast.py +5 -5
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251115-py3.9-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/METADATA +25 -15
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/RECORD +47 -41
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20251010-py3.9-nspkg.pth +0 -1
- /autogluon/timeseries/models/ensemble/{greedy.py → weighted/greedy.py} +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/zip-safe +0 -0
autogluon/timeseries/dataset/ts_dataframe.py
CHANGED
@@ -7,7 +7,7 @@ import reprlib
 from collections.abc import Iterable
 from itertools import islice
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional, Type, Union, overload
+from typing import TYPE_CHECKING, Any, Final, Optional, Type, Union, overload
 
 import numpy as np
 import pandas as pd
@@ -19,11 +19,6 @@ from autogluon.common.loaders import load_pd
 
 logger = logging.getLogger(__name__)
 
-ITEMID = "item_id"
-TIMESTAMP = "timestamp"
-
-IRREGULAR_TIME_INDEX_FREQSTR = "IRREG"
-
 
 class TimeSeriesDataFrame(pd.DataFrame):
     """A collection of univariate time series, where each row is identified by an (``item_id``, ``timestamp``) pair.
@@ -121,6 +116,10 @@ class TimeSeriesDataFrame(pd.DataFrame):
     index: pd.MultiIndex  # type: ignore
     _metadata = ["_static_features"]
 
+    IRREGULAR_TIME_INDEX_FREQSTR: Final[str] = "IRREG"
+    ITEMID: Final[str] = "item_id"
+    TIMESTAMP: Final[str] = "timestamp"
+
     def __init__(
         self,
         data: Union[pd.DataFrame, str, Path, Iterable],
@@ -175,23 +174,27 @@ class TimeSeriesDataFrame(pd.DataFrame):
         df = df.copy()
         if id_column is not None:
             assert id_column in df.columns, f"Column '{id_column}' not found!"
-            if id_column != ITEMID and ITEMID in df.columns:
-                logger.warning(f"Renaming existing column '{ITEMID}' -> '__{ITEMID}' to avoid name collisions.")
-                df.rename(columns={ITEMID: "__" + ITEMID}, inplace=True)
-            df.rename(columns={id_column: ITEMID}, inplace=True)
+            if id_column != cls.ITEMID and cls.ITEMID in df.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.ITEMID}' -> '__{cls.ITEMID}' to avoid name collisions."
+                )
+                df.rename(columns={cls.ITEMID: "__" + cls.ITEMID}, inplace=True)
+            df.rename(columns={id_column: cls.ITEMID}, inplace=True)
 
         if timestamp_column is not None:
             assert timestamp_column in df.columns, f"Column '{timestamp_column}' not found!"
-            if timestamp_column != TIMESTAMP and TIMESTAMP in df.columns:
-                logger.warning(f"Renaming existing column '{TIMESTAMP}' -> '__{TIMESTAMP}' to avoid name collisions.")
-                df.rename(columns={TIMESTAMP: "__" + TIMESTAMP}, inplace=True)
-            df.rename(columns={timestamp_column: TIMESTAMP}, inplace=True)
+            if timestamp_column != cls.TIMESTAMP and cls.TIMESTAMP in df.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.TIMESTAMP}' -> '__{cls.TIMESTAMP}' to avoid name collisions."
+                )
+                df.rename(columns={cls.TIMESTAMP: "__" + cls.TIMESTAMP}, inplace=True)
+            df.rename(columns={timestamp_column: cls.TIMESTAMP}, inplace=True)
 
-        if TIMESTAMP in df.columns:
-            df[TIMESTAMP] = pd.to_datetime(df[TIMESTAMP])
+        if cls.TIMESTAMP in df.columns:
+            df[cls.TIMESTAMP] = pd.to_datetime(df[cls.TIMESTAMP])
 
         cls._validate_data_frame(df)
-        return df.set_index([ITEMID, TIMESTAMP])
+        return df.set_index([cls.ITEMID, cls.TIMESTAMP])
 
     @classmethod
     def _construct_tsdf_from_iterable_dataset(cls, iterable_dataset: Iterable, num_cpus: int = -1) -> pd.DataFrame:
@@ -202,7 +205,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 start_timestamp = start_timestamp.to_timestamp(how="S")
             target = ts["target"]
             datetime_index = tuple(pd.date_range(start_timestamp, periods=len(target), freq=freq))
-            idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[ITEMID, TIMESTAMP])
+            idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[cls.ITEMID, cls.TIMESTAMP])
             return pd.Series(target, name="target", index=idx).to_frame()
 
         cls._validate_iterable(iterable_dataset)
@@ -219,32 +222,34 @@ class TimeSeriesDataFrame(pd.DataFrame):
             raise ValueError(f"data must be a pd.DataFrame, got {type(data)}")
         if not isinstance(data.index, pd.MultiIndex):
             raise ValueError(f"data must have pd.MultiIndex, got {type(data.index)}")
-        if not pd.api.types.is_datetime64_dtype(data.index.dtypes[TIMESTAMP]):
-            raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
-        if not data.index.names == (f"{ITEMID}", f"{TIMESTAMP}"):
-            raise ValueError(
+        if not pd.api.types.is_datetime64_dtype(data.index.dtypes[cls.TIMESTAMP]):
+            raise ValueError(f"for {cls.TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
+        if not data.index.names == (f"{cls.ITEMID}", f"{cls.TIMESTAMP}"):
+            raise ValueError(
+                f"data must have index names as ('{cls.ITEMID}', '{cls.TIMESTAMP}'), got {data.index.names}"
+            )
         item_id_index = data.index.levels[0]
         if not (pd.api.types.is_integer_dtype(item_id_index) or pd.api.types.is_string_dtype(item_id_index)):
-            raise ValueError(f"all entries in index `{ITEMID}` must be of integer or string dtype")
+            raise ValueError(f"all entries in index `{cls.ITEMID}` must be of integer or string dtype")
 
     @classmethod
     def _validate_data_frame(cls, df: pd.DataFrame):
         """Validate that a pd.DataFrame with ITEMID and TIMESTAMP columns can be converted to TimeSeriesDataFrame"""
         if not isinstance(df, pd.DataFrame):
             raise ValueError(f"data must be a pd.DataFrame, got {type(df)}")
-        if ITEMID not in df.columns:
-            raise ValueError(f"data must have a `{ITEMID}` column")
-        if TIMESTAMP not in df.columns:
-            raise ValueError(f"data must have a `{TIMESTAMP}` column")
-        if df[ITEMID].isnull().any():
-            raise ValueError(f"`{ITEMID}` column can not have nan")
-        if df[TIMESTAMP].isnull().any():
-            raise ValueError(f"`{TIMESTAMP}` column can not have nan")
-        if not pd.api.types.is_datetime64_dtype(df[TIMESTAMP]):
-            raise ValueError(f"for {TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
-        item_id_column = df[ITEMID]
+        if cls.ITEMID not in df.columns:
+            raise ValueError(f"data must have a `{cls.ITEMID}` column")
+        if cls.TIMESTAMP not in df.columns:
+            raise ValueError(f"data must have a `{cls.TIMESTAMP}` column")
+        if df[cls.ITEMID].isnull().any():
+            raise ValueError(f"`{cls.ITEMID}` column can not have nan")
+        if df[cls.TIMESTAMP].isnull().any():
+            raise ValueError(f"`{cls.TIMESTAMP}` column can not have nan")
+        if not pd.api.types.is_datetime64_dtype(df[cls.TIMESTAMP]):
+            raise ValueError(f"for {cls.TIMESTAMP}, the only pandas dtype allowed is `datetime64`.")
+        item_id_column = df[cls.ITEMID]
         if not (pd.api.types.is_integer_dtype(item_id_column) or pd.api.types.is_string_dtype(item_id_column)):
-            raise ValueError(f"all entries in column `{ITEMID}` must be of integer or string dtype")
+            raise ValueError(f"all entries in column `{cls.ITEMID}` must be of integer or string dtype")
 
     @classmethod
     def _validate_iterable(cls, data: Iterable):
@@ -386,7 +391,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
     @property
     def item_ids(self) -> pd.Index:
         """List of unique time series IDs contained in the data set."""
-        return self.index.unique(level=ITEMID)
+        return self.index.unique(level=self.ITEMID)
 
     @classmethod
     def _construct_static_features(
@@ -403,10 +408,12 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
         if id_column is not None:
             assert id_column in static_features.columns, f"Column '{id_column}' not found in static_features!"
-            if id_column != ITEMID and ITEMID in static_features.columns:
-                logger.warning(f"Renaming existing column '{ITEMID}' -> '__{ITEMID}' to avoid name collisions.")
-                static_features.rename(columns={ITEMID: "__" + ITEMID}, inplace=True)
-            static_features.rename(columns={id_column: ITEMID}, inplace=True)
+            if id_column != cls.ITEMID and cls.ITEMID in static_features.columns:
+                logger.warning(
+                    f"Renaming existing column '{cls.ITEMID}' -> '__{cls.ITEMID}' to avoid name collisions."
+                )
+                static_features.rename(columns={cls.ITEMID: "__" + cls.ITEMID}, inplace=True)
+            static_features.rename(columns={id_column: cls.ITEMID}, inplace=True)
         return static_features
 
     @property
@@ -431,10 +438,10 @@ class TimeSeriesDataFrame(pd.DataFrame):
 
         # Avoid modifying static features inplace
        value = value.copy()
-        if ITEMID in value.columns and value.index.name != ITEMID:
-            value = value.set_index(ITEMID)
-        if value.index.name != ITEMID:
-            value.index.rename(ITEMID, inplace=True)
+        if self.ITEMID in value.columns and value.index.name != self.ITEMID:
+            value = value.set_index(self.ITEMID)
+        if value.index.name != self.ITEMID:
+            value.index.rename(self.ITEMID, inplace=True)
         missing_item_ids = self.item_ids.difference(value.index)
         if len(missing_item_ids) > 0:
             raise ValueError(
@@ -514,7 +521,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 else:
                     raise ValueError(f"Cannot infer frequency. Multiple frequencies detected: {unique_freqs}")
             else:
-                return IRREGULAR_TIME_INDEX_FREQSTR
+                return self.IRREGULAR_TIME_INDEX_FREQSTR
         else:
             return pd.tseries.frequencies.to_offset(unique_freqs[0]).freqstr
 
@@ -526,7 +533,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
         values. For reliable results, use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.infer_frequency`.
         """
         inferred_freq = self.infer_frequency(num_items=50)
-        return None if inferred_freq == IRREGULAR_TIME_INDEX_FREQSTR else inferred_freq
+        return None if inferred_freq == self.IRREGULAR_TIME_INDEX_FREQSTR else inferred_freq
 
     @property
     def num_items(self):
@@ -735,7 +742,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
             return self.loc[mask]
         else:
             # Fall back to a slow groupby operation
-            result = self.groupby(level=ITEMID, sort=False, as_index=False).nth(slice(start_index, end_index))
+            result = self.groupby(level=self.ITEMID, sort=False, as_index=False).nth(slice(start_index, end_index))
             result.static_features = self.static_features
             return result
 
@@ -852,12 +859,12 @@ class TimeSeriesDataFrame(pd.DataFrame):
                 "It is highly recommended to call `ts_df.sort_index()` before calling `ts_df.fill_missing_values()`"
             )
 
-        grouped_df = df.groupby(level=ITEMID, sort=False, group_keys=False)
+        grouped_df = df.groupby(level=self.ITEMID, sort=False, group_keys=False)
         if method == "auto":
             filled_df = grouped_df.ffill()
             # If necessary, fill missing values at the start of each time series with bfill
             if filled_df.isna().any(axis=None):
-                filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).bfill()
+                filled_df = filled_df.groupby(level=self.ITEMID, sort=False, group_keys=False).bfill()
         elif method in ["ffill", "pad"]:
             filled_df = grouped_df.ffill()
         elif method in ["bfill", "backfill"]:
@@ -1086,8 +1093,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         def resample_chunk(chunk: Iterable[tuple[str, pd.DataFrame]]) -> pd.DataFrame:
             resampled_dfs = []
             for item_id, df in chunk:
-                resampled_df = df.resample(offset, level=TIMESTAMP, **kwargs).agg(aggregation)
-                resampled_dfs.append(pd.concat({item_id: resampled_df}, names=[ITEMID]))
+                resampled_df = df.resample(offset, level=self.TIMESTAMP, **kwargs).agg(aggregation)
+                resampled_dfs.append(pd.concat({item_id: resampled_df}, names=[self.ITEMID]))
             return pd.concat(resampled_dfs)
 
         # Resampling time for 1 item < overhead time for a single parallel job. Therefore, we group items into chunks
@@ -1095,8 +1102,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
         df = pd.DataFrame(self)
         # Make sure that timestamp index has dtype 'datetime64[ns]', otherwise index may contain NaT values.
         # See https://github.com/autogluon/autogluon/issues/4917
-        df.index = df.index.set_levels(df.index.levels[1].astype("datetime64[ns]"), level=TIMESTAMP)
-        chunks = split_into_chunks(df.groupby(level=ITEMID, sort=False), chunk_size)
+        df.index = df.index.set_levels(df.index.levels[1].astype("datetime64[ns]"), level=self.TIMESTAMP)
+        chunks = split_into_chunks(df.groupby(level=self.ITEMID, sort=False), chunk_size)
         resampled_chunks = Parallel(n_jobs=num_cpus)(delayed(resample_chunk)(chunk) for chunk in chunks)
         resampled_df = TimeSeriesDataFrame(pd.concat(resampled_chunks))
         resampled_df.static_features = self.static_features
@@ -1142,3 +1149,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
     def __getitem__(self, items: list[str]) -> Self: ...  # type: ignore
     @overload
     def __getitem__(self, item: str) -> pd.Series: ...  # type: ignore
+
+
+# TODO: remove with v2.0
+# module-level constants kept for backward compatibility.
+ITEMID = TimeSeriesDataFrame.ITEMID
+TIMESTAMP = TimeSeriesDataFrame.TIMESTAMP
autogluon/timeseries/learner.py
CHANGED
@@ -6,10 +6,9 @@ from typing import Any, Literal, Optional, Type, Union
 import pandas as pd
 
 from autogluon.core.learner import AbstractLearner
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
-from autogluon.timeseries.splitter import AbstractWindowSplitter
 from autogluon.timeseries.trainer import TimeSeriesTrainer
 from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
 from autogluon.timeseries.utils.forecast import make_future_data_frame
@@ -60,7 +59,8 @@ class TimeSeriesLearner(AbstractLearner):
         val_data: Optional[TimeSeriesDataFrame] = None,
         hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
         time_limit: Optional[float] = None,
-
+        num_val_windows: Optional[int] = None,
+        val_step_size: Optional[int] = None,
         refit_every_n_windows: Optional[int] = 1,
         random_seed: Optional[int] = None,
         **kwargs,
@@ -86,7 +86,8 @@ class TimeSeriesLearner(AbstractLearner):
             skip_model_selection=kwargs.get("skip_model_selection", False),
             enable_ensemble=kwargs.get("enable_ensemble", True),
             covariate_metadata=self.feature_generator.covariate_metadata,
-
+            num_val_windows=num_val_windows,
+            val_step_size=val_step_size,
             refit_every_n_windows=refit_every_n_windows,
             cache_predictions=self.cache_predictions,
             ensemble_model_type=self.ensemble_model_type,
autogluon/timeseries/metrics/quantile.py
CHANGED
@@ -3,7 +3,7 @@ from typing import Optional, Sequence
 import numpy as np
 import pandas as pd
 
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 
 from .abstract import TimeSeriesScorer
 from .utils import in_sample_abs_seasonal_error
autogluon/timeseries/metrics/utils.py
CHANGED
@@ -1,18 +1,18 @@
 import pandas as pd
 
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 
 
 def _get_seasonal_diffs(*, y_past: pd.Series, seasonal_period: int = 1) -> pd.Series:
-    return y_past.groupby(level=ITEMID, sort=False).diff(seasonal_period).abs()
+    return y_past.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).diff(seasonal_period).abs()
 
 
 def in_sample_abs_seasonal_error(*, y_past: pd.Series, seasonal_period: int = 1) -> pd.Series:
     """Compute seasonal naive forecast error (predict value from seasonal_period steps ago) for each time series."""
     seasonal_diffs = _get_seasonal_diffs(y_past=y_past, seasonal_period=seasonal_period)
-    return seasonal_diffs.groupby(level=ITEMID, sort=False).mean().fillna(1.0)
+    return seasonal_diffs.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).mean().fillna(1.0)
 
 
 def in_sample_squared_seasonal_error(*, y_past: pd.Series, seasonal_period: int = 1) -> pd.Series:
     seasonal_diffs = _get_seasonal_diffs(y_past=y_past, seasonal_period=seasonal_period)
-    return seasonal_diffs.pow(2.0).groupby(level=ITEMID, sort=False).mean().fillna(1.0)
+    return seasonal_diffs.pow(2.0).groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).mean().fillna(1.0)
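
Only the groupby level reference changed in these helpers; the computation is identical. As a quick sanity check of what in_sample_abs_seasonal_error returns, on invented toy values:

import pandas as pd
from autogluon.timeseries.metrics.utils import in_sample_abs_seasonal_error

# One item with target 1, 2, 4, 7: the absolute lag-1 diffs are 1, 2, 3,
# so the per-item error is their mean, 2.0.
y_past = pd.Series(
    [1.0, 2.0, 4.0, 7.0],
    index=pd.MultiIndex.from_product(
        [["A"], pd.date_range("2024-01-01", periods=4, freq="D")],
        names=["item_id", "timestamp"],
    ),
)
print(in_sample_abs_seasonal_error(y_past=y_past, seasonal_period=1))  # A -> 2.0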
autogluon/timeseries/models/autogluon_tabular/mlforecast.py
CHANGED
@@ -13,7 +13,7 @@ import autogluon.core as ag
 from autogluon.core.models import AbstractModel as AbstractTabularModel
 from autogluon.features import AutoMLPipelineFeatureGenerator
 from autogluon.tabular.registry import ag_model_registry
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics.abstract import TimeSeriesScorer
 from autogluon.timeseries.metrics.utils import in_sample_squared_seasonal_error
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
@@ -120,7 +120,9 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     ) -> tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
         if is_train:
             # All-NaN series are removed; partially-NaN series in train_data are handled inside _generate_train_val_dfs
-            all_nan_items = data.item_ids[data[self.target].isna().groupby(ITEMID, sort=False).all()]
+            all_nan_items = data.item_ids[
+                data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
+            ]
             if len(all_nan_items):
                 data = data.query("item_id not in @all_nan_items")
         else:
@@ -130,31 +132,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             data[self.target] = data[self.target].fillna(value=self._train_target_median)
         return data, known_covariates
 
-    def _process_deprecated_hyperparameters(self, model_params: dict[str, Any]) -> dict[str, Any]:
-        if "tabular_hyperparameters" in model_params:
-            logger.warning(
-                f"Hyperparameter 'tabular_hyperparameters' for {self.name} is deprecated and will be removed in v1.5. "
-                "Please use 'model_name' to specify the tabular model alias and 'model_hyperparameters' "
-                "to provide the tabular model hyperparameters."
-            )
-            tabular_hyperparameters = model_params.pop("tabular_hyperparameters")
-            if len(tabular_hyperparameters) == 1:
-                # We can automatically convert the hyperparameters if only one model is used
-                model_params["model_name"] = list(tabular_hyperparameters.keys())[0]
-                model_params["model_hyperparameters"] = tabular_hyperparameters[model_params["model_name"]]
-            else:
-                raise ValueError(
-                    f"Provided 'tabular_hyperparameters' {tabular_hyperparameters} cannot be automatically converted "
-                    f"to the new 'model_name' and 'model_hyperparameters' API for {self.name}."
-                )
-        if "tabular_fit_kwargs" in model_params:
-            logger.warning(
-                f"Hyperparameters 'tabular_fit_kwargs' for {self.name} is deprecated and is ignored by the model. "
-                "Please use 'model_name' to specify the tabular model alias and 'model_hyperparameters' "
-                "to provide the tabular model hyperparameters."
-            )
-        return model_params
-
     def _get_default_hyperparameters(self) -> dict[str, Any]:
         return {
             "max_num_items": 20_000,
@@ -298,18 +275,28 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         """
         # TODO: Add support for past_covariates
         selected_columns = self.covariate_metadata.known_covariates.copy()
-        column_name_mapping = {ITEMID: MLF_ITEMID, TIMESTAMP: MLF_TIMESTAMP}
+        column_name_mapping = {TimeSeriesDataFrame.ITEMID: MLF_ITEMID, TimeSeriesDataFrame.TIMESTAMP: MLF_TIMESTAMP}
         if include_target:
             selected_columns += [self.target]
             column_name_mapping[self.target] = MLF_TARGET
 
         df = pd.DataFrame(data)[selected_columns].reset_index()
         if static_features is not None:
-            df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
+            df = pd.merge(
+                df, static_features, how="left", on=TimeSeriesDataFrame.ITEMID, suffixes=(None, "_static_feat")
+            )
 
         for col in self._non_boolean_real_covariates:
             # Normalize non-boolean features using mean_abs scaling
-            df[f"__scaled_{col}"] = df[col] / df[col].abs().groupby(df[ITEMID]).mean().reindex(df[ITEMID]).values
+            df[f"__scaled_{col}"] = (
+                df[col]
+                / df[col]
+                .abs()
+                .groupby(df[TimeSeriesDataFrame.ITEMID])
+                .mean()
+                .reindex(df[TimeSeriesDataFrame.ITEMID])
+                .values
+            )
 
         # Convert float64 to float32 to reduce memory usage
         float64_cols = list(df.select_dtypes(include="float64"))
@@ -338,7 +325,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             if not set(train_data[col].unique()) == set([0, 1]):
                 self._non_boolean_real_covariates.append(col)
         model_params = self.get_hyperparameters()
-        model_params = self._process_deprecated_hyperparameters(model_params)
 
         mlforecast_init_args = self._get_mlforecast_init_args(train_data, model_params)
         assert self.freq is not None
@@ -612,12 +598,14 @@ class DirectTabularModel(AbstractMLForecastModel):
             predictions, repeated_item_ids=predictions[MLF_ITEMID], past_target=data[self.target]
         )
         predictions_tsdf: TimeSeriesDataFrame = TimeSeriesDataFrame(
-            predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
+            predictions.rename(
+                columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
+            )
        )
 
         if forecast_for_short_series is not None:
             predictions_tsdf = pd.concat([predictions_tsdf, forecast_for_short_series])  # type: ignore
-        predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=ITEMID)
+        predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)
 
         return predictions_tsdf
 
@@ -745,16 +733,20 @@ class RecursiveTabularModel(AbstractMLForecastModel):
             X_df=X_df,
         )
         assert isinstance(raw_predictions, pd.DataFrame)
-        raw_predictions = raw_predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
+        raw_predictions = raw_predictions.rename(
+            columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
+        )
 
         predictions: TimeSeriesDataFrame = TimeSeriesDataFrame(
             self._add_gaussian_quantiles(
-                raw_predictions, repeated_item_ids=raw_predictions[ITEMID], past_target=data[self.target]
+                raw_predictions,
+                repeated_item_ids=raw_predictions[TimeSeriesDataFrame.ITEMID],
+                past_target=data[self.target],
             )
         )
         if forecast_for_short_series is not None:
             predictions = pd.concat([predictions, forecast_for_short_series])  # type: ignore
-        return predictions.reindex(original_item_id_order, level=ITEMID)
+        return predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)
 
     def _create_tabular_model(self, model_name: str, model_hyperparameters: dict[str, Any]) -> TabularModel:
         model_class = ag_model_registry.key_to_cls(model_name)
autogluon/timeseries/models/autogluon_tabular/per_step.py
CHANGED
@@ -17,7 +17,6 @@ from autogluon.core.constants import QUANTILE, REGRESSION
 from autogluon.tabular.models import AbstractModel as AbstractTabularModel
 from autogluon.tabular.registry import ag_model_registry
 from autogluon.timeseries import TimeSeriesDataFrame
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.utils.datetime import get_lags_for_frequency, get_time_features_for_frequency
 from autogluon.timeseries.utils.warning_filters import set_loggers_level, warning_filter
@@ -115,7 +114,11 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
 
     @property
     def _ag_to_nixtla(self) -> dict:
-        return {self.target: MLF_TARGET, ITEMID: MLF_ITEMID, TIMESTAMP: MLF_TIMESTAMP}
+        return {
+            self.target: MLF_TARGET,
+            TimeSeriesDataFrame.ITEMID: MLF_ITEMID,
+            TimeSeriesDataFrame.TIMESTAMP: MLF_TIMESTAMP,
+        }
 
     def _get_default_hyperparameters(self):
         return {
@@ -246,7 +249,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
                 self._non_boolean_real_covariates.append(col)
 
         if len(self._non_boolean_real_covariates) > 0:
-            item_ids = data.index.get_level_values(level=ITEMID)
+            item_ids = data.index.get_level_values(level=TimeSeriesDataFrame.ITEMID)
             scale_per_column: dict[str, pd.Series] = {}
             columns_grouped = data[self._non_boolean_real_covariates].abs().groupby(item_ids)
             for col in self._non_boolean_real_covariates:
@@ -277,7 +280,11 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         train_df = train_data.to_data_frame().reset_index()
         if train_data.static_features is not None:
             train_df = pd.merge(
-                left=train_df, right=train_data.static_features, left_on=ITEMID, right_index=True, how="left"
+                left=train_df,
+                right=train_data.static_features,
+                left_on=TimeSeriesDataFrame.ITEMID,
+                right_index=True,
+                how="left",
             )
         train_df = train_df.rename(columns=self._ag_to_nixtla)
         train_df = train_df.assign(**{MLF_TARGET: train_df[MLF_TARGET].fillna(float("inf"))})
@@ -462,7 +469,9 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
             full_df = full_df.slice_by_timestep(-(self._max_ts_length + self.prediction_length), None)
         full_df = full_df.to_data_frame().reset_index()
         if data.static_features is not None:
-            full_df = pd.merge(full_df, data.static_features, left_on=ITEMID, right_index=True, how="left")
+            full_df = pd.merge(
+                full_df, data.static_features, left_on=TimeSeriesDataFrame.ITEMID, right_index=True, how="left"
+            )
 
         full_df = (
             full_df.rename(columns=self._ag_to_nixtla)
autogluon/timeseries/models/autogluon_tabular/transforms.py
CHANGED
@@ -8,11 +8,7 @@ from mlforecast.target_transforms import (
     _BaseGroupedArrayTargetTransform,
 )
 
-from autogluon.timeseries.dataset import (
-    ITEMID,
-    TIMESTAMP,
-    TimeSeriesDataFrame,
-)
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.transforms.target_scaler import TargetScaler, get_target_scaler
 
 from .utils import MLF_ITEMID, MLF_TIMESTAMP
@@ -26,11 +22,17 @@ class MLForecastScaler(BaseTargetTransform):
 
     def _df_to_tsdf(self, df: pd.DataFrame) -> TimeSeriesDataFrame:
         return TimeSeriesDataFrame(
-            df.rename(columns={self.id_col: ITEMID, self.time_col: TIMESTAMP}).set_index([ITEMID, TIMESTAMP])
+            df.rename(
+                columns={self.id_col: TimeSeriesDataFrame.ITEMID, self.time_col: TimeSeriesDataFrame.TIMESTAMP}
+            ).set_index([TimeSeriesDataFrame.ITEMID, TimeSeriesDataFrame.TIMESTAMP])
         )
 
     def _tsdf_to_df(self, ts_df: TimeSeriesDataFrame) -> pd.DataFrame:
-        return pd.DataFrame(ts_df).reset_index().rename(columns={ITEMID: self.id_col, TIMESTAMP: self.time_col})
+        return (
+            pd.DataFrame(ts_df)
+            .reset_index()
+            .rename(columns={TimeSeriesDataFrame.ITEMID: self.id_col, TimeSeriesDataFrame.TIMESTAMP: self.time_col})
+        )
 
     def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:  # type: ignore
         self.ag_scaler = get_target_scaler(name=self.scaler_type, target=self.target_col)