autogluon.timeseries 1.2.1b20250114__py3-none-any.whl → 1.2.1b20250131__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/dataset/ts_dataframe.py +29 -62
- autogluon/timeseries/learner.py +14 -27
- autogluon/timeseries/metrics/__init__.py +8 -8
- autogluon/timeseries/metrics/abstract.py +1 -1
- autogluon/timeseries/metrics/point.py +6 -3
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +35 -12
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +5 -9
- autogluon/timeseries/models/chronos/model.py +1 -2
- autogluon/timeseries/models/chronos/pipeline/utils.py +0 -1
- autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +2 -2
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +1 -1
- autogluon/timeseries/models/gluonts/abstract_gluonts.py +8 -3
- autogluon/timeseries/models/gluonts/torch/models.py +11 -14
- autogluon/timeseries/models/local/abstract_local_model.py +2 -3
- autogluon/timeseries/models/local/statsforecast.py +1 -1
- autogluon/timeseries/models/multi_window/multi_window_model.py +11 -7
- autogluon/timeseries/models/presets.py +3 -3
- autogluon/timeseries/predictor.py +50 -70
- autogluon/timeseries/regressor.py +41 -18
- autogluon/timeseries/splitter.py +3 -0
- autogluon/timeseries/{trainer/abstract_trainer.py → trainer.py} +166 -274
- autogluon/timeseries/transforms/covariate_scaler.py +13 -2
- autogluon/timeseries/transforms/target_scaler.py +1 -1
- autogluon/timeseries/utils/forecast.py +2 -1
- autogluon/timeseries/version.py +2 -1
- {autogluon.timeseries-1.2.1b20250114.dist-info → autogluon.timeseries-1.2.1b20250131.dist-info}/METADATA +10 -8
- {autogluon.timeseries-1.2.1b20250114.dist-info → autogluon.timeseries-1.2.1b20250131.dist-info}/RECORD +34 -36
- autogluon/timeseries/trainer/__init__.py +0 -4
- autogluon/timeseries/trainer/auto_trainer.py +0 -76
- /autogluon.timeseries-1.2.1b20250114-py3.8-nspkg.pth → /autogluon.timeseries-1.2.1b20250131-py3.9-nspkg.pth +0 -0
- {autogluon.timeseries-1.2.1b20250114.dist-info → autogluon.timeseries-1.2.1b20250131.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.2.1b20250114.dist-info → autogluon.timeseries-1.2.1b20250131.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.2.1b20250114.dist-info → autogluon.timeseries-1.2.1b20250131.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.2.1b20250114.dist-info → autogluon.timeseries-1.2.1b20250131.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.2.1b20250114.dist-info → autogluon.timeseries-1.2.1b20250131.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.2.1b20250114.dist-info → autogluon.timeseries-1.2.1b20250131.dist-info}/zip-safe +0 -0
autogluon/timeseries/dataset/ts_dataframe.py
CHANGED
@@ -12,7 +12,7 @@ from typing import Any, List, Optional, Tuple, Type, Union
 
 import pandas as pd
 from joblib.parallel import Parallel, delayed
-from pandas.core.internals import ArrayManager, BlockManager
+from pandas.core.internals import ArrayManager, BlockManager  # type: ignore
 
 from autogluon.common.loaders import load_pd
 
@@ -24,23 +24,7 @@ TIMESTAMP = "timestamp"
 IRREGULAR_TIME_INDEX_FREQSTR = "IRREG"
 
 
-class TimeSeriesDataFrameDeprecatedMixin:
-    """Contains deprecated methods from TimeSeriesDataFrame that shouldn't show up in API documentation."""
-
-    def get_reindexed_view(self, *args, **kwargs) -> TimeSeriesDataFrame:
-        raise ValueError(
-            "`TimeSeriesDataFrame.get_reindexed_view` has been deprecated. If your data has irregular timestamps, "
-            "please convert it to a regular frequency with `convert_frequency`."
-        )
-
-    def to_regular_index(self, *args, **kwargs) -> TimeSeriesDataFrame:
-        raise ValueError(
-            "`TimeSeriesDataFrame.to_regular_index` has been deprecated. "
-            "Please use `TimeSeriesDataFrame.convert_frequency` instead."
-        )
-
-
-class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
+class TimeSeriesDataFrame(pd.DataFrame):
     """A collection of univariate time series, where each row is identified by an (``item_id``, ``timestamp``) pair.
 
     For example, a time series data frame could represent the daily sales of a collection of products, where each
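
The removed mixin only raised errors that pointed users at ``convert_frequency``. A minimal sketch of the replacement call on a toy frame (the column values below are illustrative, not taken from the package):

    import pandas as pd
    from autogluon.timeseries import TimeSeriesDataFrame

    # two short daily series; "item_id" / "timestamp" / "target" are the standard column names
    df = pd.DataFrame({
        "item_id": [0, 0, 0, 1, 1],
        "timestamp": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-05", "2024-01-01", "2024-01-03"]),
        "target": [1.0, 2.0, 3.0, 4.0, 5.0],
    })
    ts_df = TimeSeriesDataFrame.from_data_frame(df)

    # instead of the removed get_reindexed_view / to_regular_index, resample onto a regular daily grid
    regular = ts_df.convert_frequency(freq="D")
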
@@ -131,20 +115,10 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         Number of CPU cores used to process the iterable dataset in parallel. Set to -1 to use all cores. This argument
         is only used when constructing a TimeSeriesDataFrame using format 4 (iterable dataset).
 
-    Attributes
-    ----------
-    freq : str
-        A pandas-compatible string describing the frequency of the time series. For example ``"D"`` for daily data,
-        ``"h"`` for hourly data, etc. This attribute is determined automatically based on the timestamps. For the full
-        list of possible values, see `pandas documentation <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
-    num_items : int
-        Number of items (time series) in the data set.
-    item_ids : pd.Index
-        List of unique time series IDs contained in the data set.
     """
 
     index: pd.MultiIndex
-    _metadata = ["_static_features", "_cached_freq"]
+    _metadata = ["_static_features"]
 
     def __init__(
         self,
@@ -174,17 +148,11 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
             data = self._construct_tsdf_from_iterable_dataset(data, num_cpus=num_cpus)
         else:
             raise ValueError(f"data must be a pd.DataFrame, Iterable, string or Path (received {type(data)}).")
-        super().__init__(data=data, *args, **kwargs)
+        super().__init__(data=data, *args, **kwargs)  # type: ignore
         self._static_features: Optional[pd.DataFrame] = None
         if static_features is not None:
             self.static_features = self._construct_static_features(static_features, id_column=id_column)
 
-        # internal value for cached frequency values that are inferred. corresponds to either a
-        # pandas-compatible frequency string, the value IRREGULAR_TIME_INDEX_FREQSTR that signals
-        # the time series have irregular timestamps (in which case tsdf.freq returns None), or None
-        # if inference was not yet performed.
-        self._cached_freq: Optional[str] = None
-
     @property
     def _constructor(self) -> Type[TimeSeriesDataFrame]:
         return TimeSeriesDataFrame
@@ -194,7 +162,6 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         # repeatedly calling TimeSeriesDataFrame constructor
         df = self._from_mgr(mgr, axes=axes)
         df._static_features = self._static_features
-        df._cached_freq = self._cached_freq
         return df
 
     @classmethod
@@ -417,12 +384,10 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
 
     @property
     def item_ids(self) -> pd.Index:
+        """List of unique time series IDs contained in the data set."""
         return self.index.unique(level=ITEMID)
 
-    @property
-    def static_features(self):
-        return self._static_features
-
+    @classmethod
     def _construct_static_features(
         cls,
         static_features: Union[pd.DataFrame, str, Path],
@@ -443,6 +408,10 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
             static_features.rename(columns={id_column: ITEMID}, inplace=True)
         return static_features
 
+    @property
+    def static_features(self):
+        return self._static_features
+
     @static_features.setter
     def static_features(self, value: Optional[pd.DataFrame]):
         # if the current item index is not a multiindex, then we are dealing with a single
@@ -477,12 +446,12 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
 
         self._static_features = value
 
-    def infer_frequency(self, num_items: Optional[int] =
+    def infer_frequency(self, num_items: Optional[int] = None, raise_if_irregular: bool = False) -> str:
         """Infer the time series frequency based on the timestamps of the observations.
 
         Parameters
         ----------
-        num_items : int or None, default =
+        num_items : int or None, default = None
             Number of items (individual time series) randomly selected to infer the frequency. Lower values speed up
             the method, but increase the chance that some items with invalid frequency are missed by subsampling.
 
@@ -545,23 +514,24 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
 
     @property
     def freq(self):
-
-        self._cached_freq = self.infer_frequency()
+        """Inferred pandas-compatible frequency of the timestamps in the data frame.
 
-
-
-
-
+        Computed using a random subset of the time series for speed. This may sometimes result in incorrectly inferred
+        values. For reliable results, use :meth:`~autogluon.timeseries.TimeSeriesDataFrame.infer_frequency`.
+        """
+        inferred_freq = self.infer_frequency(num_items=50)
+        return None if inferred_freq == IRREGULAR_TIME_INDEX_FREQSTR else inferred_freq
 
     @property
     def num_items(self):
+        """Number of items (time series) in the data set."""
         return len(self.item_ids)
 
     def num_timesteps_per_item(self) -> pd.Series:
         """Length of each time series in the dataframe."""
         return self.groupby(level=ITEMID, sort=False).size()
 
-    def copy(self: TimeSeriesDataFrame, deep: bool = True) ->
+    def copy(self: TimeSeriesDataFrame, deep: bool = True) -> TimeSeriesDataFrame:
         """Make a copy of the TimeSeriesDataFrame.
 
         When ``deep=True`` (default), a new object will be created with a copy of the calling object's data and
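
With the cached value gone, ``freq`` is re-inferred from a random subset of up to 50 items on every access; callers that need a strict check can go through ``infer_frequency`` directly. A small sketch, reusing ``ts_df`` from the earlier example:

    print(ts_df.freq)  # e.g. "D", or None if the timestamps look irregular

    # exhaustive check over all items; raises if any series has an irregular index
    freq = ts_df.infer_frequency(raise_if_irregular=True)
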
@@ -589,8 +559,6 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         # with the item index
         if hasattr(other, "_static_features"):
             self.static_features = other._static_features
-        if hasattr(other, "_cached_freq"):
-            self._cached_freq = other._cached_freq
         return self
 
     def split_by_time(self, cutoff_time: pd.Timestamp) -> Tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]:
@@ -614,8 +582,6 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         data_after = self.loc[(slice(None), slice(cutoff_time, None)), :]
         before = TimeSeriesDataFrame(data_before, static_features=self.static_features)
         after = TimeSeriesDataFrame(data_after, static_features=self.static_features)
-        before._cached_freq = self._cached_freq
-        after._cached_freq = self._cached_freq
         return before, after
 
     def slice_by_timestep(
@@ -716,7 +682,6 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         time_step_slice = slice(start_index, end_index)
         result = self.groupby(level=ITEMID, sort=False, as_index=False).nth(time_step_slice)
         result.static_features = self.static_features
-        result._cached_freq = self._cached_freq
         return result
 
     def slice_by_time(self, start_time: pd.Timestamp, end_time: pd.Timestamp) -> TimeSeriesDataFrame:
@@ -865,6 +830,15 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         dropped_df = pd.DataFrame(self).dropna(how=how)
         return TimeSeriesDataFrame(dropped_df, static_features=self.static_features)
 
+    # added for static type checker compatibility
+    def assign(self, **kwargs) -> TimeSeriesDataFrame:
+        """Assign new columns to the time series dataframe. See :meth:`pandas.DataFrame.assign` for details."""
+        return super().assign(**kwargs)  # type: ignore
+
+    # added for static type checker compatibility
+    def sort_index(self, *args, **kwargs) -> TimeSeriesDataFrame:
+        return super().sort_index(*args, **kwargs)  # type: ignore
+
     def get_model_inputs_for_scoring(
         self, prediction_length: int, known_covariates_names: Optional[List[str]] = None
     ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
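
The two overrides only narrow the return type for static analyzers; runtime behavior is inherited from pandas unchanged. A hedged sketch of why this matters, continuing the ``ts_df`` example above:

    # both calls now advertise TimeSeriesDataFrame rather than pd.DataFrame to mypy/pyright,
    # so TimeSeriesDataFrame-specific members type-check after chaining
    enriched = ts_df.assign(is_weekend=ts_df.index.get_level_values("timestamp").dayofweek >= 5)
    print(enriched.sort_index().num_items)
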
@@ -1032,8 +1006,6 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         2021-12-31    26.0
         """
         offset = pd.tseries.frequencies.to_offset(freq)
-        if self.freq == offset.freqstr:
-            return self
 
         # We need to aggregate categorical columns separately because .agg("mean") deletes all non-numeric columns
         aggregation = {}
@@ -1063,11 +1035,6 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
         resampled_df.static_features = self.static_features
         return resampled_df
 
-    def __dir__(self) -> List[str]:
-        # This hides method from IPython autocomplete, but not VSCode autocomplete
-        deprecated = ["get_reindexed_view", "to_regular_index"]
-        return [d for d in super().__dir__() if d not in deprecated]
-
     def to_data_frame(self) -> pd.DataFrame:
         """Convert `TimeSeriesDataFrame` to a `pandas.DataFrame`"""
         return pd.DataFrame(self)
autogluon/timeseries/learner.py
CHANGED
@@ -10,7 +10,7 @@ from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.splitter import AbstractWindowSplitter
-from autogluon.timeseries.trainer import
+from autogluon.timeseries.trainer import TimeSeriesTrainer
 from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
 from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
 
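
Per the file list above, trainer/abstract_trainer.py became trainer.py while trainer/__init__.py and trainer/auto_trainer.py were removed, so a single concrete trainer class remains. The old import target is truncated in this diff; only the new path is shown here as a sketch:

    # 1.2.1b20250131 layout: one module, one trainer class
    from autogluon.timeseries.trainer import TimeSeriesTrainer

Per the signature change below, ``TimeSeriesTrainer`` is also the new default ``trainer_type`` of ``TimeSeriesLearner``.
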
@@ -27,7 +27,7 @@ class TimeSeriesLearner(AbstractLearner):
         path_context: str,
         target: str = "target",
         known_covariates_names: Optional[List[str]] = None,
-        trainer_type: Type[
+        trainer_type: Type[TimeSeriesTrainer] = TimeSeriesTrainer,
         eval_metric: Union[str, TimeSeriesScorer, None] = None,
         eval_metric_seasonal_period: Optional[int] = None,
         prediction_length: int = 1,
@@ -51,33 +51,17 @@ class TimeSeriesLearner(AbstractLearner):
             target=self.target, known_covariates_names=self.known_covariates_names
         )
 
-    def load_trainer(self) ->
+    def load_trainer(self) -> TimeSeriesTrainer:  # type: ignore
         """Return the trainer object corresponding to the learner."""
-        return super().load_trainer()  #
+        return super().load_trainer()  # type: ignore
 
     def fit(
         self,
         train_data: TimeSeriesDataFrame,
-
-        hyperparameters: Union[str, Dict] = None,
-        hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
-        **kwargs,
-    ) -> None:
-        return self._fit(
-            train_data=train_data,
-            val_data=val_data,
-            hyperparameters=hyperparameters,
-            hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
-            **kwargs,
-        )
-
-    def _fit(
-        self,
-        train_data: TimeSeriesDataFrame,
+        hyperparameters: Union[str, Dict],
         val_data: Optional[TimeSeriesDataFrame] = None,
-        hyperparameters: Union[str, Dict] = None,
         hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
-        time_limit: Optional[
+        time_limit: Optional[float] = None,
         val_splitter: Optional[AbstractWindowSplitter] = None,
         refit_every_n_windows: Optional[int] = 1,
         random_seed: Optional[int] = None,
@@ -111,7 +95,9 @@ class TimeSeriesLearner(AbstractLearner):
                 ensemble_model_type=self.ensemble_model_type,
             )
         )
-
+
+        assert issubclass(self.trainer_type, TimeSeriesTrainer)
+        self.trainer: Optional[TimeSeriesTrainer] = self.trainer_type(**trainer_init_kwargs)
         self.trainer_path = self.trainer.path
         self.save()
 
@@ -151,6 +137,7 @@ class TimeSeriesLearner(AbstractLearner):
             raise ValueError(
                 f"known_covariates {self.known_covariates_names} for the forecast horizon should be provided at prediction time."
             )
+        assert known_covariates is not None
 
         if self.target in known_covariates.columns:
             known_covariates = known_covariates.drop(self.target, axis=1)
@@ -165,7 +152,7 @@ class TimeSeriesLearner(AbstractLearner):
             data, prediction_length=self.prediction_length, freq=self.freq
         )
         try:
-            known_covariates = known_covariates.loc[forecast_index]
+            known_covariates = known_covariates.loc[forecast_index]  # type: ignore
         except KeyError:
             raise ValueError(
                 f"known_covariates should include the values for prediction_length={self.prediction_length} "
@@ -197,7 +184,7 @@ class TimeSeriesLearner(AbstractLearner):
     def score(
         self,
         data: TimeSeriesDataFrame,
-        model: AbstractTimeSeriesModel = None,
+        model: Optional[Union[str, AbstractTimeSeriesModel]] = None,
         metric: Union[str, TimeSeriesScorer, None] = None,
         use_cache: bool = True,
     ) -> float:
@@ -223,7 +210,7 @@ class TimeSeriesLearner(AbstractLearner):
         time_limit: Optional[float] = None,
         method: Literal["naive", "permutation"] = "permutation",
         subsample_size: int = 50,
-        num_iterations: int =
+        num_iterations: Optional[int] = None,
         random_seed: Optional[int] = None,
         relative_scores: bool = False,
         include_confidence_band: bool = True,
@@ -337,7 +324,7 @@ class TimeSeriesLearner(AbstractLearner):
             List of models removed from memory
         """
         unpersisted_models = self.load_trainer().unpersist()
-        self.trainer = None
+        self.trainer = None  # type: ignore
         return unpersisted_models
 
     def refit_full(self, model: str = "all") -> Dict[str, str]:
autogluon/timeseries/metrics/__init__.py
CHANGED
@@ -50,28 +50,28 @@ EXPERIMENTAL_METRICS = {
 def check_get_evaluation_metric(
     eval_metric: Union[str, TimeSeriesScorer, Type[TimeSeriesScorer], None] = None
 ) -> TimeSeriesScorer:
+    scorer: TimeSeriesScorer
     if isinstance(eval_metric, TimeSeriesScorer):
-
+        scorer = eval_metric
     elif isinstance(eval_metric, type) and issubclass(eval_metric, TimeSeriesScorer):
         # e.g., user passed `eval_metric=CustomMetric` instead of `eval_metric=CustomMetric()`
-
+        scorer = eval_metric()
     elif isinstance(eval_metric, str):
-
-        metric_name = eval_metric.upper()
+        metric_name = DEPRECATED_METRICS.get(eval_metric, eval_metric).upper()
         if metric_name in AVAILABLE_METRICS:
-
+            scorer = AVAILABLE_METRICS[metric_name]()
         elif metric_name in EXPERIMENTAL_METRICS:
-
+            scorer = EXPERIMENTAL_METRICS[metric_name]()
         else:
             raise ValueError(
                 f"Time series metric {eval_metric} not supported. Available metrics are:\n"
                 f"{pformat(sorted(AVAILABLE_METRICS.keys()))}"
             )
     elif eval_metric is None:
-
+        scorer = AVAILABLE_METRICS[DEFAULT_METRIC_NAME]()
     else:
         raise ValueError(
             f"eval_metric must be of type str, TimeSeriesScorer or None "
             f"(received eval_metric = {eval_metric} of type {type(eval_metric)})"
         )
-    return
+    return scorer
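
The rewritten helper threads a single ``scorer`` variable through every branch and now routes legacy names through ``DEPRECATED_METRICS`` before the lookup. A hedged sketch of the accepted argument types ("MASE" is one of the metrics defined in this package; the custom class is hypothetical):

    from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric

    scorer = check_get_evaluation_metric("MASE")     # metric name; upper-cased before lookup
    scorer = check_get_evaluation_metric(None)       # falls back to the default metric
    scorer = check_get_evaluation_metric(scorer)     # an existing TimeSeriesScorer instance passes through

    class MyMetric(TimeSeriesScorer):
        def compute_metric(self, data_future, predictions, target="target", **kwargs) -> float:
            return 0.0  # placeholder

    scorer = check_get_evaluation_metric(MyMetric)   # a TimeSeriesScorer subclass gets instantiated
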
autogluon/timeseries/metrics/abstract.py
CHANGED
@@ -161,7 +161,7 @@ class TimeSeriesScorer:
     @staticmethod
     def _safemean(array: Union[np.ndarray, pd.Series]) -> float:
         """Compute mean of a numpy array-like object, ignoring inf, -inf and nan values."""
-        return np.mean(array[np.isfinite(array)])
+        return float(np.mean(array[np.isfinite(array)]))
 
     @staticmethod
     def _get_point_forecast_score_inputs(
autogluon/timeseries/metrics/point.py
CHANGED
@@ -248,7 +248,7 @@ class MASE(TimeSeriesScorer):
 
         num_items = len(self._past_abs_seasonal_error)
         # Reshape abs errors into [num_items, prediction_length] to normalize per item without groupby
-        abs_errors = np.abs(y_true.
+        abs_errors = np.abs(y_true.to_numpy() - y_pred.to_numpy()).reshape([num_items, -1])
         return self._safemean(abs_errors / self._past_abs_seasonal_error.values[:, None])
 
 
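
For context, the ``reshape`` groups the flat, item-major error array into one row per item so that each row can be divided by that item's in-sample seasonal error (the ``_safemean`` call additionally drops non-finite values, which this sketch omits):

    import numpy as np

    num_items, prediction_length = 3, 4
    y_true = np.random.rand(num_items * prediction_length)      # flattened ground truth, item-major order
    y_pred = np.random.rand(num_items * prediction_length)      # flattened point forecasts
    past_abs_seasonal_error = np.random.rand(num_items) + 0.1   # one scaling factor per item

    abs_errors = np.abs(y_true - y_pred).reshape([num_items, -1])
    mase = np.mean(abs_errors / past_abs_seasonal_error[:, None])
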
@@ -308,7 +308,7 @@ class RMSSE(TimeSeriesScorer):
 
         num_items = len(self._past_squared_seasonal_error)
         # Reshape squared errors into [num_items, prediction_length] to normalize per item without groupby
-        squared_errors = ((y_true.
+        squared_errors = ((y_true.to_numpy() - y_pred.to_numpy()) ** 2.0).reshape([num_items, -1])
         return np.sqrt(self._safemean(squared_errors / self._past_squared_seasonal_error.values[:, None]))
 
 
@@ -335,7 +335,9 @@ class RMSLE(TimeSeriesScorer):
     - `Scikit-learn: <https://scikit-learn.org/stable/modules/model_evaluation.html#mean-squared-log-error>`_
     """
 
-    def compute_metric(
+    def compute_metric(
+        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
+    ) -> float:
         y_true, y_pred = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
         y_pred = np.clip(y_pred, a_min=0.0, a_max=None)
 
@@ -399,6 +401,7 @@ class WCD(TimeSeriesScorer):
 
     def _fast_cumsum(self, y: np.ndarray) -> np.ndarray:
         """Compute the cumulative sum for each consecutive `prediction_length` items in the array."""
+        assert self.num_items is not None, "Make sure to call `save_past_metrics` before `compute_metric`"
         y = y.reshape(self.num_items, -1)
         return np.nancumsum(y, axis=1).ravel()
 
autogluon/timeseries/models/abstract/abstract_timeseries_model.py
CHANGED
@@ -3,7 +3,7 @@ import os
 import re
 import time
 from contextlib import nullcontext
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import pandas as pd
 
@@ -87,9 +87,9 @@ class AbstractTimeSeriesModel(AbstractModel):
     _preprocess_nonadaptive = None
     _preprocess_set_features = None
 
-
-
-
+    _supports_known_covariates: bool = False
+    _supports_past_covariates: bool = False
+    _supports_static_features: bool = False
 
     def __init__(
         self,
@@ -138,6 +138,7 @@ class AbstractTimeSeriesModel(AbstractModel):
         self.target_scaler: Optional[LocalTargetScaler] = None
         self.covariate_scaler: Optional[CovariateScaler] = None
         self.covariate_regressor: Optional[CovariateRegressor] = None
+        self.fit_time: Optional[float]
 
     def __repr__(self) -> str:
         return self.name
@@ -170,6 +171,23 @@ class AbstractTimeSeriesModel(AbstractModel):
         """Load the cached OOF predictions from disk."""
         return load_pkl.load(path=os.path.join(path, "utils", cls._oof_filename), verbose=verbose)
 
+    @property
+    def supports_known_covariates(self) -> bool:
+        return (
+            self._get_model_params().get("covariate_regressor") is not None
+            or self.__class__._supports_known_covariates
+        )
+
+    @property
+    def supports_past_covariates(self) -> bool:
+        return self.__class__._supports_past_covariates
+
+    @property
+    def supports_static_features(self) -> bool:
+        return (
+            self._get_model_params().get("covariate_regressor") is not None or self.__class__._supports_static_features
+        )
+
     def get_oof_predictions(self):
         if self._oof_predictions is None:
             self._oof_predictions = self.load_oof_predictions(self.path)
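
Covariate support is now a property of the model instance rather than a plain class attribute, because configuring a ``covariate_regressor`` hyperparameter makes any model accept known covariates and static features. A hedged sketch of how the flags could be inspected (assuming the model can be constructed standalone with just ``freq`` and ``prediction_length``; the hyperparameter value is illustrative):

    from autogluon.timeseries.models.autogluon_tabular.mlforecast import DirectTabularModel

    model = DirectTabularModel(freq="D", prediction_length=7)
    print(model.supports_known_covariates)  # True via the class-level _supports_known_covariates flag
    print(model.supports_past_covariates)   # False unless a subclass opts in

    # a model configured with a covariate_regressor reports covariate support even if its
    # class-level flags are False
    model_with_regressor = DirectTabularModel(
        freq="D", prediction_length=7, hyperparameters={"covariate_regressor": "CAT"}
    )
    print(model_with_regressor.supports_static_features)  # True
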
@@ -389,7 +407,7 @@
 
     def predict(
         self,
-        data: Union[TimeSeriesDataFrame, Dict[str, TimeSeriesDataFrame]],
+        data: Union[TimeSeriesDataFrame, Dict[str, Optional[TimeSeriesDataFrame]]],
         known_covariates: Optional[TimeSeriesDataFrame] = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
@@ -402,7 +420,7 @@
 
         Parameters
         ----------
-        data: Union[TimeSeriesDataFrame, Dict[str, TimeSeriesDataFrame]]
+        data: Union[TimeSeriesDataFrame, Dict[str, Optional[TimeSeriesDataFrame]]]
             The dataset where each time series is the "context" for predictions. For ensemble models that depend on
             the predictions of other models, this method may accept a dictionary of previous models' predictions.
         known_covariates : Optional[TimeSeriesDataFrame]
@@ -441,10 +459,7 @@
 
         if self.covariate_regressor is not None:
             if known_covariates is None:
-
-                    data, prediction_length=self.prediction_length, freq=self.freq
-                )
-                known_covariates = pd.DataFrame(index=forecast_index, dtype="float32")
+                known_covariates = pd.DataFrame(index=self.get_forecast_horizon_index(data), dtype="float32")
 
             predictions = self.covariate_regressor.inverse_transform(
                 predictions,
@@ -456,6 +471,10 @@
             predictions = self.target_scaler.inverse_transform(predictions)
         return predictions
 
+    def get_forecast_horizon_index(self, data: TimeSeriesDataFrame) -> pd.MultiIndex:
+        """For each item in the dataframe, get timestamps for the next `prediction_length` time steps into the future."""
+        return get_forecast_horizon_index_ts_dataframe(data, prediction_length=self.prediction_length, freq=self.freq)
+
     def _predict(
         self,
         data: Union[TimeSeriesDataFrame, Dict[str, TimeSeriesDataFrame]],
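
The new method wraps the existing utility so that the MLForecast, Chronos, and GluonTS models below no longer import it directly. A sketch of what the underlying utility returns (``ts_df`` as in the earlier example; the horizon length is illustrative):

    from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe

    # (item_id, timestamp) MultiIndex covering the next 7 steps after the end of each series
    future_index = get_forecast_horizon_index_ts_dataframe(ts_df, prediction_length=7, freq="D")
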
@@ -543,8 +562,12 @@
         return False
 
     def hyperparameter_tune(
-        self,
-
+        self,
+        hyperparameter_tune_kwargs: Union[str, dict] = "auto",
+        hpo_executor: Optional[HpoExecutor] = None,
+        time_limit: Optional[float] = None,
+        **kwargs,
+    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
         if hpo_executor is None:
             hpo_executor = self._get_default_hpo_executor()
         default_num_trials = kwargs.pop("default_num_trials", None)
autogluon/timeseries/models/autogluon_tabular/mlforecast.py
CHANGED
@@ -19,7 +19,6 @@ from autogluon.timeseries.utils.datetime import (
     get_seasonality,
     get_time_features_for_frequency,
 )
-from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
 from autogluon.timeseries.utils.warning_filters import warning_filter
 
 from .utils import MLF_ITEMID, MLF_TARGET, MLF_TIMESTAMP
@@ -54,6 +53,9 @@ class TabularEstimator(BaseEstimator):
 
 
 class AbstractMLForecastModel(AbstractTimeSeriesModel):
+    _supports_known_covariates = True
+    _supports_static_features = True
+
     def __init__(
         self,
         freq: Optional[str] = None,
@@ -468,9 +470,6 @@ class DirectTabularModel(AbstractMLForecastModel):
         end of each time series).
     """
 
-    supports_known_covariates = True
-    supports_static_features = True
-
     @property
     def is_quantile_model(self) -> bool:
         return self.eval_metric.needs_quantile
@@ -519,7 +518,7 @@ class DirectTabularModel(AbstractMLForecastModel):
         if known_covariates is not None:
             data_future = known_covariates.copy()
         else:
-            future_index =
+            future_index = self.get_forecast_horizon_index(data)
             data_future = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
         # MLForecast raises exception of target contains NaN. We use inf as placeholder, replace them by NaN afterwards
         data_future[self.target] = float("inf")
@@ -624,9 +623,6 @@ class RecursiveTabularModel(AbstractMLForecastModel):
         end of each time series).
     """
 
-    supports_known_covariates = True
-    supports_static_features = True
-
     def _get_model_params(self) -> dict:
         model_params = super()._get_model_params()
         model_params.setdefault("target_scaler", "standard")
@@ -652,7 +648,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
         if self._max_ts_length is not None:
             new_df = self._shorten_all_series(new_df, self._max_ts_length)
         if known_covariates is None:
-            future_index =
+            future_index = self.get_forecast_horizon_index(data)
             known_covariates = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
         X_df = self._to_mlforecast_df(known_covariates, data.static_features, include_target=False)
         # If both covariates & static features are missing, set X_df = None to avoid exception from MLForecast
autogluon/timeseries/models/chronos/model.py
CHANGED
@@ -11,7 +11,6 @@ import pandas
 from autogluon.common.loaders import load_pkl
 from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
-from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
 from autogluon.timeseries.utils.warning_filters import disable_duplicate_logs, warning_filter
 
 logger = logging.getLogger("autogluon.timeseries.models.chronos")
@@ -631,7 +630,7 @@ class ChronosModel(AbstractTimeSeriesModel):
                 axis=1,
             ),
             columns=["mean"] + [str(q) for q in self.quantile_levels],
-            index=
+            index=self.get_forecast_horizon_index(data),
         )
 
         return TimeSeriesDataFrame(df)
autogluon/timeseries/models/chronos/pipeline/utils.py
CHANGED
@@ -253,7 +253,6 @@ class ChronosInferenceDataset:
         assert context_length > 0
         self.context_length = context_length
         self.target_array = target_df[target_column].to_numpy(dtype=np.float32)
-        self.freq = target_df.freq
 
         # store pointer to start:end of each time series
         cum_sizes = target_df.num_timesteps_per_item().values.cumsum()
autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py
CHANGED
@@ -20,7 +20,7 @@ class AbstractTimeSeriesEnsembleModel(AbstractTimeSeriesModel):
         self,
         predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
         data_per_window: List[TimeSeriesDataFrame],
-        time_limit: Optional[
+        time_limit: Optional[float] = None,
         **kwargs,
     ):
         """Fit ensemble model given predictions of candidate base models and the true data.
@@ -67,7 +67,7 @@ class AbstractTimeSeriesEnsembleModel(AbstractTimeSeriesModel):
         """
         raise NotImplementedError
 
-    def predict(self, data: Dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
+    def predict(self, data: Dict[str, Optional[TimeSeriesDataFrame]], **kwargs) -> TimeSeriesDataFrame:
         raise NotImplementedError
 
     def remap_base_models(self, model_refit_map: Dict[str, str]) -> None:
autogluon/timeseries/models/ensemble/greedy_ensemble.py
CHANGED
@@ -143,7 +143,7 @@ class TimeSeriesGreedyEnsemble(AbstractTimeSeriesEnsembleModel):
     def model_weights(self) -> np.ndarray:
         return np.array(list(self.model_to_weight.values()), dtype=np.float64)
 
-    def predict(self, data: Dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
+    def predict(self, data: Dict[str, Optional[TimeSeriesDataFrame]], **kwargs) -> TimeSeriesDataFrame:
         if set(data.keys()) != set(self.model_names):
             raise ValueError(
                 f"Set of models given for prediction in {self.name} differ from those provided during initialization."
autogluon/timeseries/models/gluonts/abstract_gluonts.py
CHANGED
@@ -24,7 +24,6 @@ from autogluon.tabular.models.tabular_nn.utils.categorical_encoders import (
 from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP, TimeSeriesDataFrame
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.utils.datetime import norm_freq_str
-from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
 from autogluon.timeseries.utils.warning_filters import disable_root_logger, warning_filter
 
 # NOTE: We avoid imports for torch and lightning.pytorch at the top level and hide them inside class methods.
@@ -162,7 +161,9 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
     _dummy_gluonts_freq = "D"
     # default number of samples for prediction
    default_num_samples: int = 250
-
+
+    #: whether the GluonTS model supports categorical variables as covariates
+    _supports_cat_covariates: bool = False
 
     def __init__(
         self,
@@ -227,6 +228,10 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
         model.gts_predictor = PyTorchPredictor.deserialize(Path(path) / cls.gluonts_model_path, device="auto")
         return model
 
+    @property
+    def supports_cat_covariates(self) -> bool:
+        return self.__class__._supports_cat_covariates
+
     def _get_hpo_backend(self):
         return RAY_BACKEND
 
@@ -530,7 +535,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
         predicted_targets = self._predict_gluonts_forecasts(data, known_covariates=known_covariates, **kwargs)
         df = self._gluonts_forecasts_to_data_frame(
             predicted_targets,
-            forecast_index=
+            forecast_index=self.get_forecast_horizon_index(data),
         )
         return df
 