autogluon.timeseries 0.8.3b20230817__tar.gz → 0.8.3b20230819__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/PKG-INFO +1 -1
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/dataset/ts_dataframe.py +121 -36
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/learner.py +9 -31
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/__init__.py +4 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +1 -4
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/__init__.py +2 -1
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/abstract_local_model.py +5 -1
- autogluon.timeseries-0.8.3b20230819/src/autogluon/timeseries/models/local/naive.py +164 -0
- autogluon.timeseries-0.8.3b20230819/src/autogluon/timeseries/models/local/npts.py +59 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/statsforecast.py +0 -2
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/presets.py +6 -1
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/predictor.py +145 -80
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/trainer/abstract_trainer.py +1 -4
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/trainer/auto_trainer.py +1 -1
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/SOURCES.txt +1 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/requires.txt +3 -3
- autogluon.timeseries-0.8.3b20230817/src/autogluon/timeseries/models/local/naive.py +0 -62
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/setup.cfg +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/setup.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/configs/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/dataset/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/evaluator.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/autogluon_tabular/direct_tabular.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/statsmodels.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/splitter.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/trainer/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/__init__.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/features.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/forecast.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/seasonality.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
- {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
|
@@ -4,7 +4,7 @@ import copy
|
|
|
4
4
|
import itertools
|
|
5
5
|
import logging
|
|
6
6
|
from collections.abc import Iterable
|
|
7
|
-
from typing import Any, List, Optional, Tuple, Type
|
|
7
|
+
from typing import Any, List, Optional, Tuple, Type, Union
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
@@ -12,6 +12,7 @@ from joblib.parallel import Parallel, delayed
|
|
|
12
12
|
from pandas.core.internals import ArrayManager, BlockManager
|
|
13
13
|
|
|
14
14
|
from autogluon.common.loaders import load_pd
|
|
15
|
+
from autogluon.common.utils.deprecated_utils import Deprecated
|
|
15
16
|
|
|
16
17
|
logger = logging.getLogger(__name__)
|
|
17
18
|
|
|
@@ -322,7 +323,6 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
322
323
|
id_column: Optional[str] = None,
|
|
323
324
|
timestamp_column: Optional[str] = None,
|
|
324
325
|
) -> pd.DataFrame:
|
|
325
|
-
|
|
326
326
|
df = df.copy()
|
|
327
327
|
if id_column is not None:
|
|
328
328
|
assert id_column in df.columns, f"Column '{id_column}' not found!"
|
|
@@ -497,7 +497,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
497
497
|
|
|
498
498
|
Examples
|
|
499
499
|
--------
|
|
500
|
-
>>>
|
|
500
|
+
>>> ts_df
|
|
501
501
|
target
|
|
502
502
|
item_id timestamp
|
|
503
503
|
0 2019-01-01 0
|
|
@@ -582,7 +582,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
582
582
|
|
|
583
583
|
Returns
|
|
584
584
|
-------
|
|
585
|
-
ts_df: TimeSeriesDataFrame
|
|
585
|
+
ts_df : TimeSeriesDataFrame
|
|
586
586
|
A new time series dataframe containing entries of the original time series between start and end timestamps.
|
|
587
587
|
"""
|
|
588
588
|
|
|
@@ -596,7 +596,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
596
596
|
)
|
|
597
597
|
|
|
598
598
|
@classmethod
|
|
599
|
-
def from_pickle(cls, filepath_or_buffer: Any) ->
|
|
599
|
+
def from_pickle(cls, filepath_or_buffer: Any) -> TimeSeriesDataFrame:
|
|
600
600
|
"""Convenience method to read pickled time series data frames. If the read pickle
|
|
601
601
|
file refers to a plain pandas DataFrame, it will be cast to a TimeSeriesDataFrame.
|
|
602
602
|
|
|
@@ -607,7 +607,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
607
607
|
|
|
608
608
|
Returns
|
|
609
609
|
-------
|
|
610
|
-
ts_df: TimeSeriesDataFrame
|
|
610
|
+
ts_df : TimeSeriesDataFrame
|
|
611
611
|
The pickled time series data frame.
|
|
612
612
|
"""
|
|
613
613
|
try:
|
|
@@ -616,6 +616,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
616
616
|
except Exception as err: # noqa
|
|
617
617
|
raise IOError(f"Could not load pickled data set due to error: {str(err)}")
|
|
618
618
|
|
|
619
|
+
@Deprecated(min_version_to_warn="0.9", min_version_to_error="1.0")
|
|
619
620
|
def get_reindexed_view(self, freq: str = "S") -> TimeSeriesDataFrame:
|
|
620
621
|
"""Returns a new TimeSeriesDataFrame object with the same underlying data and
|
|
621
622
|
static features as the current data frame, except the time index is replaced by
|
|
@@ -649,7 +650,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
649
650
|
|
|
650
651
|
return df_view
|
|
651
652
|
|
|
652
|
-
|
|
653
|
+
@Deprecated(min_version_to_warn="0.9", min_version_to_error="1.0", new="convert_frequency")
|
|
654
|
+
def to_regular_index(self, freq: str) -> TimeSeriesDataFrame:
|
|
653
655
|
"""Fill the gaps in an irregularly-sampled time series with NaNs.
|
|
654
656
|
|
|
655
657
|
Parameters
|
|
@@ -659,7 +661,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
659
661
|
|
|
660
662
|
Examples
|
|
661
663
|
--------
|
|
662
|
-
>>>
|
|
664
|
+
>>> ts_df
|
|
663
665
|
target
|
|
664
666
|
item_id timestamp
|
|
665
667
|
0 2019-01-01 NaN
|
|
@@ -669,7 +671,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
669
671
|
1 2019-02-04 3.0
|
|
670
672
|
2019-02-07 4.0
|
|
671
673
|
|
|
672
|
-
>>>
|
|
674
|
+
>>> ts_df.to_regular_index(freq="D")
|
|
673
675
|
target
|
|
674
676
|
item_id timestamp
|
|
675
677
|
0 2019-01-01 NaN
|
|
@@ -685,30 +687,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
685
687
|
2019-02-07 4.0
|
|
686
688
|
|
|
687
689
|
"""
|
|
688
|
-
|
|
689
|
-
if self.freq != freq:
|
|
690
|
-
raise ValueError(
|
|
691
|
-
f"TimeSeriesDataFrame already has a regular index with freq '{self.freq}' "
|
|
692
|
-
f"that cannot be converted to the given freq '{freq}'"
|
|
693
|
-
)
|
|
694
|
-
else:
|
|
695
|
-
return self
|
|
696
|
-
|
|
697
|
-
filled_series = []
|
|
698
|
-
for item_id, time_series in self.groupby(level=ITEMID, sort=False):
|
|
699
|
-
time_series = time_series.droplevel(ITEMID)
|
|
700
|
-
timestamps = time_series.index
|
|
701
|
-
resampled_ts = time_series.resample(freq).asfreq()
|
|
702
|
-
if not timestamps.isin(resampled_ts.index).all():
|
|
703
|
-
raise ValueError(
|
|
704
|
-
f"Irregularly-sampled timestamps in this TimeSeriesDataFrame are not compatible "
|
|
705
|
-
f"with the given frequency '{freq}'"
|
|
706
|
-
)
|
|
707
|
-
filled_series.append(pd.concat({item_id: resampled_ts}, names=[ITEMID]))
|
|
690
|
+
return self.convert_frequency(freq=freq)
|
|
708
691
|
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> "TimeSeriesDataFrame":
|
|
692
|
+
def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> TimeSeriesDataFrame:
|
|
712
693
|
"""Fill missing values represented by NaN.
|
|
713
694
|
|
|
714
695
|
Parameters
|
|
@@ -726,7 +707,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
726
707
|
|
|
727
708
|
Examples
|
|
728
709
|
--------
|
|
729
|
-
>>>
|
|
710
|
+
>>> ts_df
|
|
730
711
|
target
|
|
731
712
|
item_id timestamp
|
|
732
713
|
0 2019-01-01 NaN
|
|
@@ -741,7 +722,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
741
722
|
2019-02-06 NaN
|
|
742
723
|
2019-02-07 4.0
|
|
743
724
|
|
|
744
|
-
>>>
|
|
725
|
+
>>> ts_df.fill_missing_values(method="auto")
|
|
745
726
|
target
|
|
746
727
|
item_id timestamp
|
|
747
728
|
0 2019-01-01 1.0
|
|
@@ -765,7 +746,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
765
746
|
|
|
766
747
|
grouped_df = pd.DataFrame(self).groupby(level=ITEMID, sort=False, group_keys=False)
|
|
767
748
|
if method == "auto":
|
|
768
|
-
filled_df = grouped_df.fillna(method="ffill")
|
|
749
|
+
filled_df = grouped_df.fillna(method="ffill")
|
|
750
|
+
# Fill missing values at the start of each time series with bfill
|
|
751
|
+
filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).fillna(method="bfill")
|
|
769
752
|
elif method in ["ffill", "pad"]:
|
|
770
753
|
filled_df = grouped_df.fillna(method="ffill")
|
|
771
754
|
elif method in ["bfill", "backfill"]:
|
|
@@ -782,13 +765,14 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
782
765
|
)
|
|
783
766
|
return TimeSeriesDataFrame(filled_df, static_features=self.static_features)
|
|
784
767
|
|
|
785
|
-
def dropna(self, how: str = "any") ->
|
|
768
|
+
def dropna(self, how: str = "any") -> TimeSeriesDataFrame:
|
|
786
769
|
"""Drop rows containing NaNs.
|
|
787
770
|
|
|
788
771
|
Parameters
|
|
789
772
|
----------
|
|
790
773
|
how : {"any", "all"}, default = "any"
|
|
791
774
|
Determine if row or column is removed from TimeSeriesDataFrame, when we have at least one NaN or all NaN.
|
|
775
|
+
|
|
792
776
|
- "any" : If any NaN values are present, drop that row or column.
|
|
793
777
|
- "all" : If all values are NaN, drop that row or column.
|
|
794
778
|
"""
|
|
@@ -864,3 +848,104 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
864
848
|
data.static_features.index = data.static_features.index.astype(str)
|
|
865
849
|
data.static_features.index += suffix
|
|
866
850
|
return train_data, test_data
|
|
851
|
+
|
|
852
|
+
def convert_frequency(
|
|
853
|
+
self,
|
|
854
|
+
freq: Union[str, pd.DateOffset],
|
|
855
|
+
agg_numeric: str = "mean",
|
|
856
|
+
agg_categorical: str = "first",
|
|
857
|
+
**kwargs,
|
|
858
|
+
) -> TimeSeriesDataFrame:
|
|
859
|
+
"""Convert each time series in the data frame to the given frequency.
|
|
860
|
+
|
|
861
|
+
This method is useful for two purposes:
|
|
862
|
+
|
|
863
|
+
1. Converting an irregularly-sampled time series to a regular time index.
|
|
864
|
+
2. Aggregating time series data by downsampling (e.g., convert daily sales into weekly sales)
|
|
865
|
+
|
|
866
|
+
Parameters
|
|
867
|
+
----------
|
|
868
|
+
freq : Union[str, pd.DateOffset]
|
|
869
|
+
Frequency to which the data should be converted. See [pandas frequency aliases](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)
|
|
870
|
+
for supported values.
|
|
871
|
+
agg_numeric : {"max", "min", "sum", "mean", "median", "first", "last"}, default = "mean"
|
|
872
|
+
Aggregation method applied to numeric columns.
|
|
873
|
+
agg_categorical : {"first", "last"}, default = "first"
|
|
874
|
+
Aggregation method applied to categorical columns.
|
|
875
|
+
**kwargs
|
|
876
|
+
Additional keywords arguments that will be passed to ``pandas.DataFrameGroupBy.resample``.
|
|
877
|
+
|
|
878
|
+
Returns
|
|
879
|
+
-------
|
|
880
|
+
ts_df : TimeSeriesDataFrame
|
|
881
|
+
A new time series dataframe with time series resampled at the new frequency. Output may contain missing
|
|
882
|
+
values represented by ``NaN`` if original data does not have information for the given period.
|
|
883
|
+
|
|
884
|
+
Examples
|
|
885
|
+
--------
|
|
886
|
+
Convert irregularly-sampled time series data to a regular index
|
|
887
|
+
|
|
888
|
+
>>> ts_df
|
|
889
|
+
target
|
|
890
|
+
item_id timestamp
|
|
891
|
+
0 2019-01-01 NaN
|
|
892
|
+
2019-01-03 1.0
|
|
893
|
+
2019-01-06 2.0
|
|
894
|
+
2019-01-07 NaN
|
|
895
|
+
1 2019-02-04 3.0
|
|
896
|
+
2019-02-07 4.0
|
|
897
|
+
>>> ts_df.convert_frequency(freq="D")
|
|
898
|
+
target
|
|
899
|
+
item_id timestamp
|
|
900
|
+
0 2019-01-01 NaN
|
|
901
|
+
2019-01-02 NaN
|
|
902
|
+
2019-01-03 1.0
|
|
903
|
+
2019-01-04 NaN
|
|
904
|
+
2019-01-05 NaN
|
|
905
|
+
2019-01-06 2.0
|
|
906
|
+
2019-01-07 NaN
|
|
907
|
+
1 2019-02-04 3.0
|
|
908
|
+
2019-02-05 NaN
|
|
909
|
+
2019-02-06 NaN
|
|
910
|
+
2019-02-07 4.0
|
|
911
|
+
|
|
912
|
+
Downsample quarterly data to yearly frequency
|
|
913
|
+
|
|
914
|
+
>>> ts_df
|
|
915
|
+
target
|
|
916
|
+
item_id timestamp
|
|
917
|
+
0 2020-03-31 1.0
|
|
918
|
+
2020-06-30 2.0
|
|
919
|
+
2020-09-30 3.0
|
|
920
|
+
2020-12-31 4.0
|
|
921
|
+
2021-03-31 5.0
|
|
922
|
+
2021-06-30 6.0
|
|
923
|
+
2021-09-30 7.0
|
|
924
|
+
2021-12-31 8.0
|
|
925
|
+
>>> ts_df.convert_frequency("Y")
|
|
926
|
+
target
|
|
927
|
+
item_id timestamp
|
|
928
|
+
0 2020-12-31 2.5
|
|
929
|
+
2021-12-31 6.5
|
|
930
|
+
>>> ts_df.convert_frequency("Y", agg_numeric="sum")
|
|
931
|
+
target
|
|
932
|
+
item_id timestamp
|
|
933
|
+
0 2020-12-31 10.0
|
|
934
|
+
2021-12-31 26.0
|
|
935
|
+
"""
|
|
936
|
+
if self.freq == pd.tseries.frequencies.to_offset(freq).freqstr:
|
|
937
|
+
return self
|
|
938
|
+
|
|
939
|
+
# We need to aggregate categorical columns separately because .agg("mean") deletes all non-numeric columns
|
|
940
|
+
aggregation = {}
|
|
941
|
+
for col in self.columns:
|
|
942
|
+
if pd.api.types.is_numeric_dtype(self.dtypes[col]):
|
|
943
|
+
aggregation[col] = agg_numeric
|
|
944
|
+
else:
|
|
945
|
+
aggregation[col] = agg_categorical
|
|
946
|
+
|
|
947
|
+
resampled_df = TimeSeriesDataFrame(
|
|
948
|
+
self.groupby(level=ITEMID, sort=False).resample(freq, level=TIMESTAMP, **kwargs).agg(aggregation)
|
|
949
|
+
)
|
|
950
|
+
resampled_df.static_features = self.static_features
|
|
951
|
+
return resampled_df
|
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import time
|
|
3
|
-
from typing import Any, Dict, List, Optional,
|
|
3
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
|
4
4
|
|
|
5
|
-
import numpy as np
|
|
6
5
|
import pandas as pd
|
|
7
6
|
|
|
8
7
|
from autogluon.core.learner import AbstractLearner
|
|
9
|
-
from autogluon.timeseries.dataset.ts_dataframe import
|
|
8
|
+
from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
|
|
10
9
|
from autogluon.timeseries.evaluator import TimeSeriesEvaluator
|
|
11
10
|
from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
|
|
12
|
-
from autogluon.timeseries.splitter import AbstractTimeSeriesSplitter, LastWindowSplitter
|
|
13
11
|
from autogluon.timeseries.trainer import AbstractTimeSeriesTrainer, AutoTimeSeriesTrainer
|
|
14
12
|
from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
|
|
15
13
|
from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
|
|
@@ -31,7 +29,6 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
31
29
|
eval_metric: Optional[str] = None,
|
|
32
30
|
eval_metric_seasonal_period: Optional[int] = None,
|
|
33
31
|
prediction_length: int = 1,
|
|
34
|
-
ignore_time_index: bool = False,
|
|
35
32
|
cache_predictions: bool = True,
|
|
36
33
|
**kwargs,
|
|
37
34
|
):
|
|
@@ -42,11 +39,7 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
42
39
|
self.target = target
|
|
43
40
|
self.known_covariates_names = [] if known_covariates_names is None else known_covariates_names
|
|
44
41
|
self.prediction_length = prediction_length
|
|
45
|
-
self.quantile_levels = kwargs.get(
|
|
46
|
-
"quantile_levels",
|
|
47
|
-
kwargs.get("quantiles", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
|
|
48
|
-
)
|
|
49
|
-
self.ignore_time_index = ignore_time_index
|
|
42
|
+
self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
|
|
50
43
|
self.cache_predictions = cache_predictions
|
|
51
44
|
|
|
52
45
|
self.feature_generator = TimeSeriesFeatureGenerator(
|
|
@@ -160,28 +153,13 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
160
153
|
)
|
|
161
154
|
|
|
162
155
|
forecast_index = get_forecast_horizon_index_ts_dataframe(data, prediction_length=self.prediction_length)
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
"
|
|
156
|
+
try:
|
|
157
|
+
known_covariates = known_covariates.loc[forecast_index]
|
|
158
|
+
except KeyError:
|
|
159
|
+
raise ValueError(
|
|
160
|
+
f"known_covariates should include the values for prediction_length={self.prediction_length} "
|
|
161
|
+
"many time steps into the future."
|
|
168
162
|
)
|
|
169
|
-
known_covariates = known_covariates.loc[forecast_index.unique(level=ITEMID)]
|
|
170
|
-
if (known_covariates.num_timesteps_per_item() < self.prediction_length).any():
|
|
171
|
-
raise ValueError(
|
|
172
|
-
f"known_covariates should include the values for prediction_length={self.prediction_length} "
|
|
173
|
-
"many time steps into the future."
|
|
174
|
-
)
|
|
175
|
-
known_covariates = known_covariates.slice_by_timestep(None, self.prediction_length)
|
|
176
|
-
known_covariates.index = forecast_index
|
|
177
|
-
else:
|
|
178
|
-
try:
|
|
179
|
-
known_covariates = known_covariates.loc[forecast_index]
|
|
180
|
-
except KeyError:
|
|
181
|
-
raise ValueError(
|
|
182
|
-
f"known_covariates should include the values for prediction_length={self.prediction_length} "
|
|
183
|
-
"many time steps into the future."
|
|
184
|
-
)
|
|
185
163
|
return known_covariates
|
|
186
164
|
|
|
187
165
|
def predict(
|
|
@@ -4,9 +4,12 @@ from .local import (
|
|
|
4
4
|
ARIMAModel,
|
|
5
5
|
AutoARIMAModel,
|
|
6
6
|
AutoETSModel,
|
|
7
|
+
AverageModel,
|
|
7
8
|
DynamicOptimizedThetaModel,
|
|
8
9
|
ETSModel,
|
|
9
10
|
NaiveModel,
|
|
11
|
+
NPTSModel,
|
|
12
|
+
SeasonalAverageModel,
|
|
10
13
|
SeasonalNaiveModel,
|
|
11
14
|
ThetaModel,
|
|
12
15
|
ThetaStatsmodelsModel,
|
|
@@ -24,6 +27,7 @@ __all__ = [
|
|
|
24
27
|
"DirectTabularModel",
|
|
25
28
|
"RecursiveTabularModel",
|
|
26
29
|
"NaiveModel",
|
|
30
|
+
"NPTSModel",
|
|
27
31
|
"SeasonalNaiveModel",
|
|
28
32
|
"AutoETSModel",
|
|
29
33
|
"AutoARIMAModel",
|
|
@@ -105,10 +105,7 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
105
105
|
|
|
106
106
|
self.freq: str = freq
|
|
107
107
|
self.prediction_length: int = prediction_length
|
|
108
|
-
self.quantile_levels = kwargs.get(
|
|
109
|
-
"quantile_levels",
|
|
110
|
-
kwargs.get("quantiles", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
|
|
111
|
-
)
|
|
108
|
+
self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
|
|
112
109
|
self._oof_predictions: Optional[TimeSeriesDataFrame] = None
|
|
113
110
|
|
|
114
111
|
def __repr__(self) -> str:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import joblib.externals.loky
|
|
2
2
|
|
|
3
|
-
from .naive import NaiveModel, SeasonalNaiveModel
|
|
3
|
+
from .naive import AverageModel, NaiveModel, SeasonalAverageModel, SeasonalNaiveModel
|
|
4
|
+
from .npts import NPTSModel
|
|
4
5
|
from .statsforecast import AutoARIMAModel, AutoETSModel, DynamicOptimizedThetaModel, ThetaModel
|
|
5
6
|
from .statsmodels import ARIMAModel, ETSModel, ThetaStatsmodelsModel
|
|
6
7
|
|
|
@@ -34,6 +34,9 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
34
34
|
Argument that can be passed to the underlying local model.
|
|
35
35
|
default_n_jobs : Union[int, float]
|
|
36
36
|
Default number of CPU cores used to train models. If float, this fraction of CPU cores will be used.
|
|
37
|
+
default_max_ts_length : Optional[int]
|
|
38
|
+
If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
|
|
39
|
+
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
37
40
|
init_time_in_seconds : int
|
|
38
41
|
Time that it takes to initialize the model in seconds (e.g., because of JIT compilation by Numba).
|
|
39
42
|
If time_limit is below this number, model won't be trained.
|
|
@@ -41,6 +44,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
41
44
|
|
|
42
45
|
allowed_local_model_args: List[str] = []
|
|
43
46
|
default_n_jobs: Union[int, float] = AG_DEFAULT_N_JOBS
|
|
47
|
+
default_max_ts_length: Optional[int] = 2500
|
|
44
48
|
init_time_in_seconds: int = 0
|
|
45
49
|
|
|
46
50
|
def __init__(
|
|
@@ -65,7 +69,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
65
69
|
raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
|
|
66
70
|
# Default values, potentially overridden inside _fit()
|
|
67
71
|
self.use_fallback_model = hyperparameters.pop("use_fallback_model", True)
|
|
68
|
-
self.max_ts_length = hyperparameters.pop("max_ts_length",
|
|
72
|
+
self.max_ts_length = hyperparameters.pop("max_ts_length", self.default_max_ts_length)
|
|
69
73
|
|
|
70
74
|
super().__init__(
|
|
71
75
|
path=path,
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
from typing import Callable
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from autogluon.timeseries.models.local.abstract_local_model import AbstractLocalModel, seasonal_naive_forecast
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class NaiveModel(AbstractLocalModel):
|
|
10
|
+
"""Baseline model that sets the forecast equal to the last observed value.
|
|
11
|
+
|
|
12
|
+
Quantiles are obtained by assuming that the residuals follow zero-mean normal distribution, scale of which is
|
|
13
|
+
estimated from the empirical distribution of the residuals.
|
|
14
|
+
As described in https://otexts.com/fpp3/prediction-intervals.html
|
|
15
|
+
|
|
16
|
+
Other Parameters
|
|
17
|
+
----------------
|
|
18
|
+
n_jobs : int or float, default = 0.5
|
|
19
|
+
Number of CPU cores used to fit the models in parallel.
|
|
20
|
+
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
21
|
+
When set to a positive integer, that many cores are used.
|
|
22
|
+
When set to -1, all CPU cores are used.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
allowed_local_model_args = ["seasonal_period"]
|
|
26
|
+
|
|
27
|
+
def _predict_with_local_model(
|
|
28
|
+
self,
|
|
29
|
+
time_series: pd.Series,
|
|
30
|
+
local_model_args: dict,
|
|
31
|
+
) -> pd.DataFrame:
|
|
32
|
+
return seasonal_naive_forecast(
|
|
33
|
+
target=time_series.values.ravel(),
|
|
34
|
+
prediction_length=self.prediction_length,
|
|
35
|
+
quantile_levels=self.quantile_levels,
|
|
36
|
+
seasonal_period=1,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class SeasonalNaiveModel(AbstractLocalModel):
|
|
41
|
+
"""Baseline model that sets the forecast equal to the last observed value from the same season.
|
|
42
|
+
|
|
43
|
+
Quantiles are obtained by assuming that the residuals follow zero-mean normal distribution, scale of which is
|
|
44
|
+
estimated from the empirical distribution of the residuals.
|
|
45
|
+
As described in https://otexts.com/fpp3/prediction-intervals.html
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
Other Parameters
|
|
49
|
+
----------------
|
|
50
|
+
seasonal_period : int or None, default = None
|
|
51
|
+
Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
|
|
52
|
+
weekly cycle or 12 for monthly data with an annual cycle.
|
|
53
|
+
When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
|
|
54
|
+
specified manually by providing an integer > 1.
|
|
55
|
+
If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
|
|
56
|
+
Seasonality will also be disabled, if the length of the time series is < seasonal_period.
|
|
57
|
+
n_jobs : int or float, default = 0.5
|
|
58
|
+
Number of CPU cores used to fit the models in parallel.
|
|
59
|
+
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
60
|
+
When set to a positive integer, that many cores are used.
|
|
61
|
+
When set to -1, all CPU cores are used.
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
allowed_local_model_args = ["seasonal_period"]
|
|
65
|
+
|
|
66
|
+
def _predict_with_local_model(
|
|
67
|
+
self,
|
|
68
|
+
time_series: np.ndarray,
|
|
69
|
+
local_model_args: dict,
|
|
70
|
+
) -> pd.DataFrame:
|
|
71
|
+
return seasonal_naive_forecast(
|
|
72
|
+
target=time_series.values.ravel(),
|
|
73
|
+
prediction_length=self.prediction_length,
|
|
74
|
+
quantile_levels=self.quantile_levels,
|
|
75
|
+
seasonal_period=local_model_args["seasonal_period"],
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _get_quantile_function(q: float) -> Callable:
|
|
80
|
+
"""Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
|
|
81
|
+
|
|
82
|
+
def quantile_fn(x: pd.Series) -> pd.Series:
|
|
83
|
+
return x.quantile(q)
|
|
84
|
+
|
|
85
|
+
quantile_fn.__name__ = str(q)
|
|
86
|
+
return quantile_fn
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class AverageModel(AbstractLocalModel):
|
|
90
|
+
"""Baseline model that sets the forecast equal to the historic average or quantile.
|
|
91
|
+
|
|
92
|
+
Other Parameters
|
|
93
|
+
----------------
|
|
94
|
+
n_jobs : int or float, default = 0.5
|
|
95
|
+
Number of CPU cores used to fit the models in parallel.
|
|
96
|
+
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
97
|
+
When set to a positive integer, that many cores are used.
|
|
98
|
+
When set to -1, all CPU cores are used.
|
|
99
|
+
max_ts_length : Optional[int], default = None
|
|
100
|
+
If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
|
|
101
|
+
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
allowed_local_model_args = ["seasonal_period"]
|
|
105
|
+
default_max_ts_length = None
|
|
106
|
+
|
|
107
|
+
def _predict_with_local_model(
|
|
108
|
+
self,
|
|
109
|
+
time_series: pd.Series,
|
|
110
|
+
local_model_args: dict,
|
|
111
|
+
) -> pd.DataFrame:
|
|
112
|
+
agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]
|
|
113
|
+
stats_marginal = time_series.agg(agg_functions)
|
|
114
|
+
stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
|
|
115
|
+
return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class SeasonalAverageModel(AbstractLocalModel):
    """Baseline model that sets the forecast equal to the historic average or quantile in the same season.

    Other Parameters
    ----------------
    seasonal_period : int or None, default = None
        Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
        weekly cycle or 12 for monthly data with an annual cycle.
        When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
        specified manually by providing an integer > 1.
        If seasonal_period (inferred or provided) is equal to 1, all time steps belong to the same season, so the
        forecast equals the average or quantile of the entire time series.
        If a season in the forecast horizon has no observations in the time series (e.g., if the length of the
        time series is < seasonal_period), statistics computed over all time steps are used for that season.
    n_jobs : int or float, default = 0.5
        Number of CPU cores used to fit the models in parallel.
        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
        When set to a positive integer, that many cores are used.
        When set to -1, all CPU cores are used.
    max_ts_length : Optional[int], default = None
        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
        This significantly speeds up fitting and usually leads to no change in accuracy.
    """

    allowed_local_model_args = ["seasonal_period"]
    default_max_ts_length = None

    def _predict_with_local_model(
        self,
        time_series: pd.Series,
        local_model_args: dict,
    ) -> pd.DataFrame:
        """Forecast a single time series with the mean and quantiles of the matching season.

        Reads ``local_model_args["seasonal_period"]``. The result has one row per step of the forecast horizon and
        one column per statistic ("mean" followed by one column for each level in ``self.quantile_levels``).
        """
        seasonal_period = local_model_args["seasonal_period"]
        agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]

        # Compute mean & quantiles for each season. The season of a time step is its position modulo seasonal_period
        # NOTE(review): selecting the column by self.target assumes that time_series.name == self.target
        ts_df = time_series.reset_index(drop=True).to_frame()
        ts_df["season"] = ts_df.index % seasonal_period
        stats_per_season = ts_df.groupby("season")[self.target].agg(agg_functions)

        # Seasons of the forecast horizon continue the cycle after the last observed time step
        next_season = ts_df["season"].iloc[-1] + 1
        season_in_forecast_horizon = np.arange(next_season, next_season + self.prediction_length) % seasonal_period
        result = stats_per_season.reindex(season_in_forecast_horizon)

        if np.any(result.isna().values):
            # Use statistics over all timesteps to fill values for seasons that are missing from training data
            stats_marginal = time_series.agg(agg_functions)
            result = result.fillna(stats_marginal)
        return result
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from autogluon.timeseries.models.local.abstract_local_model import AbstractLocalModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class NPTSModel(AbstractLocalModel):
    """Non-Parametric Time Series Forecaster.

    This model is especially well suited for forecasting sparse or intermittent time series with many zero values.

    Based on `gluonts.model.npts.NPTSPredictor <https://ts.gluon.ai/stable/api/gluonts/gluonts.model.npts.html>`_.
    See GluonTS documentation for more information about the model.

    Other Parameters
    ----------------
    kernel_type : {"exponential", "uniform"}, default = "exponential"
        Kernel used by the model.
    exp_kernel_weights : float, default = 1.0
        Scaling factor used in the exponential kernel.
    use_seasonal_model : bool, default = True
        Whether to use the seasonal variant of the model.
    n_jobs : int or float, default = 0.5
        Number of CPU cores used to fit the models in parallel.
        When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
        When set to a positive integer, that many cores are used.
        When set to -1, all CPU cores are used.
    max_ts_length : Optional[int], default = 2500
        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
        This significantly speeds up fitting and usually leads to no change in accuracy.
    """

    allowed_local_model_args = [
        "kernel_type",
        "exp_kernel_weights",
        "use_seasonal_model",
        "seasonal_period",
    ]

    def _predict_with_local_model(
        self,
        time_series: pd.Series,
        local_model_args: dict,
    ) -> pd.DataFrame:
        """Forecast a single time series with ``gluonts.model.npts.NPTSPredictor``.

        All entries of ``local_model_args`` except ``seasonal_period`` are forwarded to the predictor. The result
        contains the column "mean" followed by one column for each level in ``self.quantile_levels``.
        """
        # Imported lazily so that gluonts is only loaded when this model is actually used
        from gluonts.model.npts import NPTSPredictor

        # seasonal_period is not forwarded to NPTSPredictor. Build a filtered copy instead of popping the key, so
        # that the dict owned by the caller is not modified and a missing key does not raise a KeyError
        predictor_kwargs = {key: value for key, value in local_model_args.items() if key != "seasonal_period"}

        predictor = NPTSPredictor(
            freq=self.freq,
            prediction_length=self.prediction_length,
            **predictor_kwargs,
        )
        # Shallow copy: only the index is replaced, the values are shared with the input series
        ts = time_series.copy(deep=False)
        ts.index = ts.index.to_period()
        forecast = predictor.predict_time_series(ts, num_samples=100)
        forecast_dict = {"mean": forecast.mean}
        for q in self.quantile_levels:
            forecast_dict[str(q)] = forecast.quantile(q)
        return pd.DataFrame(forecast_dict)
|
|
@@ -255,8 +255,6 @@ class ThetaModel(AbstractStatsForecastModel):
|
|
|
255
255
|
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
256
256
|
"""
|
|
257
257
|
|
|
258
|
-
max_ts_length = 3000
|
|
259
|
-
|
|
260
258
|
allowed_local_model_args = [
|
|
261
259
|
"decomposition_type",
|
|
262
260
|
"seasonal_period",
|