autogluon.timeseries 0.8.3b20230817__tar.gz → 0.8.3b20230819__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (53)
  1. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/PKG-INFO +1 -1
  2. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/dataset/ts_dataframe.py +121 -36
  3. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/learner.py +9 -31
  4. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/__init__.py +4 -0
  5. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +1 -4
  6. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/__init__.py +2 -1
  7. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/abstract_local_model.py +5 -1
  8. autogluon.timeseries-0.8.3b20230819/src/autogluon/timeseries/models/local/naive.py +164 -0
  9. autogluon.timeseries-0.8.3b20230819/src/autogluon/timeseries/models/local/npts.py +59 -0
  10. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/statsforecast.py +0 -2
  11. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/presets.py +6 -1
  12. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/predictor.py +145 -80
  13. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/trainer/abstract_trainer.py +1 -4
  14. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/trainer/auto_trainer.py +1 -1
  15. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/version.py +1 -1
  16. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
  17. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/SOURCES.txt +1 -0
  18. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/requires.txt +3 -3
  19. autogluon.timeseries-0.8.3b20230817/src/autogluon/timeseries/models/local/naive.py +0 -62
  20. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/setup.cfg +0 -0
  21. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/setup.py +0 -0
  22. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/__init__.py +0 -0
  23. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/configs/__init__.py +0 -0
  24. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
  25. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/dataset/__init__.py +0 -0
  26. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/evaluator.py +0 -0
  27. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
  28. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
  29. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
  30. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/autogluon_tabular/direct_tabular.py +0 -0
  31. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +0 -0
  32. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
  33. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
  34. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
  35. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
  36. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
  37. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -0
  38. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  39. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
  40. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/local/statsmodels.py +0 -0
  41. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
  42. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +0 -0
  43. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/splitter.py +0 -0
  44. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/trainer/__init__.py +0 -0
  45. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/__init__.py +0 -0
  46. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/features.py +0 -0
  47. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/forecast.py +0 -0
  48. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/seasonality.py +0 -0
  49. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
  50. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
  51. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
  52. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
  53. {autogluon.timeseries-0.8.3b20230817 → autogluon.timeseries-0.8.3b20230819}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 0.8.3b20230817
3
+ Version: 0.8.3b20230819
4
4
  Summary: AutoML for Image, Text, and Tabular Data
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -4,7 +4,7 @@ import copy
4
4
  import itertools
5
5
  import logging
6
6
  from collections.abc import Iterable
7
- from typing import Any, List, Optional, Tuple, Type
7
+ from typing import Any, List, Optional, Tuple, Type, Union
8
8
 
9
9
  import numpy as np
10
10
  import pandas as pd
@@ -12,6 +12,7 @@ from joblib.parallel import Parallel, delayed
12
12
  from pandas.core.internals import ArrayManager, BlockManager
13
13
 
14
14
  from autogluon.common.loaders import load_pd
15
+ from autogluon.common.utils.deprecated_utils import Deprecated
15
16
 
16
17
  logger = logging.getLogger(__name__)
17
18
 
@@ -322,7 +323,6 @@ class TimeSeriesDataFrame(pd.DataFrame):
322
323
  id_column: Optional[str] = None,
323
324
  timestamp_column: Optional[str] = None,
324
325
  ) -> pd.DataFrame:
325
-
326
326
  df = df.copy()
327
327
  if id_column is not None:
328
328
  assert id_column in df.columns, f"Column '{id_column}' not found!"
@@ -497,7 +497,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
497
497
 
498
498
  Examples
499
499
  --------
500
- >>> print(ts_dataframe)
500
+ >>> ts_df
501
501
  target
502
502
  item_id timestamp
503
503
  0 2019-01-01 0
@@ -582,7 +582,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
582
582
 
583
583
  Returns
584
584
  -------
585
- ts_df: TimeSeriesDataFrame
585
+ ts_df : TimeSeriesDataFrame
586
586
  A new time series dataframe containing entries of the original time series between start and end timestamps.
587
587
  """
588
588
 
@@ -596,7 +596,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
596
596
  )
597
597
 
598
598
  @classmethod
599
- def from_pickle(cls, filepath_or_buffer: Any) -> "TimeSeriesDataFrame":
599
+ def from_pickle(cls, filepath_or_buffer: Any) -> TimeSeriesDataFrame:
600
600
  """Convenience method to read pickled time series data frames. If the read pickle
601
601
  file refers to a plain pandas DataFrame, it will be cast to a TimeSeriesDataFrame.
602
602
 
@@ -607,7 +607,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
607
607
 
608
608
  Returns
609
609
  -------
610
- ts_df: TimeSeriesDataFrame
610
+ ts_df : TimeSeriesDataFrame
611
611
  The pickled time series data frame.
612
612
  """
613
613
  try:
@@ -616,6 +616,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
616
616
  except Exception as err: # noqa
617
617
  raise IOError(f"Could not load pickled data set due to error: {str(err)}")
618
618
 
619
+ @Deprecated(min_version_to_warn="0.9", min_version_to_error="1.0")
619
620
  def get_reindexed_view(self, freq: str = "S") -> TimeSeriesDataFrame:
620
621
  """Returns a new TimeSeriesDataFrame object with the same underlying data and
621
622
  static features as the current data frame, except the time index is replaced by
@@ -649,7 +650,8 @@ class TimeSeriesDataFrame(pd.DataFrame):
649
650
 
650
651
  return df_view
651
652
 
652
- def to_regular_index(self, freq: str) -> "TimeSeriesDataFrame":
653
+ @Deprecated(min_version_to_warn="0.9", min_version_to_error="1.0", new="convert_frequency")
654
+ def to_regular_index(self, freq: str) -> TimeSeriesDataFrame:
653
655
  """Fill the gaps in an irregularly-sampled time series with NaNs.
654
656
 
655
657
  Parameters
@@ -659,7 +661,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
659
661
 
660
662
  Examples
661
663
  --------
662
- >>> print(ts_dataframe)
664
+ >>> ts_df
663
665
  target
664
666
  item_id timestamp
665
667
  0 2019-01-01 NaN
@@ -669,7 +671,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
669
671
  1 2019-02-04 3.0
670
672
  2019-02-07 4.0
671
673
 
672
- >>> print(ts_dataframe.to_regular_index(freq="D"))
674
+ >>> ts_df.to_regular_index(freq="D")
673
675
  target
674
676
  item_id timestamp
675
677
  0 2019-01-01 NaN
@@ -685,30 +687,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
685
687
  2019-02-07 4.0
686
688
 
687
689
  """
688
- if self.freq is not None:
689
- if self.freq != freq:
690
- raise ValueError(
691
- f"TimeSeriesDataFrame already has a regular index with freq '{self.freq}' "
692
- f"that cannot be converted to the given freq '{freq}'"
693
- )
694
- else:
695
- return self
696
-
697
- filled_series = []
698
- for item_id, time_series in self.groupby(level=ITEMID, sort=False):
699
- time_series = time_series.droplevel(ITEMID)
700
- timestamps = time_series.index
701
- resampled_ts = time_series.resample(freq).asfreq()
702
- if not timestamps.isin(resampled_ts.index).all():
703
- raise ValueError(
704
- f"Irregularly-sampled timestamps in this TimeSeriesDataFrame are not compatible "
705
- f"with the given frequency '{freq}'"
706
- )
707
- filled_series.append(pd.concat({item_id: resampled_ts}, names=[ITEMID]))
690
+ return self.convert_frequency(freq=freq)
708
691
 
709
- return TimeSeriesDataFrame(pd.concat(filled_series), static_features=self.static_features)
710
-
711
- def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> "TimeSeriesDataFrame":
692
+ def fill_missing_values(self, method: str = "auto", value: float = 0.0) -> TimeSeriesDataFrame:
712
693
  """Fill missing values represented by NaN.
713
694
 
714
695
  Parameters
@@ -726,7 +707,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
726
707
 
727
708
  Examples
728
709
  --------
729
- >>> print(ts_dataframe)
710
+ >>> ts_df
730
711
  target
731
712
  item_id timestamp
732
713
  0 2019-01-01 NaN
@@ -741,7 +722,7 @@ class TimeSeriesDataFrame(pd.DataFrame):
741
722
  2019-02-06 NaN
742
723
  2019-02-07 4.0
743
724
 
744
- >>> print(ts_dataframe.fill_missing_values(method="auto"))
725
+ >>> ts_df.fill_missing_values(method="auto")
745
726
  target
746
727
  item_id timestamp
747
728
  0 2019-01-01 1.0
@@ -765,7 +746,9 @@ class TimeSeriesDataFrame(pd.DataFrame):
765
746
 
766
747
  grouped_df = pd.DataFrame(self).groupby(level=ITEMID, sort=False, group_keys=False)
767
748
  if method == "auto":
768
- filled_df = grouped_df.fillna(method="ffill").fillna(method="bfill")
749
+ filled_df = grouped_df.fillna(method="ffill")
750
+ # Fill missing values at the start of each time series with bfill
751
+ filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).fillna(method="bfill")
769
752
  elif method in ["ffill", "pad"]:
770
753
  filled_df = grouped_df.fillna(method="ffill")
771
754
  elif method in ["bfill", "backfill"]:
@@ -782,13 +765,14 @@ class TimeSeriesDataFrame(pd.DataFrame):
782
765
  )
783
766
  return TimeSeriesDataFrame(filled_df, static_features=self.static_features)
784
767
 
785
- def dropna(self, how: str = "any") -> "TimeSeriesDataFrame":
768
+ def dropna(self, how: str = "any") -> TimeSeriesDataFrame:
786
769
  """Drop rows containing NaNs.
787
770
 
788
771
  Parameters
789
772
  ----------
790
773
  how : {"any", "all"}, default = "any"
791
774
  Determine if row or column is removed from TimeSeriesDataFrame, when we have at least one NaN or all NaN.
775
+
792
776
  - "any" : If any NaN values are present, drop that row or column.
793
777
  - "all" : If all values are NaN, drop that row or column.
794
778
  """
@@ -864,3 +848,104 @@ class TimeSeriesDataFrame(pd.DataFrame):
864
848
  data.static_features.index = data.static_features.index.astype(str)
865
849
  data.static_features.index += suffix
866
850
  return train_data, test_data
851
+
852
+ def convert_frequency(
853
+ self,
854
+ freq: Union[str, pd.DateOffset],
855
+ agg_numeric: str = "mean",
856
+ agg_categorical: str = "first",
857
+ **kwargs,
858
+ ) -> TimeSeriesDataFrame:
859
+ """Convert each time series in the data frame to the given frequency.
860
+
861
+ This method is useful for two purposes:
862
+
863
+ 1. Converting an irregularly-sampled time series to a regular time index.
864
+ 2. Aggregating time series data by downsampling (e.g., convert daily sales into weekly sales)
865
+
866
+ Parameters
867
+ ----------
868
+ freq : Union[str, pd.DateOffset]
869
+ Frequency to which the data should be converted. See [pandas frequency aliases](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)
870
+ for supported values.
871
+ agg_numeric : {"max", "min", "sum", "mean", "median", "first", "last"}, default = "mean"
872
+ Aggregation method applied to numeric columns.
873
+ agg_categorical : {"first", "last"}, default = "first"
874
+ Aggregation method applied to categorical columns.
875
+ **kwargs
876
+ Additional keywords arguments that will be passed to ``pandas.DataFrameGroupBy.resample``.
877
+
878
+ Returns
879
+ -------
880
+ ts_df : TimeSeriesDataFrame
881
+ A new time series dataframe with time series resampled at the new frequency. Output may contain missing
882
+ values represented by ``NaN`` if original data does not have information for the given period.
883
+
884
+ Examples
885
+ --------
886
+ Convert irregularly-sampled time series data to a regular index
887
+
888
+ >>> ts_df
889
+ target
890
+ item_id timestamp
891
+ 0 2019-01-01 NaN
892
+ 2019-01-03 1.0
893
+ 2019-01-06 2.0
894
+ 2019-01-07 NaN
895
+ 1 2019-02-04 3.0
896
+ 2019-02-07 4.0
897
+ >>> ts_df.convert_frequency(freq="D")
898
+ target
899
+ item_id timestamp
900
+ 0 2019-01-01 NaN
901
+ 2019-01-02 NaN
902
+ 2019-01-03 1.0
903
+ 2019-01-04 NaN
904
+ 2019-01-05 NaN
905
+ 2019-01-06 2.0
906
+ 2019-01-07 NaN
907
+ 1 2019-02-04 3.0
908
+ 2019-02-05 NaN
909
+ 2019-02-06 NaN
910
+ 2019-02-07 4.0
911
+
912
+ Downsample quarterly data to yearly frequency
913
+
914
+ >>> ts_df
915
+ target
916
+ item_id timestamp
917
+ 0 2020-03-31 1.0
918
+ 2020-06-30 2.0
919
+ 2020-09-30 3.0
920
+ 2020-12-31 4.0
921
+ 2021-03-31 5.0
922
+ 2021-06-30 6.0
923
+ 2021-09-30 7.0
924
+ 2021-12-31 8.0
925
+ >>> ts_df.convert_frequency("Y")
926
+ target
927
+ item_id timestamp
928
+ 0 2020-12-31 2.5
929
+ 2021-12-31 6.5
930
+ >>> ts_df.convert_frequency("Y", agg_numeric="sum")
931
+ target
932
+ item_id timestamp
933
+ 0 2020-12-31 10.0
934
+ 2021-12-31 26.0
935
+ """
936
+ if self.freq == pd.tseries.frequencies.to_offset(freq).freqstr:
937
+ return self
938
+
939
+ # We need to aggregate categorical columns separately because .agg("mean") deletes all non-numeric columns
940
+ aggregation = {}
941
+ for col in self.columns:
942
+ if pd.api.types.is_numeric_dtype(self.dtypes[col]):
943
+ aggregation[col] = agg_numeric
944
+ else:
945
+ aggregation[col] = agg_categorical
946
+
947
+ resampled_df = TimeSeriesDataFrame(
948
+ self.groupby(level=ITEMID, sort=False).resample(freq, level=TIMESTAMP, **kwargs).agg(aggregation)
949
+ )
950
+ resampled_df.static_features = self.static_features
951
+ return resampled_df
@@ -1,15 +1,13 @@
1
1
  import logging
2
2
  import time
3
- from typing import Any, Dict, List, Optional, Tuple, Type, Union
3
+ from typing import Any, Dict, List, Optional, Type, Union
4
4
 
5
- import numpy as np
6
5
  import pandas as pd
7
6
 
8
7
  from autogluon.core.learner import AbstractLearner
9
- from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
8
+ from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
10
9
  from autogluon.timeseries.evaluator import TimeSeriesEvaluator
11
10
  from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
12
- from autogluon.timeseries.splitter import AbstractTimeSeriesSplitter, LastWindowSplitter
13
11
  from autogluon.timeseries.trainer import AbstractTimeSeriesTrainer, AutoTimeSeriesTrainer
14
12
  from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
15
13
  from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
@@ -31,7 +29,6 @@ class TimeSeriesLearner(AbstractLearner):
31
29
  eval_metric: Optional[str] = None,
32
30
  eval_metric_seasonal_period: Optional[int] = None,
33
31
  prediction_length: int = 1,
34
- ignore_time_index: bool = False,
35
32
  cache_predictions: bool = True,
36
33
  **kwargs,
37
34
  ):
@@ -42,11 +39,7 @@ class TimeSeriesLearner(AbstractLearner):
42
39
  self.target = target
43
40
  self.known_covariates_names = [] if known_covariates_names is None else known_covariates_names
44
41
  self.prediction_length = prediction_length
45
- self.quantile_levels = kwargs.get(
46
- "quantile_levels",
47
- kwargs.get("quantiles", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
48
- )
49
- self.ignore_time_index = ignore_time_index
42
+ self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
50
43
  self.cache_predictions = cache_predictions
51
44
 
52
45
  self.feature_generator = TimeSeriesFeatureGenerator(
@@ -160,28 +153,13 @@ class TimeSeriesLearner(AbstractLearner):
160
153
  )
161
154
 
162
155
  forecast_index = get_forecast_horizon_index_ts_dataframe(data, prediction_length=self.prediction_length)
163
- if self.ignore_time_index:
164
- logger.warning(
165
- "Because `ignore_time_index=True`, the predictor will ignore the time index of `known_covariates`. "
166
- "Please make sure that `known_covariates` contain only the future values of the known covariates "
167
- "(and the past values are not included)."
156
+ try:
157
+ known_covariates = known_covariates.loc[forecast_index]
158
+ except KeyError:
159
+ raise ValueError(
160
+ f"known_covariates should include the values for prediction_length={self.prediction_length} "
161
+ "many time steps into the future."
168
162
  )
169
- known_covariates = known_covariates.loc[forecast_index.unique(level=ITEMID)]
170
- if (known_covariates.num_timesteps_per_item() < self.prediction_length).any():
171
- raise ValueError(
172
- f"known_covariates should include the values for prediction_length={self.prediction_length} "
173
- "many time steps into the future."
174
- )
175
- known_covariates = known_covariates.slice_by_timestep(None, self.prediction_length)
176
- known_covariates.index = forecast_index
177
- else:
178
- try:
179
- known_covariates = known_covariates.loc[forecast_index]
180
- except KeyError:
181
- raise ValueError(
182
- f"known_covariates should include the values for prediction_length={self.prediction_length} "
183
- "many time steps into the future."
184
- )
185
163
  return known_covariates
186
164
 
187
165
  def predict(
@@ -4,9 +4,12 @@ from .local import (
4
4
  ARIMAModel,
5
5
  AutoARIMAModel,
6
6
  AutoETSModel,
7
+ AverageModel,
7
8
  DynamicOptimizedThetaModel,
8
9
  ETSModel,
9
10
  NaiveModel,
11
+ NPTSModel,
12
+ SeasonalAverageModel,
10
13
  SeasonalNaiveModel,
11
14
  ThetaModel,
12
15
  ThetaStatsmodelsModel,
@@ -24,6 +27,7 @@ __all__ = [
24
27
  "DirectTabularModel",
25
28
  "RecursiveTabularModel",
26
29
  "NaiveModel",
30
+ "NPTSModel",
27
31
  "SeasonalNaiveModel",
28
32
  "AutoETSModel",
29
33
  "AutoARIMAModel",
@@ -105,10 +105,7 @@ class AbstractTimeSeriesModel(AbstractModel):
105
105
 
106
106
  self.freq: str = freq
107
107
  self.prediction_length: int = prediction_length
108
- self.quantile_levels = kwargs.get(
109
- "quantile_levels",
110
- kwargs.get("quantiles", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
111
- )
108
+ self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
112
109
  self._oof_predictions: Optional[TimeSeriesDataFrame] = None
113
110
 
114
111
  def __repr__(self) -> str:
@@ -1,6 +1,7 @@
1
1
  import joblib.externals.loky
2
2
 
3
- from .naive import NaiveModel, SeasonalNaiveModel
3
+ from .naive import AverageModel, NaiveModel, SeasonalAverageModel, SeasonalNaiveModel
4
+ from .npts import NPTSModel
4
5
  from .statsforecast import AutoARIMAModel, AutoETSModel, DynamicOptimizedThetaModel, ThetaModel
5
6
  from .statsmodels import ARIMAModel, ETSModel, ThetaStatsmodelsModel
6
7
 
@@ -34,6 +34,9 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
34
34
  Argument that can be passed to the underlying local model.
35
35
  default_n_jobs : Union[int, float]
36
36
  Default number of CPU cores used to train models. If float, this fraction of CPU cores will be used.
37
+ default_max_ts_length : Optional[int]
38
+ If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
39
+ This significantly speeds up fitting and usually leads to no change in accuracy.
37
40
  init_time_in_seconds : int
38
41
  Time that it takes to initialize the model in seconds (e.g., because of JIT compilation by Numba).
39
42
  If time_limit is below this number, model won't be trained.
@@ -41,6 +44,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
41
44
 
42
45
  allowed_local_model_args: List[str] = []
43
46
  default_n_jobs: Union[int, float] = AG_DEFAULT_N_JOBS
47
+ default_max_ts_length: Optional[int] = 2500
44
48
  init_time_in_seconds: int = 0
45
49
 
46
50
  def __init__(
@@ -65,7 +69,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
65
69
  raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
66
70
  # Default values, potentially overridden inside _fit()
67
71
  self.use_fallback_model = hyperparameters.pop("use_fallback_model", True)
68
- self.max_ts_length = hyperparameters.pop("max_ts_length", 2500)
72
+ self.max_ts_length = hyperparameters.pop("max_ts_length", self.default_max_ts_length)
69
73
 
70
74
  super().__init__(
71
75
  path=path,
@@ -0,0 +1,164 @@
1
+ from typing import Callable
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from autogluon.timeseries.models.local.abstract_local_model import AbstractLocalModel, seasonal_naive_forecast
7
+
8
+
9
+ class NaiveModel(AbstractLocalModel):
10
+ """Baseline model that sets the forecast equal to the last observed value.
11
+
12
+ Quantiles are obtained by assuming that the residuals follow zero-mean normal distribution, scale of which is
13
+ estimated from the empirical distribution of the residuals.
14
+ As described in https://otexts.com/fpp3/prediction-intervals.html
15
+
16
+ Other Parameters
17
+ ----------------
18
+ n_jobs : int or float, default = 0.5
19
+ Number of CPU cores used to fit the models in parallel.
20
+ When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
21
+ When set to a positive integer, that many cores are used.
22
+ When set to -1, all CPU cores are used.
23
+ """
24
+
25
+ allowed_local_model_args = ["seasonal_period"]
26
+
27
+ def _predict_with_local_model(
28
+ self,
29
+ time_series: pd.Series,
30
+ local_model_args: dict,
31
+ ) -> pd.DataFrame:
32
+ return seasonal_naive_forecast(
33
+ target=time_series.values.ravel(),
34
+ prediction_length=self.prediction_length,
35
+ quantile_levels=self.quantile_levels,
36
+ seasonal_period=1,
37
+ )
38
+
39
+
40
+ class SeasonalNaiveModel(AbstractLocalModel):
41
+ """Baseline model that sets the forecast equal to the last observed value from the same season.
42
+
43
+ Quantiles are obtained by assuming that the residuals follow zero-mean normal distribution, scale of which is
44
+ estimated from the empirical distribution of the residuals.
45
+ As described in https://otexts.com/fpp3/prediction-intervals.html
46
+
47
+
48
+ Other Parameters
49
+ ----------------
50
+ seasonal_period : int or None, default = None
51
+ Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
52
+ weekly cycle or 12 for monthly data with an annual cycle.
53
+ When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
54
+ specified manually by providing an integer > 1.
55
+ If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
56
+ Seasonality will also be disabled, if the length of the time series is < seasonal_period.
57
+ n_jobs : int or float, default = 0.5
58
+ Number of CPU cores used to fit the models in parallel.
59
+ When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
60
+ When set to a positive integer, that many cores are used.
61
+ When set to -1, all CPU cores are used.
62
+ """
63
+
64
+ allowed_local_model_args = ["seasonal_period"]
65
+
66
+ def _predict_with_local_model(
67
+ self,
68
+ time_series: np.ndarray,
69
+ local_model_args: dict,
70
+ ) -> pd.DataFrame:
71
+ return seasonal_naive_forecast(
72
+ target=time_series.values.ravel(),
73
+ prediction_length=self.prediction_length,
74
+ quantile_levels=self.quantile_levels,
75
+ seasonal_period=local_model_args["seasonal_period"],
76
+ )
77
+
78
+
79
+ def _get_quantile_function(q: float) -> Callable:
80
+ """Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
81
+
82
+ def quantile_fn(x: pd.Series) -> pd.Series:
83
+ return x.quantile(q)
84
+
85
+ quantile_fn.__name__ = str(q)
86
+ return quantile_fn
87
+
88
+
89
+ class AverageModel(AbstractLocalModel):
90
+ """Baseline model that sets the forecast equal to the historic average or quantile.
91
+
92
+ Other Parameters
93
+ ----------------
94
+ n_jobs : int or float, default = 0.5
95
+ Number of CPU cores used to fit the models in parallel.
96
+ When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
97
+ When set to a positive integer, that many cores are used.
98
+ When set to -1, all CPU cores are used.
99
+ max_ts_length : Optional[int], default = None
100
+ If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
101
+ This significantly speeds up fitting and usually leads to no change in accuracy.
102
+ """
103
+
104
+ allowed_local_model_args = ["seasonal_period"]
105
+ default_max_ts_length = None
106
+
107
+ def _predict_with_local_model(
108
+ self,
109
+ time_series: pd.Series,
110
+ local_model_args: dict,
111
+ ) -> pd.DataFrame:
112
+ agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]
113
+ stats_marginal = time_series.agg(agg_functions)
114
+ stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
115
+ return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
116
+
117
+
118
+ class SeasonalAverageModel(AbstractLocalModel):
119
+ """Baseline model that sets the forecast equal to the historic average or quantile in the same season.
120
+
121
+ Other Parameters
122
+ ----------------
123
+ seasonal_period : int or None, default = None
124
+ Number of time steps in a complete seasonal cycle for seasonal models. For example, 7 for daily data with a
125
+ weekly cycle or 12 for monthly data with an annual cycle.
126
+ When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
127
+ specified manually by providing an integer > 1.
128
+ If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
129
+ Seasonality will also be disabled, if the length of the time series is < seasonal_period.
130
+ n_jobs : int or float, default = 0.5
131
+ Number of CPU cores used to fit the models in parallel.
132
+ When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
133
+ When set to a positive integer, that many cores are used.
134
+ When set to -1, all CPU cores are used.
135
+ max_ts_length : Optional[int], default = None
136
+ If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
137
+ This significantly speeds up fitting and usually leads to no change in accuracy.
138
+ """
139
+
140
+ allowed_local_model_args = ["seasonal_period"]
141
+ default_max_ts_length = None
142
+
143
+ def _predict_with_local_model(
144
+ self,
145
+ time_series: pd.Series,
146
+ local_model_args: dict,
147
+ ) -> pd.DataFrame:
148
+ seasonal_period = local_model_args["seasonal_period"]
149
+ agg_functions = ["mean"] + [_get_quantile_function(q) for q in self.quantile_levels]
150
+
151
+ # Compute mean & quantiles for each season
152
+ ts_df = time_series.reset_index(drop=True).to_frame()
153
+ ts_df["season"] = ts_df.index % seasonal_period
154
+ stats_per_season = ts_df.groupby("season")[self.target].agg(agg_functions)
155
+
156
+ next_season = ts_df["season"].iloc[-1] + 1
157
+ season_in_forecast_horizon = np.arange(next_season, next_season + self.prediction_length) % seasonal_period
158
+ result = stats_per_season.reindex(season_in_forecast_horizon)
159
+
160
+ if np.any(result.isna().values):
161
+ # Use statistics over all timesteps to fill values for seasons that are missing from training data
162
+ stats_marginal = time_series.agg(agg_functions)
163
+ result = result.fillna(stats_marginal)
164
+ return result
@@ -0,0 +1,59 @@
1
+ import pandas as pd
2
+
3
+ from autogluon.timeseries.models.local.abstract_local_model import AbstractLocalModel
4
+
5
+
6
+ class NPTSModel(AbstractLocalModel):
7
+ """Non-Parametric Time Series Forecaster.
8
+
9
+ This models is especially well suited for forecasting sparse or intermittent time series with many zero values.
10
+
11
+ Based on `gluonts.model.npts.NPTSPredictor <https://ts.gluon.ai/stable/api/gluonts/gluonts.model.npts.html>`_.
12
+ See GluonTS documentation for more information about the model.
13
+
14
+ Other Parameters
15
+ ----------------
16
+ kernel_type : {"exponential", "uniform"}, default = "exponential"
17
+ Kernel used by the model.
18
+ exp_kernel_weights : float, default = 1.0
19
+ Scaling factor used in the exponential kernel.
20
+ use_seasonal_variant : bool, default = True
21
+ Whether to use the seasonal variant of the model.
22
+ n_jobs : int or float, default = 0.5
23
+ Number of CPU cores used to fit the models in parallel.
24
+ When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
25
+ When set to a positive integer, that many cores are used.
26
+ When set to -1, all CPU cores are used.
27
+ max_ts_length : Optional[int], default = 2500
28
+ If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
29
+ This significantly speeds up fitting and usually leads to no change in accuracy.
30
+ """
31
+
32
+ allowed_local_model_args = [
33
+ "kernel_type",
34
+ "exp_kernel_weights",
35
+ "use_seasonal_model",
36
+ "seasonal_period",
37
+ ]
38
+
39
+ def _predict_with_local_model(
40
+ self,
41
+ time_series: pd.Series,
42
+ local_model_args: dict,
43
+ ) -> pd.DataFrame:
44
+ from gluonts.model.npts import NPTSPredictor
45
+
46
+ local_model_args.pop("seasonal_period")
47
+
48
+ predictor = NPTSPredictor(
49
+ freq=self.freq,
50
+ prediction_length=self.prediction_length,
51
+ **local_model_args,
52
+ )
53
+ ts = time_series.copy(deep=False)
54
+ ts.index = ts.index.to_period()
55
+ forecast = predictor.predict_time_series(ts, num_samples=100)
56
+ forecast_dict = {"mean": forecast.mean}
57
+ for q in self.quantile_levels:
58
+ forecast_dict[str(q)] = forecast.quantile(q)
59
+ return pd.DataFrame(forecast_dict)
@@ -255,8 +255,6 @@ class ThetaModel(AbstractStatsForecastModel):
255
255
  This significantly speeds up fitting and usually leads to no change in accuracy.
256
256
  """
257
257
 
258
- max_ts_length = 3000
259
-
260
258
  allowed_local_model_args = [
261
259
  "decomposition_type",
262
260
  "seasonal_period",