autogluon.timeseries 0.8.3b20231005__tar.gz → 0.8.3b20231006__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of autogluon.timeseries might be problematic.

Files changed (55)
  1. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/PKG-INFO +1 -1
  2. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/setup.py +1 -1
  3. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/evaluator.py +2 -6
  4. autogluon.timeseries-0.8.3b20231006/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +6 -0
  5. autogluon.timeseries-0.8.3b20231006/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +526 -0
  6. autogluon.timeseries-0.8.3b20231006/src/autogluon/timeseries/models/autogluon_tabular/utils.py +51 -0
  7. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +2 -3
  8. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/local/abstract_local_model.py +2 -2
  9. autogluon.timeseries-0.8.3b20231006/src/autogluon/timeseries/utils/warning_filters.py +46 -0
  10. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/version.py +1 -1
  11. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
  12. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -1
  13. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon.timeseries.egg-info/requires.txt +4 -4
  14. autogluon.timeseries-0.8.3b20231005/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -7
  15. autogluon.timeseries-0.8.3b20231005/src/autogluon/timeseries/models/autogluon_tabular/direct_tabular.py +0 -400
  16. autogluon.timeseries-0.8.3b20231005/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +0 -368
  17. autogluon.timeseries-0.8.3b20231005/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -24
  18. autogluon.timeseries-0.8.3b20231005/src/autogluon/timeseries/utils/warning_filters.py +0 -78
  19. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/setup.cfg +0 -0
  20. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/__init__.py +0 -0
  21. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/configs/__init__.py +0 -0
  22. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
  23. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/dataset/__init__.py +0 -0
  24. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/dataset/ts_dataframe.py +0 -0
  25. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/learner.py +0 -0
  26. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/__init__.py +0 -0
  27. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
  28. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +0 -0
  29. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
  30. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
  31. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
  32. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
  33. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
  34. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  35. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
  36. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/local/__init__.py +0 -0
  37. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/local/naive.py +0 -0
  38. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/local/npts.py +0 -0
  39. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/local/statsforecast.py +0 -0
  40. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
  41. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +0 -0
  42. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/models/presets.py +0 -0
  43. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/predictor.py +0 -0
  44. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/splitter.py +0 -0
  45. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/trainer/__init__.py +0 -0
  46. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
  47. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
  48. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/utils/__init__.py +0 -0
  49. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/utils/features.py +0 -0
  50. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/utils/forecast.py +0 -0
  51. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon/timeseries/utils/seasonality.py +0 -0
  52. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
  53. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
  54. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
  55. {autogluon.timeseries-0.8.3b20231005 → autogluon.timeseries-0.8.3b20231006}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 0.8.3b20231005
+Version: 0.8.3b20231006
 Summary: AutoML for Image, Text, and Tabular Data
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -32,7 +32,7 @@ install_requires = [
     "networkx",  # version range defined in `core/_setup_utils.py`
     # TODO: update statsforecast to v1.5.0 - resolve antlr4-python3-runtime dependency clash with multimodal
     "statsforecast>=1.4.0,<1.5",
-    "mlforecast>=0.7.0,<0.7.4",
+    "mlforecast>=0.9.3,<0.9.4",
     "tqdm",  # version range defined in `core/_setup_utils.py`
     "ujson>=5,<6",  # needed to silence GluonTS warning
     f"autogluon.core[raytune]=={version}",
@@ -2,7 +2,6 @@
 See also, https://ts.gluon.ai/api/gluonts/gluonts.evaluation.html
 """
 import logging
-import warnings
 from typing import Optional

 import numpy as np
@@ -11,7 +10,7 @@ import pandas as pd
 from autogluon.timeseries import TimeSeriesDataFrame
 from autogluon.timeseries.dataset.ts_dataframe import ITEMID
 from autogluon.timeseries.utils.seasonality import get_seasonality
-from autogluon.timeseries.utils.warning_filters import evaluator_warning_filter
+from autogluon.timeseries.utils.warning_filters import warning_filter

 logger = logging.getLogger(__name__)

@@ -268,10 +267,7 @@ class TimeSeriesEvaluator:

         assert data_future.index.equals(predictions.index), "Prediction and data indices do not match."

-        with evaluator_warning_filter(), warnings.catch_warnings():
-            warnings.simplefilter("ignore", category=UserWarning)
-            warnings.simplefilter("ignore", category=RuntimeWarning)
-            warnings.simplefilter("ignore", category=FutureWarning)
+        with warning_filter():
             return self.metric_method(
                 y_true=data_future[self.target_column],
                 predictions=predictions,
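The rewritten warning_filters.py (+46 -0 in the file list above) is not shown in this diff, so the exact implementation of the consolidated helper is unknown. A minimal sketch of what such a warning_filter context manager might look like, assuming it simply suppresses the categories the old call sites silenced one by one:

import warnings
from contextlib import contextmanager

@contextmanager
def warning_filter():
    # Hypothetical sketch: ignore the warning categories previously
    # silenced inline at each call site.
    with warnings.catch_warnings():
        for category in [UserWarning, RuntimeWarning, FutureWarning]:
            warnings.simplefilter("ignore", category=category)
        yield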
@@ -0,0 +1,6 @@
+from .mlforecast import DirectTabularModel, RecursiveTabularModel
+
+__all__ = [
+    "DirectTabularModel",
+    "RecursiveTabularModel",
+]
@@ -0,0 +1,526 @@
+import logging
+import math
+import os
+import time
+from typing import Any, Dict, List, Optional, Tuple
+
+import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator
+
+import autogluon.core as ag
+from autogluon.tabular import TabularPredictor
+from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP, TimeSeriesDataFrame
+from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
+from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
+from autogluon.timeseries.utils.seasonality import get_seasonality
+from autogluon.timeseries.utils.warning_filters import warning_filter
+
+logger = logging.getLogger(__name__)
+
+MLF_TARGET = "y"
+MLF_ITEMID = "unique_id"
+MLF_TIMESTAMP = "ds"
+
+
+class TabularEstimator(BaseEstimator):
+    """Scikit-learn compatible interface for TabularPredictor."""
+
+    def __init__(self, predictor_init_kwargs: Optional[dict] = None, predictor_fit_kwargs: Optional[dict] = None):
+        self.predictor_init_kwargs = predictor_init_kwargs if predictor_init_kwargs is not None else {}
+        self.predictor_fit_kwargs = predictor_fit_kwargs if predictor_fit_kwargs is not None else {}
+
+    def get_params(self, deep: bool = True) -> dict:
+        return {
+            "predictor_init_kwargs": self.predictor_init_kwargs,
+            "predictor_fit_kwargs": self.predictor_fit_kwargs,
+        }
+
+    def fit(self, X: pd.DataFrame, y: pd.Series) -> "TabularEstimator":
+        assert isinstance(X, pd.DataFrame) and isinstance(y, pd.Series)
+        df = pd.concat([X, y.rename(MLF_TARGET).to_frame()], axis=1)
+        self.predictor = TabularPredictor(**self.predictor_init_kwargs)
+        with warning_filter():
+            self.predictor.fit(df, **self.predictor_fit_kwargs)
+        return self
+
+    def predict(self, X: pd.DataFrame) -> np.ndarray:
+        assert isinstance(X, pd.DataFrame)
+        return self.predictor.predict(X).values
+
+
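Since TabularEstimator exposes only the sklearn fit/predict surface, MLForecast can drive a TabularPredictor like any other regressor. A hedged standalone sketch (X_train, y_train, X_test are placeholder frames, not part of the diff):

est = TabularEstimator(
    predictor_init_kwargs={"label": "y", "problem_type": "regression"},
    predictor_fit_kwargs={"hyperparameters": {"GBM": {}}},
)
est.fit(X_train, y_train)     # X_train: pd.DataFrame, y_train: pd.Series
y_pred = est.predict(X_test)  # returns an np.ndarray of point predictions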
+class AbstractMLForecastModel(AbstractTimeSeriesModel):
+    def __init__(
+        self,
+        freq: Optional[str] = None,
+        prediction_length: int = 1,
+        path: Optional[str] = None,
+        name: Optional[str] = None,
+        eval_metric: Optional[str] = None,
+        hyperparameters: Optional[Dict[str, Any]] = None,
+        **kwargs,  # noqa
+    ):
+        super().__init__(
+            path=path,
+            freq=freq,
+            prediction_length=prediction_length,
+            name=name,
+            eval_metric=eval_metric,
+            hyperparameters=hyperparameters,
+            **kwargs,
+        )
+        from mlforecast import MLForecast
+        from mlforecast.target_transforms import BaseTargetTransform
+
+        self._required_ts_length: Optional[int] = None
+        self._target_lags: Optional[List[int]] = None
+        self._date_features: Optional[List[str]] = None
+        self._mlf: Optional[MLForecast] = None
+        self._scaler: Optional[BaseTargetTransform] = None
+        self._avg_residuals_std: float = 1.0
+
+    def _get_extra_tabular_init_kwargs(self) -> dict:
+        raise NotImplementedError
+
+    def _get_model_params(self) -> dict:
+        model_params = super()._get_model_params().copy()
+        model_params.setdefault("max_num_items", 10_000)
+        model_params.setdefault("max_num_samples", 1_000_000)
+        model_params.setdefault("tabular_hyperparameters", {"GBM": {}})
+        model_params.setdefault("tabular_fit_kwargs", {})
+        return model_params
+
+    def _get_mlforecast_init_args(self, train_data: TimeSeriesDataFrame, model_params: dict) -> dict:
+        # TODO: Support lag generation for all pandas frequencies
+        # TODO: Support date_feature generation for all pandas frequencies
+        from gluonts.time_feature import get_lags_for_frequency, time_features_from_frequency_str
+        from mlforecast.target_transforms import Differences
+
+        from .utils import MeanAbsScaler, StandardScaler
+
+        lags = model_params.get("lags")
+        if lags is None:
+            lags = get_lags_for_frequency(self.freq)
+        self._target_lags = np.array(sorted(set(lags)), dtype=np.int64)
+
+        date_features = model_params.get("date_features")
+        if date_features is None:
+            date_features = time_features_from_frequency_str(self.freq)
+        self._date_features = date_features
+
+        target_transforms = []
+        differences = model_params.get("differences")
+
+        ts_lengths = train_data.num_timesteps_per_item()
+        required_ts_length = sum(differences) + 1
+        all_train_ts_are_long_enough = ts_lengths.min() >= required_ts_length
+        some_ts_available_for_validation = ts_lengths.max() >= required_ts_length + self.prediction_length
+        if not (all_train_ts_are_long_enough and some_ts_available_for_validation):
+            logger.warning(
+                f"\tTime series in the dataset are too short for chosen differences {differences}. "
+                "Setting differences to [1]."
+            )
+            differences = [1]
+
+        if len(differences) > 0:
+            target_transforms.append(Differences(differences))
+            self._required_ts_length = sum(differences)
+
+        scaler_name = model_params.get("scaler")
+        if scaler_name is None:
+            pass
+        elif scaler_name == "standard":
+            self._scaler = StandardScaler()
+        elif scaler_name == "mean_abs":
+            self._scaler = MeanAbsScaler()
+        else:
+            logger.warning(
+                f"Unrecognized `scaler` {scaler_name} (supported options: ['standard', 'mean_abs', None]). Scaling disabled."
+            )
+
+        if self._scaler is not None:
+            target_transforms.append(self._scaler)
+
+        return {
+            "lags": self._target_lags,
+            "date_features": self._date_features,
+            "target_transforms": target_transforms,
+        }
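The automatic defaults for lags and date_features come straight from gluonts; a quick way to inspect what a given frequency produces (exact values depend on the installed gluonts version):

from gluonts.time_feature import get_lags_for_frequency, time_features_from_frequency_str

print(get_lags_for_frequency("H"))            # suggested target lags for hourly data
print(time_features_from_frequency_str("H"))  # callables such as hour-of-day, day-of-week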
+
+    def _mask_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Apply a mask that mimics the situation at prediction time when target/covariates are unknown during the
+        forecast horizon.
+
+        This method is overridden by DirectTabularModel.
+        """
+        return df
+
+    def _generate_train_val_dfs(
+        self, data: TimeSeriesDataFrame, max_num_items: Optional[int] = None, max_num_samples: Optional[int] = None
+    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
+        # Exclude items that are too short for the chosen differences - otherwise an exception will be raised
+        if self._required_ts_length is not None:
+            ts_lengths = data.num_timesteps_per_item()
+            items_to_exclude = ts_lengths.index[ts_lengths < self._required_ts_length]
+            if len(items_to_exclude) > 0:
+                logger.debug(f"Removing {len(items_to_exclude)} items that are too short for chosen differences")
+                data = data.query("item_id not in @items_to_exclude")
+
+        if max_num_items is not None and data.num_items > max_num_items:
+            items_to_keep = data.item_ids.to_series().sample(n=int(max_num_items))  # noqa: F841
+            data = data.query("item_id in @items_to_keep")
+
+        mlforecast_df = self._to_mlforecast_df(data, data.static_features)
+        df = self._mlf.preprocess(mlforecast_df, dropna=False)
+        # df.query results in a 2x memory saving compared to df.dropna(subset="y")
+        df = df.query("y.notnull()")
+
+        df = self._mask_df(df)
+
+        grouped_df = df.groupby(MLF_ITEMID, sort=False)
+        num_items = len(grouped_df)
+
+        if max_num_samples is not None and len(df) > max_num_samples:
+            df = grouped_df.tail(self.prediction_length + math.ceil(max_num_samples / num_items))
+            grouped_df = df.groupby(MLF_ITEMID, sort=False)
+
+        # Use up to `prediction_length` last rows as the validation set (but no more than 50% of the rows)
+        val_rows_per_item = min(self.prediction_length, math.ceil(0.5 * len(df) / num_items))
+        train_df = grouped_df.nth(slice(None, -val_rows_per_item))
+        val_df = grouped_df.tail(val_rows_per_item)
+
+        return train_df.drop([MLF_ITEMID, MLF_TIMESTAMP], axis=1), val_df.drop([MLF_ITEMID, MLF_TIMESTAMP], axis=1)
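The per-item split relies on positional slicing within each group: groupby(...).nth(slice(None, -k)) keeps all but the last k rows of each item, while .tail(k) keeps exactly the last k. A toy illustration of the idea (not part of the diff; requires a pandas version where GroupBy.nth accepts slices):

import pandas as pd

df = pd.DataFrame({"unique_id": ["a"] * 5 + ["b"] * 5, "y": range(10)})
grouped = df.groupby("unique_id", sort=False)
train = grouped.nth(slice(None, -2))  # first 3 rows of each item
val = grouped.tail(2)                 # last 2 rows of each item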
+
+    def _to_mlforecast_df(
+        self,
+        data: TimeSeriesDataFrame,
+        static_features: pd.DataFrame,
+        include_target: bool = True,
+    ) -> pd.DataFrame:
+        """Convert TimeSeriesDataFrame to a format expected by MLForecast methods `predict` and `preprocess`.
+
+        Each row contains unique_id, ds, y, and (optionally) known covariates & static features.
+        """
+        # TODO: Add support for past_covariates
+        selected_columns = self.metadata.known_covariates_real.copy()
+        column_name_mapping = {ITEMID: MLF_ITEMID, TIMESTAMP: MLF_TIMESTAMP}
+        if include_target:
+            selected_columns += [self.target]
+            column_name_mapping[self.target] = MLF_TARGET
+
+        df = pd.DataFrame(data)[selected_columns].reset_index()
+        if static_features is not None:
+            df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
+
+        # Convert float64 to float32 to reduce memory usage
+        float64_cols = list(df.select_dtypes(include="float64"))
+        df[float64_cols] = df[float64_cols].astype("float32")
+        # We assume that df is sorted by 'unique_id' inside `TimeSeriesPredictor._check_and_prepare_data_frame`
+        return df.rename(columns=column_name_mapping)
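The resulting frame follows mlforecast's long-format conventions: one row per (item, timestamp) pair, with item_id renamed to unique_id, timestamp to ds, and the target to y. Roughly (illustrative column names and values, not from the diff):

unique_id          ds     y  static_feat_1  known_cov_1
item_A     2023-01-01  12.0            0.5          1.0
item_A     2023-01-02  13.5            0.5          0.0
item_B     2023-01-01   7.1            1.2          1.0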
+
+    def _fit(
+        self,
+        train_data: TimeSeriesDataFrame,
+        val_data: Optional[TimeSeriesDataFrame] = None,
+        time_limit: Optional[int] = None,
+        verbosity: int = 2,
+        **kwargs,
+    ) -> None:
+        from mlforecast import MLForecast
+
+        self._check_fit_params()
+        fit_start_time = time.time()
+        # TabularEstimator is passed to MLForecast later to include tuning_data
+        model_params = self._get_model_params()
+
+        mlforecast_init_args = self._get_mlforecast_init_args(train_data, model_params)
+        self._mlf = MLForecast(models={}, freq=self.freq, **mlforecast_init_args)
+
+        # We generate train/val splits from train_data and ignore val_data to avoid overfitting
+        train_df, val_df = self._generate_train_val_dfs(
+            train_data,
+            max_num_items=model_params["max_num_items"],
+            max_num_samples=model_params["max_num_samples"],
+        )
+
+        estimator = TabularEstimator(
+            predictor_init_kwargs={
+                "path": os.path.join(self.path, "tabular_predictor"),
+                "verbosity": verbosity - 2,
+                "label": MLF_TARGET,
+                "eval_metric": self.TIMESERIES_METRIC_TO_TABULAR_METRIC[self.eval_metric],
+                **self._get_extra_tabular_init_kwargs(),
+            },
+            predictor_fit_kwargs={
+                "tuning_data": val_df,
+                "time_limit": None if time_limit is None else time_limit - (time.time() - fit_start_time),
+                "hyperparameters": model_params["tabular_hyperparameters"],
+                **model_params["tabular_fit_kwargs"],
+            },
+        )
+        self._mlf.models = {"mean": estimator}
+
+        with warning_filter():
+            self._mlf.fit_models(X=train_df.drop(MLF_TARGET, axis=1), y=train_df[MLF_TARGET])
+
+        self._avg_residuals_std = self._compute_residuals_std(val_df)
+
+    def _compute_residuals_std(self, val_df: pd.DataFrame) -> float:
+        residuals = val_df[MLF_TARGET] - self._mlf.models_["mean"].predict(val_df)
+        return np.sqrt(residuals.pow(2.0).mean())
+
+    def _get_scale_per_item(self, item_ids: pd.Index) -> pd.Series:
+        """Extract the '_scale' values from the scaler object, if available."""
+        if self._scaler is not None:
+            return self._scaler.stats_["_scale"].copy().reindex(item_ids)
+        else:
+            return pd.Series(1.0, index=item_ids)
+
+    def predict(
+        self,
+        data: TimeSeriesDataFrame,
+        known_covariates: Optional[TimeSeriesDataFrame] = None,
+        **kwargs,
+    ) -> TimeSeriesDataFrame:
+        raise NotImplementedError
+
+
+class DirectTabularModel(AbstractMLForecastModel):
+    """Predict all future time series values simultaneously using TabularPredictor from AutoGluon-Tabular.
+
+    A single TabularPredictor is used to forecast all future time series values using the following features:
+
+    - lag features (observed time series values) based on ``freq`` of the data
+    - time features (e.g., day of the week) based on the timestamp of the measurement
+    - known covariates (if available)
+    - static features of each item (if available)
+
+    Features not known during the forecast horizon (e.g., future target values) are replaced by NaNs.
+
+    If ``eval_metric=="WQL"``, the TabularPredictor will be trained with ``"quantile"`` problem type.
+    Otherwise, TabularPredictor will be trained with ``"regression"`` problem type, and dummy quantiles will be
+    obtained by assuming that the residuals follow zero-mean normal distribution.
+
+    Based on the `mlforecast <https://github.com/Nixtla/mlforecast>`_ library.
+
+
+    Other Parameters
+    ----------------
+    lags : List[int], default = None
+        Lags of the target that will be used as features for predictions. If None, will be determined automatically
+        based on the frequency of the data.
+    date_features : List[Union[str, Callable]], default = None
+        Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
+        If None, will be determined automatically based on the frequency of the data.
+    differences : List[int], default = []
+        Differences to take of the target before computing the features. These are restored at the forecasting step.
+        Defaults to no differencing.
+    scaler : {"standard", "mean_abs", None}, default = "mean_abs"
+        Scaling applied to each time series.
+    tabular_hyperparameters : Dict[str, Dict[str, Any]], optional
+        Hyperparameters dictionary passed to ``TabularPredictor.fit``. Contains the names of models that should be fit.
+        Defaults to ``{"GBM": {}}``.
+    tabular_fit_kwargs : Dict[str, Any], optional
+        Additional keyword arguments passed to ``TabularPredictor.fit``. Defaults to an empty dict.
+    max_num_items : int or None, default = 10_000
+        If not None, the model will randomly select this many time series for training and validation.
+    max_num_samples : int or None, default = 1_000_000
+        If not None, training dataset passed to TabularPredictor will contain at most this many rows (starting from the
+        end of each time series).
+    """
+
+    TIMESERIES_METRIC_TO_TABULAR_METRIC = {
+        "MAPE": "mean_absolute_percentage_error",
+        "sMAPE": "mean_absolute_percentage_error",
+        "WQL": "pinball_loss",
+        "MASE": "mean_absolute_error",
+        "WAPE": "mean_absolute_error",
+        "MSE": "mean_squared_error",
+        "RMSE": "root_mean_squared_error",
+        "RMSSE": "root_mean_squared_error",
+    }
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        if 0.5 not in self.quantile_levels:
+            self.must_drop_median = True
+            self.quantile_levels = sorted(set([0.5] + self.quantile_levels))
+        else:
+            self.must_drop_median = False
+
+    @property
+    def is_quantile_model(self) -> bool:
+        return self.eval_metric == "WQL"
+
+    def _get_model_params(self) -> dict:
+        model_params = super()._get_model_params()
+        model_params.setdefault("scaler", "mean_abs")
+        model_params.setdefault("differences", [])
+        return model_params
+
+    def _mask_df(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Apply a mask that mimics the situation at prediction time when target/covariates are unknown during the
+        forecast horizon.
+        """
+        num_hidden = np.random.randint(0, self.prediction_length, size=len(df))
+        lag_cols = [f"lag{lag}" for lag in self._target_lags]
+        mask = num_hidden[:, None] < self._target_lags[None]  # shape [len(num_hidden), len(_target_lags)]
+        # use df.loc[:, lag_cols] instead of df[lag_cols] to avoid SettingWithCopyWarning
+        df.loc[:, lag_cols] = df[lag_cols].where(mask, other=np.nan)
+        return df
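The broadcasting trick above hides exactly the lag values that would fall inside the forecast horizon for a randomly chosen cutoff. A toy demonstration of the mask (illustrative, not part of the diff):

import numpy as np

target_lags = np.array([1, 2, 7])
num_hidden = np.array([0, 3])  # rows pretend 0 and 3 future steps are unobserved
mask = num_hidden[:, None] < target_lags[None]
# mask[0] = [ True,  True,  True]  -> nothing hidden, all lag features kept
# mask[1] = [False, False,  True]  -> lag1 and lag2 fall in the horizon, set to NaN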
+
+    def _compute_residuals_std(self, val_df: pd.DataFrame) -> float:
+        if self.is_quantile_model:
+            return 1.0  # Quantile model does not require residuals to produce prediction intervals
+        else:
+            return super()._compute_residuals_std(val_df=val_df)
+
+    def predict(
+        self,
+        data: TimeSeriesDataFrame,
+        known_covariates: Optional[TimeSeriesDataFrame] = None,
+        **kwargs,
+    ) -> TimeSeriesDataFrame:
+        if known_covariates is not None:
+            data_future = known_covariates.copy()
+        else:
+            future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length)
+            data_future = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
+        # MLForecast raises an exception if the target contains NaN. We use inf as a placeholder
+        # and replace it with NaN afterwards
+        data_future[self.target] = float("inf")
+        data_extended = pd.concat([data, data_future])
+        mlforecast_df = self._to_mlforecast_df(data_extended, data.static_features)
+        df = self._mlf.preprocess(mlforecast_df, dropna=False)
+        df = df.groupby(MLF_ITEMID, sort=False).tail(self.prediction_length)
+        df = df.replace(float("inf"), float("nan"))
+
+        raw_predictions = self._mlf.models_["mean"].predict(df)
+        predictions = self._postprocess_predictions(raw_predictions)
+        predictions[[MLF_ITEMID, MLF_TIMESTAMP]] = df[[MLF_ITEMID, MLF_TIMESTAMP]].values
+
+        if hasattr(self._mlf.ts, "target_transforms"):
+            # Ensure that transforms are fitted only on past data
+            self._mlf.preprocess(self._to_mlforecast_df(data, None))
+            for tfm in self._mlf.ts.target_transforms[::-1]:
+                predictions = tfm.inverse_transform(predictions)
+        predictions = predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP}).set_index(
+            [ITEMID, TIMESTAMP]
+        )
+        if self.must_drop_median:
+            predictions = predictions.drop("0.5", axis=1)
+        return TimeSeriesDataFrame(predictions)
+
+    def _postprocess_predictions(self, predictions: np.ndarray) -> pd.DataFrame:
+        from scipy.stats import norm
+
+        if self.is_quantile_model:
+            predictions = pd.DataFrame(predictions, columns=[str(q) for q in self.quantile_levels])
+            predictions.values.sort(axis=1)
+            predictions["mean"] = predictions["0.5"]
+        else:
+            predictions = pd.DataFrame(predictions, columns=["mean"])
+            # Dummy quantiles assuming zero-mean normally distributed residuals
+            for q in self.quantile_levels:
+                predictions[str(q)] = predictions["mean"] + norm.ppf(q) * self._avg_residuals_std
+
+        column_order = ["mean"] + [col for col in predictions.columns if col != "mean"]
+        return predictions[column_order]
+
+    def _get_extra_tabular_init_kwargs(self) -> dict:
+        if self.is_quantile_model:
+            return {"problem_type": ag.constants.QUANTILE, "quantile_levels": self.quantile_levels}
+        else:
+            return {"problem_type": ag.constants.REGRESSION}
+
+
+class RecursiveTabularModel(AbstractMLForecastModel):
+    """Predict future time series values one by one using TabularPredictor from AutoGluon-Tabular.
+
+    A single TabularPredictor is used to forecast the future time series values using the following features:
+
+    - lag features (observed time series values) based on ``freq`` of the data
+    - time features (e.g., day of the week) based on the timestamp of the measurement
+    - known covariates (if available)
+    - static features of each item (if available)
+
+    TabularPredictor will always be trained with ``"regression"`` problem type, and dummy quantiles will be
+    obtained by assuming that the residuals follow zero-mean normal distribution.
+
+    Based on the `mlforecast <https://github.com/Nixtla/mlforecast>`_ library.
+
+
+    Other Parameters
+    ----------------
+    lags : List[int], default = None
+        Lags of the target that will be used as features for predictions. If None, will be determined automatically
+        based on the frequency of the data.
+    date_features : List[Union[str, Callable]], default = None
+        Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
+        If None, will be determined automatically based on the frequency of the data.
+    differences : List[int], default = None
+        Differences to take of the target before computing the features. These are restored at the forecasting step.
+        If None, will be set to ``[seasonal_period]``, where seasonal_period is determined based on the data frequency.
+    scaler : {"standard", "mean_abs", None}, default = "standard"
+        Scaling applied to each time series.
+    tabular_hyperparameters : Dict[str, Dict[str, Any]], optional
+        Hyperparameters dictionary passed to ``TabularPredictor.fit``. Contains the names of models that should be fit.
+        Defaults to ``{"GBM": {}}``.
+    tabular_fit_kwargs : Dict[str, Any], optional
+        Additional keyword arguments passed to ``TabularPredictor.fit``. Defaults to an empty dict.
+    max_num_items : int or None, default = 10_000
+        If not None, the model will randomly select this many time series for training and validation.
+    max_num_samples : int or None, default = 1_000_000
+        If not None, training dataset passed to TabularPredictor will contain at most this many rows (starting from the
+        end of each time series).
+    """
+
+    TIMESERIES_METRIC_TO_TABULAR_METRIC = {
+        "MAPE": "mean_absolute_percentage_error",
+        "sMAPE": "mean_absolute_percentage_error",
+        "WQL": "mean_absolute_error",
+        "MASE": "mean_absolute_error",
+        "WAPE": "mean_absolute_error",
+        "MSE": "mean_squared_error",
+        "RMSE": "root_mean_squared_error",
+        "RMSSE": "root_mean_squared_error",
+    }
+
+    def _get_model_params(self) -> dict:
+        model_params = super()._get_model_params()
+        model_params.setdefault("scaler", "standard")
+        model_params.setdefault("differences", [get_seasonality(self.freq)])
+        return model_params
+
+    def predict(
+        self,
+        data: TimeSeriesDataFrame,
+        known_covariates: Optional[TimeSeriesDataFrame] = None,
+        **kwargs,
+    ) -> TimeSeriesDataFrame:
+        from scipy.stats import norm
+
+        new_df = self._to_mlforecast_df(data, data.static_features)
+        if known_covariates is not None:
+            dynamic_dfs = [self._to_mlforecast_df(known_covariates, data.static_features, include_target=False)]
+        else:
+            dynamic_dfs = None
+        with warning_filter():
+            raw_predictions = self._mlf.predict(
+                h=self.prediction_length,
+                new_df=new_df,
+                dynamic_dfs=dynamic_dfs,
+            )
+        predictions = raw_predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
+
+        # Add quantile levels assuming that residuals follow normal distribution
+        scale_per_item = self._get_scale_per_item(predictions[ITEMID].unique())
+        num_items = int(len(predictions) / self.prediction_length)
+        sqrt_h = np.sqrt(np.arange(1, self.prediction_length + 1))
+        # Series where normal_scale_per_timestep.loc[item_id].loc[N] = sqrt(1 + N) for N in range(prediction_length)
+        normal_scale_per_timestep = pd.Series(np.tile(sqrt_h, num_items), index=predictions[ITEMID])
+
+        std_per_timestep = self._avg_residuals_std * scale_per_item * normal_scale_per_timestep
+        for q in self.quantile_levels:
+            predictions[str(q)] = predictions["mean"] + norm.ppf(q) * std_per_timestep.to_numpy()
+        return TimeSeriesDataFrame(predictions).reindex(data.item_ids, level=ITEMID)
+
+    def _get_extra_tabular_init_kwargs(self) -> dict:
+        return {"problem_type": ag.constants.REGRESSION}
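Neither class is normally instantiated directly. Assuming they are exposed through the usual TimeSeriesPredictor hyperparameter keys "DirectTabular" and "RecursiveTabular" (the registration code is not shown in this diff), a typical entry point would look like this (train_data is a placeholder TimeSeriesDataFrame):

from autogluon.timeseries import TimeSeriesPredictor

predictor = TimeSeriesPredictor(prediction_length=24, eval_metric="WQL")
predictor.fit(
    train_data,
    hyperparameters={
        "DirectTabular": {"scaler": "mean_abs"},
        "RecursiveTabular": {"differences": [24]},
    },
)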
@@ -0,0 +1,51 @@
+import numpy as np
+import pandas as pd
+from mlforecast.target_transforms import BaseTargetTransform
+
+
+class StandardScaler(BaseTargetTransform):
+    """Standardizes the series by subtracting the mean and dividing by the standard deviation."""
+
+    min_scale: float = 1e-2
+
+    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        self.stats_ = (
+            df.replace([np.inf, -np.inf], np.nan)
+            .groupby(self.id_col)[self.target_col]
+            .agg(["mean", "std"])
+            .rename(columns={"mean": "_mean", "std": "_scale"})
+        )
+        self.stats_["_scale"] = self.stats_["_scale"].clip(lower=self.min_scale)
+        df = df.merge(self.stats_, on=self.id_col)
+        df[self.target_col] = (df[self.target_col] - df["_mean"]) / df["_scale"]
+        df = df.drop(columns=["_mean", "_scale"])
+        return df
+
+    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        df = df.merge(self.stats_, on=self.id_col)
+        for col in df.columns.drop([self.id_col, self.time_col, "_mean", "_scale"]):
+            df[col] = df[col] * df["_scale"] + df["_mean"]
+        df = df.drop(columns=["_mean", "_scale"])
+        return df
+
+
+class MeanAbsScaler(BaseTargetTransform):
+    """Scales time series by dividing by their mean absolute value."""
+
+    min_scale: float = 1e-2
+
+    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        target = df[self.target_col].replace([np.inf, -np.inf], np.nan).abs()
+        self.stats_ = target.groupby(df[self.id_col], sort=False).agg(["mean"]).rename(columns={"mean": "_scale"})
+        self.stats_["_scale"] = self.stats_["_scale"].clip(lower=self.min_scale)
+        df = df.merge(self.stats_, on=self.id_col)
+        df[self.target_col] = df[self.target_col] / df["_scale"]
+        df = df.drop(columns=["_scale"])
+        return df
+
+    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        df = df.merge(self.stats_, on=self.id_col)
+        for col in df.columns.drop([self.id_col, self.time_col, "_scale"]):
+            df[col] = df[col] * df["_scale"]
+        df = df.drop(columns=["_scale"])
+        return df
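Both transforms follow mlforecast's BaseTargetTransform contract: MLForecast sets id_col, time_col, and target_col on the transform before calling fit_transform. A minimal sketch of wiring one in, mirroring how AbstractMLForecastModel constructs MLForecast (the freq and lags values here are arbitrary examples):

from mlforecast import MLForecast

mlf = MLForecast(models={}, freq="H", lags=[1, 24], target_transforms=[MeanAbsScaler()])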
@@ -19,11 +19,10 @@ from pandas.tseries.frequencies import to_offset

 from autogluon.common.loaders import load_pkl
 from autogluon.common.utils.log_utils import set_logger_verbosity
-from autogluon.core.utils import warning_filter
 from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
-from autogluon.timeseries.utils.warning_filters import disable_root_logger, torch_warning_filter
+from autogluon.timeseries.utils.warning_filters import disable_root_logger, warning_filter

 # NOTE: We avoid imports for torch and pytorch_lightning at the top level and hide them inside class methods.
 # This is done to skip these imports during multiprocessing (which may cause bugs)
@@ -190,7 +189,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
     def load(cls, path: str, reset_paths: bool = True, verbose: bool = True) -> "AbstractGluonTSModel":
         from gluonts.torch.model.predictor import PyTorchPredictor

-        with torch_warning_filter():
+        with warning_filter():
             model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
         if reset_paths:
             model.set_contexts(path)
@@ -13,7 +13,7 @@ from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
 from autogluon.timeseries.utils.seasonality import get_seasonality
-from autogluon.timeseries.utils.warning_filters import statsmodels_joblib_warning_filter, statsmodels_warning_filter
+from autogluon.timeseries.utils.warning_filters import warning_filter

 logger = logging.getLogger(__name__)

@@ -133,7 +133,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         executor = Parallel(self.n_jobs, timeout=timeout)

         try:
-            with statsmodels_joblib_warning_filter(), statsmodels_warning_filter():
+            with warning_filter():
                 predictions_with_flags = executor(
                     delayed(self._predict_wrapper)(ts, end_time=end_time) for ts in all_series
                 )