autogluon.timeseries 0.8.3b20231024__py3-none-any.whl → 0.8.3b20231027__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- autogluon/timeseries/evaluator.py +26 -249
- autogluon/timeseries/learner.py +5 -5
- autogluon/timeseries/metrics/__init__.py +58 -0
- autogluon/timeseries/metrics/abstract.py +201 -0
- autogluon/timeseries/metrics/point.py +156 -0
- autogluon/timeseries/metrics/quantile.py +26 -0
- autogluon/timeseries/metrics/utils.py +18 -0
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +43 -41
- autogluon/timeseries/models/abstract/model_trial.py +1 -1
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +28 -55
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +27 -15
- autogluon/timeseries/models/gluonts/abstract_gluonts.py +1 -20
- autogluon/timeseries/models/local/abstract_local_model.py +1 -1
- autogluon/timeseries/models/multi_window/multi_window_model.py +4 -2
- autogluon/timeseries/models/presets.py +2 -1
- autogluon/timeseries/predictor.py +24 -15
- autogluon/timeseries/trainer/abstract_trainer.py +14 -22
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-0.8.3b20231024.dist-info → autogluon.timeseries-0.8.3b20231027.dist-info}/METADATA +6 -5
- {autogluon.timeseries-0.8.3b20231024.dist-info → autogluon.timeseries-0.8.3b20231027.dist-info}/RECORD +27 -22
- /autogluon.timeseries-0.8.3b20231024-py3.8-nspkg.pth → /autogluon.timeseries-0.8.3b20231027-py3.8-nspkg.pth +0 -0
- {autogluon.timeseries-0.8.3b20231024.dist-info → autogluon.timeseries-0.8.3b20231027.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-0.8.3b20231024.dist-info → autogluon.timeseries-0.8.3b20231027.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-0.8.3b20231024.dist-info → autogluon.timeseries-0.8.3b20231027.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-0.8.3b20231024.dist-info → autogluon.timeseries-0.8.3b20231027.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-0.8.3b20231024.dist-info → autogluon.timeseries-0.8.3b20231027.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-0.8.3b20231024.dist-info → autogluon.timeseries-0.8.3b20231027.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from autogluon.timeseries import TimeSeriesDataFrame
|
|
8
|
+
from autogluon.timeseries.dataset.ts_dataframe import ITEMID
|
|
9
|
+
|
|
10
|
+
from .abstract import TimeSeriesScorer
|
|
11
|
+
from .utils import _in_sample_abs_seasonal_error, _in_sample_squared_seasonal_error
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RMSE(TimeSeriesScorer):
    """Root mean squared error.

    Computed as the square root of the mean of squared differences between the
    actual values and the point forecast.
    """

    # Name of the tabular regression metric that tabular-based forecasting models use
    # as their training metric when this scorer is selected.
    equivalent_tabular_regression_metric = "root_mean_squared_error"

    def compute_metric(
        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
    ) -> float:
        """Return the RMSE of ``predictions`` against ``data_future``.

        Parameters
        ----------
        data_future : TimeSeriesDataFrame
            Observed values in the forecast horizon.
        predictions : TimeSeriesDataFrame
            Forecasts for the same horizon.
        target : str, default = "target"
            Name of the column holding the values to score against.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        actual, forecast = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
        squared_error = (actual - forecast) ** 2
        # NOTE(review): `_safemean` is defined on the base class; presumably a NaN/inf-tolerant mean - confirm.
        mean_squared_error = self._safemean(squared_error)
        return np.sqrt(mean_squared_error)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class MSE(TimeSeriesScorer):
    """Mean squared error.

    Computed as the mean of squared differences between the actual values and
    the point forecast.
    """

    # Name of the tabular regression metric that tabular-based forecasting models use
    # as their training metric when this scorer is selected.
    equivalent_tabular_regression_metric = "mean_squared_error"

    def compute_metric(
        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
    ) -> float:
        """Return the MSE of ``predictions`` against ``data_future``.

        Parameters
        ----------
        data_future : TimeSeriesDataFrame
            Observed values in the forecast horizon.
        predictions : TimeSeriesDataFrame
            Forecasts for the same horizon.
        target : str, default = "target"
            Name of the column holding the values to score against.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        actual, forecast = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
        squared_error = (actual - forecast) ** 2
        # NOTE(review): `_safemean` is defined on the base class; presumably a NaN/inf-tolerant mean - confirm.
        return self._safemean(squared_error)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class MAE(TimeSeriesScorer):
    """Mean absolute error.

    Computed as the mean of absolute differences between the actual values and
    the point forecast.
    """

    # When True, models copy the "0.5" quantile forecast into the "mean" column before scoring.
    optimized_by_median = True
    # Name of the tabular regression metric that tabular-based forecasting models use
    # as their training metric when this scorer is selected.
    equivalent_tabular_regression_metric = "mean_absolute_error"

    def compute_metric(
        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
    ) -> float:
        """Return the MAE of ``predictions`` against ``data_future``.

        Parameters
        ----------
        data_future : TimeSeriesDataFrame
            Observed values in the forecast horizon.
        predictions : TimeSeriesDataFrame
            Forecasts for the same horizon.
        target : str, default = "target"
            Name of the column holding the values to score against.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        actual, forecast = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
        abs_error = (actual - forecast).abs()
        # NOTE(review): `_safemean` is defined on the base class; presumably a NaN/inf-tolerant mean - confirm.
        return self._safemean(abs_error)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class WAPE(TimeSeriesScorer):
    """Weighted absolute percentage error.

    Computed as the sum of absolute errors divided by the sum of absolute
    actual values, taken over all rows at once.
    """

    # When True, models copy the "0.5" quantile forecast into the "mean" column before scoring.
    optimized_by_median = True
    # Name of the tabular regression metric that tabular-based forecasting models use
    # as their training metric when this scorer is selected.
    equivalent_tabular_regression_metric = "mean_absolute_error"

    def compute_metric(
        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
    ) -> float:
        """Return the WAPE of ``predictions`` against ``data_future``.

        Parameters
        ----------
        data_future : TimeSeriesDataFrame
            Observed values in the forecast horizon.
        predictions : TimeSeriesDataFrame
            Forecasts for the same horizon.
        target : str, default = "target"
            Name of the column holding the values to score against.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        actual, forecast = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
        total_abs_error = (actual - forecast).abs().sum()
        # No guard against a zero denominator: an all-zero target yields a non-finite result.
        total_abs_actual = actual.abs().sum()
        return total_abs_error / total_abs_actual
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class sMAPE(TimeSeriesScorer):
    """Symmetric mean absolute percentage error.

    Computed as the mean of ``2 * |y_true - y_pred| / (|y_true| + |y_pred|)``.
    """

    # When True, models copy the "0.5" quantile forecast into the "mean" column before scoring.
    optimized_by_median = True
    # Name of the tabular regression metric that tabular-based forecasting models use
    # as their training metric when this scorer is selected.
    equivalent_tabular_regression_metric = "symmetric_mean_absolute_percentage_error"

    def compute_metric(
        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
    ) -> float:
        """Return the sMAPE of ``predictions`` against ``data_future``.

        Parameters
        ----------
        data_future : TimeSeriesDataFrame
            Observed values in the forecast horizon.
        predictions : TimeSeriesDataFrame
            Forecasts for the same horizon.
        target : str, default = "target"
            Name of the column holding the values to score against.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        actual, forecast = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
        abs_error = (actual - forecast).abs()
        magnitude = actual.abs() + forecast.abs()
        symmetric_ratio = 2 * (abs_error / magnitude)
        # NOTE(review): rows where both values are zero give 0/0; `_safemean` presumably ignores them - confirm.
        return self._safemean(symmetric_ratio)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class MAPE(TimeSeriesScorer):
    """Mean absolute percentage error.

    Computed as the mean of ``|y_true - y_pred| / |y_true|``.
    """

    # When True, models copy the "0.5" quantile forecast into the "mean" column before scoring.
    optimized_by_median = True
    # Name of the tabular regression metric that tabular-based forecasting models use
    # as their training metric when this scorer is selected.
    equivalent_tabular_regression_metric = "mean_absolute_percentage_error"

    def compute_metric(
        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
    ) -> float:
        """Return the MAPE of ``predictions`` against ``data_future``.

        Parameters
        ----------
        data_future : TimeSeriesDataFrame
            Observed values in the forecast horizon.
        predictions : TimeSeriesDataFrame
            Forecasts for the same horizon.
        target : str, default = "target"
            Name of the column holding the values to score against.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        actual, forecast = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
        abs_error = (actual - forecast).abs()
        relative_error = abs_error / actual.abs()
        # NOTE(review): zero actual values give non-finite ratios; `_safemean` presumably ignores them - confirm.
        return self._safemean(relative_error)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class MASE(TimeSeriesScorer):
    """Mean absolute scaled error.

    The mean absolute error of each item is divided by that item's in-sample
    seasonal error, and the resulting ratios are averaged across items. The
    seasonal error must be cached with ``save_past_metrics`` before
    ``compute_metric`` is called.
    """

    # When True, models copy the "0.5" quantile forecast into the "mean" column before scoring.
    optimized_by_median = True
    # Name of the tabular regression metric that tabular-based forecasting models use
    # as their training metric when this scorer is selected.
    equivalent_tabular_regression_metric = "mean_absolute_error"

    def __init__(self):
        # Per-item scaling factors; populated by `save_past_metrics`, reset by `clear_past_metrics`.
        self._past_abs_seasonal_error: Optional[pd.Series] = None

    def save_past_metrics(
        self,
        data_past: TimeSeriesDataFrame,
        target: str = "target",
        seasonal_period: int = 1,
        **kwargs,
    ) -> None:
        """Compute and cache the in-sample absolute seasonal error of each item.

        Parameters
        ----------
        data_past : TimeSeriesDataFrame
            Historical data; only the ``target`` column is read.
        target : str, default = "target"
            Name of the column with the observed values.
        seasonal_period : int, default = 1
            Lag used for the seasonal differences.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        history = data_past[target]
        self._past_abs_seasonal_error = _in_sample_abs_seasonal_error(
            y_past=history, seasonal_period=seasonal_period
        )

    def clear_past_metrics(self) -> None:
        """Drop the cached seasonal error."""
        self._past_abs_seasonal_error = None

    def compute_metric(
        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
    ) -> float:
        """Return the MASE of ``predictions`` against ``data_future``.

        Raises
        ------
        AssertionError
            If ``save_past_metrics`` has not been called beforehand.
        """
        actual, forecast = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
        seasonal_scale = self._past_abs_seasonal_error
        if seasonal_scale is None:
            raise AssertionError("Call `save_past_metrics` before `compute_metric`")

        abs_error = (actual - forecast).abs()
        item_mae = abs_error.groupby(level=ITEMID, sort=False).mean()
        # NOTE(review): a zero seasonal error gives a non-finite ratio; `_safemean` presumably ignores it - confirm.
        scaled_error = item_mae / seasonal_scale
        return self._safemean(scaled_error)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class RMSSE(TimeSeriesScorer):
    """Root mean squared scaled error.

    The mean squared error of each item is divided by that item's in-sample
    squared seasonal error; the ratios are averaged across items and the
    square root of that average is returned. The seasonal error must be cached
    with ``save_past_metrics`` before ``compute_metric`` is called.
    """

    # Name of the tabular regression metric that tabular-based forecasting models use
    # as their training metric when this scorer is selected.
    equivalent_tabular_regression_metric = "root_mean_squared_error"

    def __init__(self):
        # Per-item scaling factors; populated by `save_past_metrics`, reset by `clear_past_metrics`.
        self._past_squared_seasonal_error: Optional[pd.Series] = None

    def save_past_metrics(
        self,
        data_past: TimeSeriesDataFrame,
        target: str = "target",
        seasonal_period: int = 1,
        **kwargs,
    ) -> None:
        """Compute and cache the in-sample squared seasonal error of each item.

        Parameters
        ----------
        data_past : TimeSeriesDataFrame
            Historical data; only the ``target`` column is read.
        target : str, default = "target"
            Name of the column with the observed values.
        seasonal_period : int, default = 1
            Lag used for the seasonal differences.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        history = data_past[target]
        self._past_squared_seasonal_error = _in_sample_squared_seasonal_error(
            y_past=history, seasonal_period=seasonal_period
        )

    def clear_past_metrics(self) -> None:
        """Drop the cached seasonal error."""
        self._past_squared_seasonal_error = None

    def compute_metric(
        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
    ) -> float:
        """Return the RMSSE of ``predictions`` against ``data_future``.

        Raises
        ------
        AssertionError
            If ``save_past_metrics`` has not been called beforehand.
        """
        actual, forecast = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
        seasonal_scale = self._past_squared_seasonal_error
        if seasonal_scale is None:
            raise AssertionError("Call `save_past_metrics` before `compute_metric`")

        squared_error = (actual - forecast).pow(2.0)
        item_mse = squared_error.groupby(level=ITEMID, sort=False).mean()
        # NOTE(review): a zero seasonal error gives a non-finite ratio; `_safemean` presumably ignores it - confirm.
        scaled_error = item_mse / seasonal_scale
        return np.sqrt(self._safemean(scaled_error))
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from autogluon.timeseries import TimeSeriesDataFrame
|
|
4
|
+
|
|
5
|
+
from .abstract import TimeSeriesScorer
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class WQL(TimeSeriesScorer):
    """Weighted quantile loss.

    Also known as weighted pinball loss. For every quantile level the pinball
    loss is summed over all rows and divided by the sum of absolute actual
    values; the result is twice the mean of these ratios across quantile levels.
    """

    # Signals that this scorer evaluates quantile forecasts rather than a point forecast.
    needs_quantile = True

    def compute_metric(
        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
    ) -> float:
        """Return the weighted quantile loss of ``predictions`` against ``data_future``.

        Parameters
        ----------
        data_future : TimeSeriesDataFrame
            Observed values in the forecast horizon.
        predictions : TimeSeriesDataFrame
            Quantile forecasts for the same horizon.
        target : str, default = "target"
            Name of the column holding the values to score against.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        y_true, q_pred, quantile_levels = self._get_quantile_forecast_score_inputs(data_future, predictions, target)
        values_true = y_true.values[:, None]  # shape [N, 1]
        values_pred = q_pred.values  # shape [N, len(quantile_levels)]

        residual = values_true - values_pred
        # Indicator(y <= q) minus the quantile level, broadcast across quantile columns.
        pinball_weight = (values_true <= values_pred) - quantile_levels
        loss_per_quantile = np.abs(residual * pinball_weight).sum(axis=0)
        # No guard against a zero denominator: an all-zero target yields a non-finite result.
        normalizer = np.abs(values_true).sum()
        return 2 * np.mean(loss_per_quantile / normalizer)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from autogluon.timeseries.dataset.ts_dataframe import ITEMID
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _get_seasonal_diffs(*, y_past: pd.Series, seasonal_period: int = 1) -> pd.Series:
    """Return absolute differences between each value and the one ``seasonal_period`` steps earlier.

    Differences are taken separately within each item (the ITEMID index level),
    in the order the items appear in ``y_past``.
    """
    grouped_by_item = y_past.groupby(level=ITEMID, sort=False)
    lagged_diff = grouped_by_item.diff(seasonal_period)
    return lagged_diff.abs()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _in_sample_abs_seasonal_error(*, y_past: pd.Series, seasonal_period: int = 1) -> pd.Series:
    """Compute the mean absolute error of the seasonal naive forecast for each time series.

    The seasonal naive forecast predicts the value observed ``seasonal_period``
    steps earlier. Items whose mean comes out as NaN get an error of 1.0.
    """
    abs_diffs = _get_seasonal_diffs(y_past=y_past, seasonal_period=seasonal_period)
    mean_abs_diff = abs_diffs.groupby(level=ITEMID, sort=False).mean()
    # A NaN mean is replaced with 1.0 as the fallback scale.
    return mean_abs_diff.fillna(1.0)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _in_sample_squared_seasonal_error(*, y_past: pd.Series, seasonal_period: int = 1) -> pd.Series:
    """Compute the mean squared error of the seasonal naive forecast for each time series.

    The seasonal naive forecast predicts the value observed ``seasonal_period``
    steps earlier. Items whose mean comes out as NaN get an error of 1.0.
    """
    abs_diffs = _get_seasonal_diffs(y_past=y_past, seasonal_period=seasonal_period)
    squared_diffs = abs_diffs.pow(2.0)
    mean_squared_diff = squared_diffs.groupby(level=ITEMID, sort=False).mean()
    # A NaN mean is replaced with 1.0 as the fallback scale.
    return mean_squared_diff.fillna(1.0)
|
|
@@ -11,7 +11,7 @@ from autogluon.core.hpo.exceptions import EmptySearchSpace
|
|
|
11
11
|
from autogluon.core.hpo.executors import HpoExecutor
|
|
12
12
|
from autogluon.core.models import AbstractModel
|
|
13
13
|
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
14
|
-
from autogluon.timeseries.
|
|
14
|
+
from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
|
|
15
15
|
from autogluon.timeseries.utils.features import CovariateMetadata
|
|
16
16
|
|
|
17
17
|
from .model_trial import model_trial, skip_hpo
|
|
@@ -40,16 +40,13 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
40
40
|
metadata: CovariateMetadata
|
|
41
41
|
A mapping of different covariate types known to autogluon.timeseries to column names
|
|
42
42
|
in the data set.
|
|
43
|
-
eval_metric : str, default
|
|
44
|
-
Metric by which predictions will be ultimately evaluated on test data.
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
detailed documentation can be found in `gluonts.evaluation.Evaluator`. By default, `WQL`
|
|
48
|
-
will be used.
|
|
43
|
+
eval_metric : Union[str, TimeSeriesScorer], default = "WQL"
|
|
44
|
+
Metric by which predictions will be ultimately evaluated on future test data. This only impacts
|
|
45
|
+
``model.score()``, as eval_metric is not used during training. Available metrics can be found in
|
|
46
|
+
``autogluon.timeseries.metrics``.
|
|
49
47
|
eval_metric_seasonal_period : int, optional
|
|
50
|
-
Seasonal period used to compute
|
|
51
|
-
|
|
52
|
-
Defaults to ``None``, in which case the seasonal period is computed based on the data frequency.
|
|
48
|
+
Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
|
|
49
|
+
``None``, in which case the seasonal period is computed based on the data frequency.
|
|
53
50
|
hyperparameters : dict, default = None
|
|
54
51
|
Hyperparameters that will be used by the model (can be search spaces instead of fixed values).
|
|
55
52
|
If None, model defaults are used. This is identical to passing an empty dictionary.
|
|
@@ -82,7 +79,7 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
82
79
|
path: Optional[str] = None,
|
|
83
80
|
name: Optional[str] = None,
|
|
84
81
|
metadata: Optional[CovariateMetadata] = None,
|
|
85
|
-
eval_metric:
|
|
82
|
+
eval_metric: Union[str, TimeSeriesScorer, None] = None,
|
|
86
83
|
eval_metric_seasonal_period: Optional[int] = None,
|
|
87
84
|
hyperparameters: Dict[str, Union[int, float, str, space.Space]] = None,
|
|
88
85
|
**kwargs,
|
|
@@ -95,7 +92,7 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
95
92
|
eval_metric=None,
|
|
96
93
|
hyperparameters=hyperparameters,
|
|
97
94
|
)
|
|
98
|
-
self.eval_metric:
|
|
95
|
+
self.eval_metric: TimeSeriesScorer = check_get_evaluation_metric(eval_metric)
|
|
99
96
|
self.eval_metric_seasonal_period = eval_metric_seasonal_period
|
|
100
97
|
self.stopping_metric = None
|
|
101
98
|
self.problem_type = "timeseries"
|
|
@@ -106,6 +103,18 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
106
103
|
self.freq: str = freq
|
|
107
104
|
self.prediction_length: int = prediction_length
|
|
108
105
|
self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
|
|
106
|
+
|
|
107
|
+
if not all(0 < q < 1 for q in self.quantile_levels):
|
|
108
|
+
raise ValueError("Invalid quantile_levels specified. Quantiles must be between 0 and 1 (exclusive).")
|
|
109
|
+
|
|
110
|
+
# We ensure that P50 forecast is always among the "raw" predictions generated by _predict.
|
|
111
|
+
# We remove P50 from the final predictions if P50 wasn't present among the specified quantile_levels.
|
|
112
|
+
if 0.5 not in self.quantile_levels:
|
|
113
|
+
self.must_drop_median = True
|
|
114
|
+
self.quantile_levels = sorted(set([0.5] + self.quantile_levels))
|
|
115
|
+
else:
|
|
116
|
+
self.must_drop_median = False
|
|
117
|
+
|
|
109
118
|
self._oof_predictions: Optional[List[TimeSeriesDataFrame]] = None
|
|
110
119
|
|
|
111
120
|
def __repr__(self) -> str:
|
|
@@ -256,22 +265,6 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
256
265
|
"as hyperparameters when initializing or use `hyperparameter_tune` instead."
|
|
257
266
|
)
|
|
258
267
|
|
|
259
|
-
def _check_predict_inputs(
|
|
260
|
-
self,
|
|
261
|
-
data: TimeSeriesDataFrame,
|
|
262
|
-
quantile_levels: Optional[List[float]] = None,
|
|
263
|
-
**kwargs, # noqa: F841
|
|
264
|
-
):
|
|
265
|
-
logger.debug(f"Predicting with time series model {self.name}")
|
|
266
|
-
logger.debug(
|
|
267
|
-
f"\tProvided data for prediction with {len(data)} rows, {data.num_items} items. "
|
|
268
|
-
f"Average time series length is {len(data) / data.num_items:.1f}."
|
|
269
|
-
)
|
|
270
|
-
|
|
271
|
-
quantiles = quantile_levels or self.quantile_levels
|
|
272
|
-
if not all(0 < q < 1 for q in quantiles):
|
|
273
|
-
raise ValueError("Invalid quantile value specified. Quantiles must be between 0 and 1 (exclusive).")
|
|
274
|
-
|
|
275
268
|
def predict(
|
|
276
269
|
self,
|
|
277
270
|
data: Union[TimeSeriesDataFrame, Dict[str, TimeSeriesDataFrame]],
|
|
@@ -293,13 +286,6 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
293
286
|
known_covariates : Optional[TimeSeriesDataFrame]
|
|
294
287
|
A TimeSeriesDataFrame containing the values of the known covariates during the forecast horizon.
|
|
295
288
|
|
|
296
|
-
Other Parameters
|
|
297
|
-
----------------
|
|
298
|
-
quantile_levels
|
|
299
|
-
Quantiles of probabilistic forecasts, if probabilistic forecasts are implemented by the
|
|
300
|
-
corresponding subclass. If None, `self.quantile_levels` will be used instead,
|
|
301
|
-
if provided during initialization.
|
|
302
|
-
|
|
303
289
|
Returns
|
|
304
290
|
-------
|
|
305
291
|
predictions: TimeSeriesDataFrame
|
|
@@ -307,6 +293,22 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
307
293
|
data is given as a separate forecast item in the dictionary, keyed by the `item_id`s
|
|
308
294
|
of input items.
|
|
309
295
|
"""
|
|
296
|
+
predictions = self._predict(data=data, known_covariates=known_covariates, **kwargs)
|
|
297
|
+
# "0.5" might be missing from the quantiles if self is a wrapper (MultiWindowBacktestingModel or ensemble)
|
|
298
|
+
if "0.5" in predictions.columns:
|
|
299
|
+
if self.eval_metric.optimized_by_median:
|
|
300
|
+
predictions["mean"] = predictions["0.5"]
|
|
301
|
+
if self.must_drop_median:
|
|
302
|
+
predictions = predictions.drop("0.5", axis=1)
|
|
303
|
+
return predictions
|
|
304
|
+
|
|
305
|
+
def _predict(
|
|
306
|
+
self,
|
|
307
|
+
data: Union[TimeSeriesDataFrame, Dict[str, TimeSeriesDataFrame]],
|
|
308
|
+
known_covariates: Optional[TimeSeriesDataFrame] = None,
|
|
309
|
+
**kwargs,
|
|
310
|
+
) -> TimeSeriesDataFrame:
|
|
311
|
+
"""Private method for `predict`. See `predict` for documentation of arguments."""
|
|
310
312
|
raise NotImplementedError
|
|
311
313
|
|
|
312
314
|
def _score_with_predictions(
|
|
@@ -316,14 +318,14 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
316
318
|
metric: Optional[str] = None,
|
|
317
319
|
) -> float:
|
|
318
320
|
"""Compute the score measuring how well the predictions align with the data."""
|
|
319
|
-
eval_metric = self.eval_metric if metric is None else metric
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
321
|
+
eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
|
|
322
|
+
return eval_metric.score(
|
|
323
|
+
data=data,
|
|
324
|
+
predictions=predictions,
|
|
323
325
|
prediction_length=self.prediction_length,
|
|
324
|
-
|
|
326
|
+
target=self.target,
|
|
327
|
+
seasonal_period=self.eval_metric_seasonal_period,
|
|
325
328
|
)
|
|
326
|
-
return evaluator(data, predictions) * evaluator.coefficient
|
|
327
329
|
|
|
328
330
|
def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float:
|
|
329
331
|
"""Return the evaluation scores for given metric and dataset. The last
|
|
@@ -80,7 +80,7 @@ def fit_and_save_model(model, fit_kwargs, train_data, val_data, eval_metric, tim
|
|
|
80
80
|
model.score_and_cache_oof(val_data, store_val_score=True, store_predict_time=True)
|
|
81
81
|
|
|
82
82
|
logger.debug(f"\tHyperparameter tune run: {model.name}")
|
|
83
|
-
logger.debug(f"\t\t{model.val_score:<7.4f}".ljust(15) + f"= Validation score ({eval_metric})")
|
|
83
|
+
logger.debug(f"\t\t{model.val_score:<7.4f}".ljust(15) + f"= Validation score ({eval_metric.name_with_sign})")
|
|
84
84
|
logger.debug(f"\t\t{model.fit_time:<7.3f} s".ljust(15) + "= Training runtime")
|
|
85
85
|
logger.debug(f"\t\t{model.predict_time:<7.3f} s".ljust(15) + "= Training runtime")
|
|
86
86
|
model.save()
|
|
@@ -174,7 +174,8 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
174
174
|
data = data.query("item_id in @items_to_keep")
|
|
175
175
|
|
|
176
176
|
mlforecast_df = self._to_mlforecast_df(data, data.static_features)
|
|
177
|
-
|
|
177
|
+
# Unless we set static_features=[], MLForecast interprets all known covariates as static features
|
|
178
|
+
df = self._mlf.preprocess(mlforecast_df, dropna=False, static_features=[])
|
|
178
179
|
# df.query results in 2x memory saving compared to df.dropna(subset="y")
|
|
179
180
|
df = df.query("y.notnull()")
|
|
180
181
|
|
|
@@ -251,7 +252,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
251
252
|
"path": os.path.join(self.path, "tabular_predictor"),
|
|
252
253
|
"verbosity": verbosity - 2,
|
|
253
254
|
"label": MLF_TARGET,
|
|
254
|
-
"eval_metric": self.TIMESERIES_METRIC_TO_TABULAR_METRIC[self.eval_metric],
|
|
255
255
|
**self._get_extra_tabular_init_kwargs(),
|
|
256
256
|
},
|
|
257
257
|
predictor_fit_kwargs={
|
|
@@ -279,14 +279,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
279
279
|
else:
|
|
280
280
|
return pd.Series(1.0, index=item_ids)
|
|
281
281
|
|
|
282
|
-
def predict(
|
|
283
|
-
self,
|
|
284
|
-
data: TimeSeriesDataFrame,
|
|
285
|
-
known_covariates: Optional[TimeSeriesDataFrame] = None,
|
|
286
|
-
**kwargs,
|
|
287
|
-
) -> TimeSeriesDataFrame:
|
|
288
|
-
raise NotImplementedError
|
|
289
|
-
|
|
290
282
|
|
|
291
283
|
class DirectTabularModel(AbstractMLForecastModel):
|
|
292
284
|
"""Predict all future time series values simultaneously using TabularPredictor from AutoGluon-Tabular.
|
|
@@ -333,28 +325,9 @@ class DirectTabularModel(AbstractMLForecastModel):
|
|
|
333
325
|
end of each time series).
|
|
334
326
|
"""
|
|
335
327
|
|
|
336
|
-
TIMESERIES_METRIC_TO_TABULAR_METRIC = {
|
|
337
|
-
"MAPE": "mean_absolute_percentage_error",
|
|
338
|
-
"sMAPE": "symmetric_mean_absolute_percentage_error",
|
|
339
|
-
"WQL": "pinball_loss",
|
|
340
|
-
"MASE": "mean_absolute_error",
|
|
341
|
-
"WAPE": "mean_absolute_error",
|
|
342
|
-
"MSE": "mean_squared_error",
|
|
343
|
-
"RMSE": "root_mean_squared_error",
|
|
344
|
-
"RMSSE": "root_mean_squared_error",
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
def __init__(self, **kwargs):
|
|
348
|
-
super().__init__(**kwargs)
|
|
349
|
-
if 0.5 not in self.quantile_levels:
|
|
350
|
-
self.must_drop_median = True
|
|
351
|
-
self.quantile_levels = sorted(set([0.5] + self.quantile_levels))
|
|
352
|
-
else:
|
|
353
|
-
self.must_drop_median = False
|
|
354
|
-
|
|
355
328
|
@property
|
|
356
329
|
def is_quantile_model(self) -> bool:
|
|
357
|
-
return self.eval_metric
|
|
330
|
+
return self.eval_metric.needs_quantile
|
|
358
331
|
|
|
359
332
|
def _get_model_params(self) -> dict:
|
|
360
333
|
model_params = super()._get_model_params()
|
|
@@ -379,7 +352,7 @@ class DirectTabularModel(AbstractMLForecastModel):
|
|
|
379
352
|
else:
|
|
380
353
|
return super()._compute_residuals_std(val_df=val_df)
|
|
381
354
|
|
|
382
|
-
def
|
|
355
|
+
def _predict(
|
|
383
356
|
self,
|
|
384
357
|
data: TimeSeriesDataFrame,
|
|
385
358
|
known_covariates: Optional[TimeSeriesDataFrame] = None,
|
|
@@ -394,7 +367,7 @@ class DirectTabularModel(AbstractMLForecastModel):
|
|
|
394
367
|
data_future[self.target] = float("inf")
|
|
395
368
|
data_extended = pd.concat([data, data_future])
|
|
396
369
|
mlforecast_df = self._to_mlforecast_df(data_extended, data.static_features)
|
|
397
|
-
df = self._mlf.preprocess(mlforecast_df, dropna=False)
|
|
370
|
+
df = self._mlf.preprocess(mlforecast_df, dropna=False, static_features=[])
|
|
398
371
|
df = df.groupby(MLF_ITEMID, sort=False).tail(self.prediction_length)
|
|
399
372
|
df = df.replace(float("inf"), float("nan"))
|
|
400
373
|
|
|
@@ -404,14 +377,12 @@ class DirectTabularModel(AbstractMLForecastModel):
|
|
|
404
377
|
|
|
405
378
|
if hasattr(self._mlf.ts, "target_transforms"):
|
|
406
379
|
# Ensure that transforms are fitted only on past data
|
|
407
|
-
self._mlf.preprocess(self._to_mlforecast_df(data, None))
|
|
380
|
+
self._mlf.preprocess(self._to_mlforecast_df(data, None), static_features=[])
|
|
408
381
|
for tfm in self._mlf.ts.target_transforms[::-1]:
|
|
409
382
|
predictions = tfm.inverse_transform(predictions)
|
|
410
383
|
predictions = predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP}).set_index(
|
|
411
384
|
[ITEMID, TIMESTAMP]
|
|
412
385
|
)
|
|
413
|
-
if self.must_drop_median:
|
|
414
|
-
predictions = predictions.drop("0.5", axis=1)
|
|
415
386
|
return TimeSeriesDataFrame(predictions)
|
|
416
387
|
|
|
417
388
|
def _postprocess_predictions(self, predictions: np.ndarray) -> pd.DataFrame:
|
|
@@ -429,9 +400,16 @@ class DirectTabularModel(AbstractMLForecastModel):
|
|
|
429
400
|
|
|
430
401
|
def _get_extra_tabular_init_kwargs(self) -> dict:
|
|
431
402
|
if self.is_quantile_model:
|
|
432
|
-
return {
|
|
403
|
+
return {
|
|
404
|
+
"problem_type": ag.constants.QUANTILE,
|
|
405
|
+
"quantile_levels": self.quantile_levels,
|
|
406
|
+
"eval_metric": "pinball_loss",
|
|
407
|
+
}
|
|
433
408
|
else:
|
|
434
|
-
return {
|
|
409
|
+
return {
|
|
410
|
+
"problem_type": ag.constants.REGRESSION,
|
|
411
|
+
"eval_metric": self.eval_metric.equivalent_tabular_regression_metric or "mean_absolute_error",
|
|
412
|
+
}
|
|
435
413
|
|
|
436
414
|
|
|
437
415
|
class RecursiveTabularModel(AbstractMLForecastModel):
|
|
@@ -475,24 +453,13 @@ class RecursiveTabularModel(AbstractMLForecastModel):
|
|
|
475
453
|
end of each time series).
|
|
476
454
|
"""
|
|
477
455
|
|
|
478
|
-
TIMESERIES_METRIC_TO_TABULAR_METRIC = {
|
|
479
|
-
"MAPE": "mean_absolute_percentage_error",
|
|
480
|
-
"sMAPE": "symmetric_mean_absolute_percentage_error",
|
|
481
|
-
"WQL": "mean_absolute_error",
|
|
482
|
-
"MASE": "mean_absolute_error",
|
|
483
|
-
"WAPE": "mean_absolute_error",
|
|
484
|
-
"MSE": "mean_squared_error",
|
|
485
|
-
"RMSE": "root_mean_squared_error",
|
|
486
|
-
"RMSSE": "root_mean_squared_error",
|
|
487
|
-
}
|
|
488
|
-
|
|
489
456
|
def _get_model_params(self) -> dict:
|
|
490
457
|
model_params = super()._get_model_params()
|
|
491
458
|
model_params.setdefault("scaler", "standard")
|
|
492
459
|
model_params.setdefault("differences", [get_seasonality(self.freq)])
|
|
493
460
|
return model_params
|
|
494
461
|
|
|
495
|
-
def
|
|
462
|
+
def _predict(
|
|
496
463
|
self,
|
|
497
464
|
data: TimeSeriesDataFrame,
|
|
498
465
|
known_covariates: Optional[TimeSeriesDataFrame] = None,
|
|
@@ -501,15 +468,18 @@ class RecursiveTabularModel(AbstractMLForecastModel):
|
|
|
501
468
|
from scipy.stats import norm
|
|
502
469
|
|
|
503
470
|
new_df = self._to_mlforecast_df(data, data.static_features)
|
|
504
|
-
if known_covariates is
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
471
|
+
if known_covariates is None:
|
|
472
|
+
future_index = get_forecast_horizon_index_ts_dataframe(data, self.prediction_length)
|
|
473
|
+
known_covariates = pd.DataFrame(columns=[self.target], index=future_index, dtype="float32")
|
|
474
|
+
X_df = self._to_mlforecast_df(known_covariates, data.static_features, include_target=False)
|
|
475
|
+
# If both covariates & static features are missing, set X_df = None to avoid exception from MLForecast
|
|
476
|
+
if len(X_df.columns.difference([MLF_ITEMID, MLF_TIMESTAMP])) == 0:
|
|
477
|
+
X_df = None
|
|
508
478
|
with warning_filter():
|
|
509
479
|
raw_predictions = self._mlf.predict(
|
|
510
480
|
h=self.prediction_length,
|
|
511
481
|
new_df=new_df,
|
|
512
|
-
|
|
482
|
+
X_df=X_df,
|
|
513
483
|
)
|
|
514
484
|
predictions = raw_predictions.rename(columns={MLF_ITEMID: ITEMID, MLF_TIMESTAMP: TIMESTAMP})
|
|
515
485
|
|
|
@@ -526,4 +496,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
|
|
|
526
496
|
return TimeSeriesDataFrame(predictions).reindex(data.item_ids, level=ITEMID)
|
|
527
497
|
|
|
528
498
|
def _get_extra_tabular_init_kwargs(self) -> dict:
|
|
529
|
-
return {
|
|
499
|
+
return {
|
|
500
|
+
"problem_type": ag.constants.REGRESSION,
|
|
501
|
+
"eval_metric": self.eval_metric.equivalent_tabular_regression_metric or "mean_absolute_error",
|
|
502
|
+
}
|
|
@@ -7,8 +7,9 @@ import numpy as np
|
|
|
7
7
|
import autogluon.core as ag
|
|
8
8
|
from autogluon.core.models.greedy_ensemble.ensemble_selection import EnsembleSelection
|
|
9
9
|
from autogluon.timeseries import TimeSeriesDataFrame
|
|
10
|
-
from autogluon.timeseries.
|
|
10
|
+
from autogluon.timeseries.metrics import TimeSeriesScorer
|
|
11
11
|
from autogluon.timeseries.models.ensemble import AbstractTimeSeriesEnsembleModel
|
|
12
|
+
from autogluon.timeseries.utils.datetime import get_seasonality
|
|
12
13
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
14
15
|
|
|
@@ -17,12 +18,15 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
|
|
|
17
18
|
def __init__(
|
|
18
19
|
self,
|
|
19
20
|
ensemble_size: int,
|
|
20
|
-
metric:
|
|
21
|
+
metric: TimeSeriesScorer,
|
|
21
22
|
problem_type: str = ag.constants.QUANTILE,
|
|
22
23
|
sorted_initialization: bool = False,
|
|
23
24
|
bagging: bool = False,
|
|
24
25
|
tie_breaker: str = "random",
|
|
25
26
|
random_state: np.random.RandomState = None,
|
|
27
|
+
prediction_length: int = 1,
|
|
28
|
+
target: str = "target",
|
|
29
|
+
eval_metric_seasonal_period: Optional[int] = None,
|
|
26
30
|
**kwargs,
|
|
27
31
|
):
|
|
28
32
|
super().__init__(
|
|
@@ -35,6 +39,9 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
|
|
|
35
39
|
random_state=random_state,
|
|
36
40
|
**kwargs,
|
|
37
41
|
)
|
|
42
|
+
self.prediction_length = prediction_length
|
|
43
|
+
self.target = target
|
|
44
|
+
self.eval_metric_seasonal_period = eval_metric_seasonal_period
|
|
38
45
|
|
|
39
46
|
def _fit(
|
|
40
47
|
self,
|
|
@@ -47,7 +54,7 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
|
|
|
47
54
|
stacked_predictions = [np.stack(preds) for preds in predictions]
|
|
48
55
|
|
|
49
56
|
self.dummy_pred_per_window = []
|
|
50
|
-
self.
|
|
57
|
+
self.scorer_per_window = []
|
|
51
58
|
self.data_future_per_window = []
|
|
52
59
|
|
|
53
60
|
for window_idx, data in enumerate(labels):
|
|
@@ -57,12 +64,12 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
|
|
|
57
64
|
|
|
58
65
|
self.dummy_pred_per_window.append(dummy_pred)
|
|
59
66
|
|
|
60
|
-
|
|
67
|
+
scorer = copy.deepcopy(self.metric)
|
|
61
68
|
# Split the observed time series once to avoid repeated computations inside the evaluator
|
|
62
|
-
data_past = data.slice_by_timestep(None, -self.
|
|
63
|
-
data_future = data.slice_by_timestep(-self.
|
|
64
|
-
|
|
65
|
-
self.
|
|
69
|
+
data_past = data.slice_by_timestep(None, -self.prediction_length)
|
|
70
|
+
data_future = data.slice_by_timestep(-self.prediction_length, None)
|
|
71
|
+
scorer.save_past_metrics(data_past, target=self.target, seasonal_period=self.eval_metric_seasonal_period)
|
|
72
|
+
self.scorer_per_window.append(scorer)
|
|
66
73
|
self.data_future_per_window.append(data_future)
|
|
67
74
|
|
|
68
75
|
super()._fit(
|
|
@@ -80,8 +87,11 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
|
|
|
80
87
|
for window_idx, data_future in enumerate(self.data_future_per_window):
|
|
81
88
|
dummy_pred = self.dummy_pred_per_window[window_idx]
|
|
82
89
|
dummy_pred[list(dummy_pred.columns)] = y_pred_proba[window_idx]
|
|
83
|
-
|
|
84
|
-
|
|
90
|
+
# We use scorer.compute_metric instead of scorer.score to avoid repeated calls to scorer.save_past_metrics
|
|
91
|
+
metric_value = self.scorer_per_window[window_idx].compute_metric(
|
|
92
|
+
data_future, dummy_pred, target=self.target
|
|
93
|
+
)
|
|
94
|
+
total_score += metric.sign * metric_value
|
|
85
95
|
avg_score = total_score / len(self.data_future_per_window)
|
|
86
96
|
# score: higher is better, regret: lower is better, so we flip the sign
|
|
87
97
|
return -avg_score
|
|
@@ -102,13 +112,15 @@ class TimeSeriesGreedyEnsemble(AbstractTimeSeriesEnsembleModel):
|
|
|
102
112
|
time_limit: Optional[int] = None,
|
|
103
113
|
**kwargs,
|
|
104
114
|
):
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
115
|
+
if self.eval_metric_seasonal_period is None:
|
|
116
|
+
self.eval_metric_seasonal_period = get_seasonality(self.freq)
|
|
117
|
+
ensemble_selection = TimeSeriesEnsembleSelection(
|
|
118
|
+
ensemble_size=self.ensemble_size,
|
|
119
|
+
metric=self.eval_metric,
|
|
108
120
|
prediction_length=self.prediction_length,
|
|
109
|
-
|
|
121
|
+
target=self.target,
|
|
122
|
+
eval_metric_seasonal_period=self.eval_metric_seasonal_period,
|
|
110
123
|
)
|
|
111
|
-
ensemble_selection = TimeSeriesEnsembleSelection(ensemble_size=self.ensemble_size, metric=evaluator)
|
|
112
124
|
ensemble_selection.fit(
|
|
113
125
|
predictions=list(predictions_per_window.values()),
|
|
114
126
|
labels=data_per_window,
|