autogluon.timeseries 1.2.1b20250224__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +106 -0
- autogluon/timeseries/dataset/ts_dataframe.py +256 -141
- autogluon/timeseries/learner.py +86 -52
- autogluon/timeseries/metrics/__init__.py +42 -8
- autogluon/timeseries/metrics/abstract.py +89 -19
- autogluon/timeseries/metrics/point.py +142 -53
- autogluon/timeseries/metrics/quantile.py +46 -21
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +8 -2
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +361 -592
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +189 -0
- autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +282 -194
- autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
- autogluon/timeseries/models/autogluon_tabular/transforms.py +25 -18
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +219 -138
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +81 -50
- autogluon/timeseries/models/ensemble/__init__.py +37 -2
- autogluon/timeseries/models/ensemble/abstract.py +107 -0
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
- autogluon/timeseries/models/ensemble/weighted/basic.py +91 -0
- autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
- autogluon/timeseries/models/gluonts/__init__.py +1 -1
- autogluon/timeseries/models/gluonts/{abstract_gluonts.py → abstract.py} +148 -208
- autogluon/timeseries/models/gluonts/dataset.py +109 -0
- autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +38 -22
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +71 -74
- autogluon/timeseries/models/local/naive.py +13 -9
- autogluon/timeseries/models/local/npts.py +9 -2
- autogluon/timeseries/models/local/statsforecast.py +52 -36
- autogluon/timeseries/models/multi_window/multi_window_model.py +65 -45
- autogluon/timeseries/models/registry.py +64 -0
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
- autogluon/timeseries/models/toto/model.py +249 -0
- autogluon/timeseries/predictor.py +685 -297
- autogluon/timeseries/regressor.py +94 -44
- autogluon/timeseries/splitter.py +8 -32
- autogluon/timeseries/trainer/__init__.py +3 -0
- autogluon/timeseries/trainer/ensemble_composer.py +444 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/{trainer.py → trainer/trainer.py} +387 -390
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/__init__.py +2 -13
- autogluon/timeseries/transforms/covariate_scaler.py +34 -40
- autogluon/timeseries/transforms/target_scaler.py +37 -20
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +3 -5
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/datetime/time_features.py +2 -2
- autogluon/timeseries/utils/features.py +70 -47
- autogluon/timeseries/utils/forecast.py +19 -14
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/utils/warning_filters.py +4 -2
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +49 -36
- autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
- autogluon/timeseries/configs/presets_configs.py +0 -79
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -11
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -585
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -518
- autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -78
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
- autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- autogluon/timeseries/models/presets.py +0 -360
- autogluon.timeseries-1.2.1b20250224-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.2.1b20250224.dist-info/RECORD +0 -68
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
@@ -5,24 +5,28 @@ import os
 import pprint
 import time
 from pathlib import Path
-from typing import Any,
+from typing import Any, Literal, Type, cast, overload
 
 import numpy as np
 import pandas as pd
 
-from autogluon.common.utils.log_utils import
+from autogluon.common.utils.log_utils import (
+    add_log_to_file,
+    set_logger_verbosity,
+    warn_if_mlflow_autologging_is_enabled,
+)
 from autogluon.common.utils.system_info import get_ag_system_info
 from autogluon.common.utils.utils import check_saved_predictor_version, setup_outputdir
 from autogluon.core.utils.decorators import apply_presets
 from autogluon.core.utils.loaders import load_pkl, load_str
 from autogluon.core.utils.savers import save_pkl, save_str
 from autogluon.timeseries import __version__ as current_ag_version
-from autogluon.timeseries.configs import
-from autogluon.timeseries.dataset
+from autogluon.timeseries.configs import get_predictor_presets
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.learner import TimeSeriesLearner
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
-from autogluon.timeseries.splitter import ExpandingWindowSplitter
 from autogluon.timeseries.trainer import TimeSeriesTrainer
+from autogluon.timeseries.utils.forecast import make_future_data_frame
 
 logger = logging.getLogger("autogluon.timeseries")
 
@@ -62,7 +66,7 @@ class TimeSeriesPredictor:
 
         If ``freq`` is provided when creating the predictor, all data passed to the predictor will be automatically
         resampled at this frequency.
-    eval_metric :
+    eval_metric : str | TimeSeriesScorer, default = "WQL"
         Metric by which predictions will be ultimately evaluated on future test data. AutoGluon tunes hyperparameters
         in order to improve this metric on validation data, and ranks models (on validation data) according to this
         metric.
@@ -88,21 +92,29 @@ class TimeSeriesPredictor:
     eval_metric_seasonal_period : int, optional
         Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
         ``None``, in which case the seasonal period is computed based on the data frequency.
-
+    horizon_weight : list[float], optional
+        Weight assigned to each time step in the forecast horizon when computing the ``eval_metric``. If provided, this
+        must be a list with ``prediction_length`` non-negative values, where at least some values are greater than zero.
+        AutoGluon will automatically normalize the weights so that they sum up to ``prediction_length``. By default, all
+        time steps in the forecast horizon have the same weight, which is equivalent to setting ``horizon_weight = [1] * prediction_length``.
+
+        This parameter only affects model selection and ensemble construction; it has no effect on the loss function of
+        the individual forecasting models.
+    known_covariates_names: list[str], optional
         Names of the covariates that are known in advance for all time steps in the forecast horizon. These are also
         known as dynamic features, exogenous variables, additional regressors or related time series. Examples of such
         covariates include holidays, promotions or weather forecasts.
 
        If ``known_covariates_names`` are provided, then:
 
-        - :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`, :meth:`~autogluon.timeseries.TimeSeriesPredictor.evaluate`, and :meth:`~autogluon.timeseries.TimeSeriesPredictor.leaderboard` will expect a
+        - :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`, :meth:`~autogluon.timeseries.TimeSeriesPredictor.evaluate`, and :meth:`~autogluon.timeseries.TimeSeriesPredictor.leaderboard` will expect a dataframe with columns listed in ``known_covariates_names`` (in addition to the ``target`` column).
        - :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict` will expect an additional keyword argument ``known_covariates`` containing the future values of the known covariates in ``TimeSeriesDataFrame`` format.
 
-    quantile_levels :
+    quantile_levels : list[float], optional
        List of increasing decimals that specifies which quantiles should be estimated when making distributional
        forecasts. Defaults to ``[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]``.
    path : str or pathlib.Path, optional
-        Path to the directory where models and intermediate outputs will be saved. Defaults to a timestamped folder
+        Path to the local directory where models and intermediate outputs will be saved. Defaults to a timestamped folder
        ``AutogluonModels/ag-[TIMESTAMP]`` that will be created in the working directory.
    verbosity : int, default = 2
        Verbosity levels range from 0 to 4 and control how much information is printed to stdout. Higher levels
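Editor's note: the ``horizon_weight`` normalization described in the hunk above (weights rescaled to sum to ``prediction_length``) can be checked with a minimal sketch; this is illustrative only and not part of the package::

    import numpy as np

    prediction_length = 4
    horizon_weight = [1.0, 1.0, 1.0, 5.0]  # emphasize the last forecast step

    # AutoGluon rescales the weights so that they sum to prediction_length
    w = np.asarray(horizon_weight, dtype=float)
    normalized = w * prediction_length / w.sum()
    print(normalized)  # [0.5 0.5 0.5 2.5]  (sums to 4 == prediction_length)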
@@ -112,10 +124,10 @@ class TimeSeriesPredictor:
        debug messages from AutoGluon and all logging in dependencies (GluonTS, PyTorch Lightning, AutoGluon-Tabular, etc.)
    log_to_file: bool, default = True
        Whether to save the logs into a file for later reference
-    log_file_path:
+    log_file_path: str | Path, default = "auto"
        File path to save the logs.
-        If auto, logs will be saved under
-        Will be ignored if
+        If auto, logs will be saved under ``predictor_path/logs/predictor_log.txt``.
+        Will be ignored if ``log_to_file`` is set to False
    cache_predictions : bool, default = True
        If True, the predictor will cache and reuse the predictions made by individual models whenever
        :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict`, :meth:`~autogluon.timeseries.TimeSeriesPredictor.leaderboard`,
@@ -133,24 +145,30 @@ class TimeSeriesPredictor:
 
    def __init__(
        self,
-        target:
-        known_covariates_names:
+        target: str | None = None,
+        known_covariates_names: list[str] | None = None,
        prediction_length: int = 1,
-        freq:
-        eval_metric:
-        eval_metric_seasonal_period:
-
+        freq: str | None = None,
+        eval_metric: str | TimeSeriesScorer | None = None,
+        eval_metric_seasonal_period: int | None = None,
+        horizon_weight: list[float] | None = None,
+        path: str | Path | None = None,
        verbosity: int = 2,
        log_to_file: bool = True,
-        log_file_path:
-        quantile_levels:
+        log_file_path: str | Path = "auto",
+        quantile_levels: list[float] | None = None,
        cache_predictions: bool = True,
-        label:
+        label: str | None = None,
        **kwargs,
    ):
        self.verbosity = verbosity
        set_logger_verbosity(self.verbosity, logger=logger)
        self.path = setup_outputdir(path)
+        if self.path.lower().startswith("s3://"):
+            logger.warning(
+                "Warning: S3 paths are not supported for the `path` argument in TimeSeriesPredictor. "
+                "Use a local path and upload the trained predictor to S3 manually if needed"
+            )
        self._setup_log_to_file(log_to_file=log_to_file, log_file_path=log_file_path)
 
        self.cache_predictions = cache_predictions
@@ -182,15 +200,18 @@ class TimeSeriesPredictor:
        if std_freq != str(self.freq):
            logger.info(f"Frequency '{self.freq}' stored as '{std_freq}'")
            self.freq = std_freq
-        self.eval_metric = check_get_evaluation_metric(
-
+        self.eval_metric: TimeSeriesScorer = check_get_evaluation_metric(
+            eval_metric,
+            prediction_length=prediction_length,
+            seasonal_period=eval_metric_seasonal_period,
+            horizon_weight=horizon_weight,
+        )
        if quantile_levels is None:
            quantile_levels = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        self.quantile_levels = sorted(quantile_levels)
        self._learner: TimeSeriesLearner = self._learner_type(
            path_context=self.path,
-            eval_metric=eval_metric,
-            eval_metric_seasonal_period=eval_metric_seasonal_period,
+            eval_metric=self.eval_metric,
            target=self.target,
            known_covariates_names=self.known_covariates_names,
            prediction_length=self.prediction_length,
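Editor's note: with this change the evaluation metric is fully configured at predictor creation time and passed to the learner as a single ``TimeSeriesScorer`` object. A minimal usage sketch based on the new ``__init__`` signature (the parameter values are placeholders)::

    from autogluon.timeseries import TimeSeriesPredictor

    predictor = TimeSeriesPredictor(
        prediction_length=4,
        eval_metric="MASE",
        eval_metric_seasonal_period=7,  # e.g., weekly seasonality for daily data
        horizon_weight=[1.0, 1.0, 1.0, 5.0],
    )
    # predictor.eval_metric is now a TimeSeriesScorer carrying these settings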
@@ -199,20 +220,6 @@ class TimeSeriesPredictor:
            ensemble_model_type=kwargs.pop("ensemble_model_type", None),
        )
 
-        if "ignore_time_index" in kwargs:
-            raise TypeError(
-                "`ignore_time_index` argument to TimeSeriesPredictor.__init__() has been deprecated.\n"
-                "If your data has irregular timestamps, please either 1) specify the desired regular frequency when "
-                "creating the predictor as `TimeSeriesPredictor(freq=...)` or 2) manually convert timestamps to "
-                "regular frequency with `data.convert_frequency(freq=...)`."
-            )
-        for k in ["learner_type", "learner_kwargs"]:
-            if k in kwargs:
-                val = kwargs.pop(k)
-                logger.warning(
-                    f"Passing `{k}` to TimeSeriesPredictor has been deprecated and will be removed in v1.4. "
-                    f"The provided value {val} will be ignored."
-                )
        if len(kwargs) > 0:
            for key in kwargs:
                raise TypeError(f"TimeSeriesPredictor.__init__() got an unexpected keyword argument '{key}'")
@@ -221,7 +228,16 @@ class TimeSeriesPredictor:
    def _trainer(self) -> TimeSeriesTrainer:
        return self._learner.load_trainer()  # noqa
 
-
+    @property
+    def is_fit(self) -> bool:
+        return self._learner.is_fit
+
+    def _assert_is_fit(self, method_name: str) -> None:
+        """Check if predictor is fit and raise AssertionError with informative message if not."""
+        if not self.is_fit:
+            raise AssertionError(f"Predictor is not fit. Call `.fit` before calling `.{method_name}`. ")
+
+    def _setup_log_to_file(self, log_to_file: bool, log_file_path: str | Path) -> None:
        if log_to_file:
            if log_file_path == "auto":
                log_file_path = os.path.join(self.path, "logs", self._predictor_log_file_name)
@@ -231,7 +247,7 @@ class TimeSeriesPredictor:
 
    def _to_data_frame(
        self,
-        data:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
        name: str = "data",
    ) -> TimeSeriesDataFrame:
        if isinstance(data, TimeSeriesDataFrame):
@@ -252,7 +268,7 @@ class TimeSeriesPredictor:
 
    def _check_and_prepare_data_frame(
        self,
-        data:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
        name: str = "data",
    ) -> TimeSeriesDataFrame:
        """Ensure that TimeSeriesDataFrame has a sorted index and a valid frequency.
@@ -261,8 +277,8 @@ class TimeSeriesPredictor:
 
        Parameters
        ----------
-        data :
-            Data as a
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
+            Data as a dataframe or path to file storing the data.
        name : str
            Name of the data that will be used in log messages (e.g., 'train_data', 'tuning_data', or 'data').
 
@@ -274,7 +290,10 @@ class TimeSeriesPredictor:
        df: TimeSeriesDataFrame = self._to_data_frame(data, name=name)
        if not pd.api.types.is_numeric_dtype(df[self.target]):
            raise ValueError(f"Target column {name}['{self.target}'] has a non-numeric dtype {df[self.target].dtype}")
+        # Assign makes a copy, so future operations can be performed in-place
        df = df.assign(**{self.target: df[self.target].astype("float64")})
+        df.replace(to_replace=[float("-inf"), float("inf")], value=float("nan"), inplace=True)
+
        # MultiIndex.is_monotonic_increasing checks if index is sorted by ["item_id", "timestamp"]
        if not df.index.is_monotonic_increasing:
            df = df.sort_index()
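Editor's note: the new ``±inf`` handling above follows the standard pandas pattern; a standalone sketch::

    import pandas as pd

    df = pd.DataFrame({"target": [1.0, float("inf"), 2.0, float("-inf")]})

    # assign() copies, so the subsequent replace() can safely run in-place
    df = df.assign(target=df["target"].astype("float64"))
    df.replace(to_replace=[float("-inf"), float("inf")], value=float("nan"), inplace=True)
    print(df["target"].tolist())  # [1.0, nan, 2.0, nan]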
@@ -300,15 +319,32 @@ class TimeSeriesPredictor:
            df = df.convert_frequency(freq=self.freq)
        return df
 
-    def
-
-
+    def _check_and_prepare_data_frame_for_evaluation(
+        self, data: TimeSeriesDataFrame, cutoff: int | None = None, name: str = "data"
+    ) -> TimeSeriesDataFrame:
+        """
+        Make sure that provided evaluation data includes both historical and future time series values.
+        Slices the dataframe based on cutoff, if needed.
+        """
+        cutoff = -1 * self.prediction_length if cutoff is None else cutoff
+        if not (isinstance(cutoff, int) and cutoff <= -self.prediction_length):
+            raise ValueError(f"`cutoff` should be a negative integer <= -prediction_length, got: {cutoff=}")
+
+        expected_length = -cutoff
+
+        if data.num_timesteps_per_item().min() <= expected_length:
+            var_name = "-cutoff" if expected_length > self.prediction_length else "prediction_length"
            raise ValueError(
-                f"Cannot reserve last
-                f"time series in {name}. Please make sure that {name} includes both
-                f"all time series have length >
+                f"Cannot reserve last {expected_length} time steps for evaluation in some "
+                f"time series in {name}. Please make sure that {name} includes both historical and future data, and that"
+                f"all time series have length > {var_name} (at least {expected_length + 1})"
            )
 
+        if cutoff < -self.prediction_length:
+            data = data.slice_by_timestep(None, cutoff + self.prediction_length)
+
+        return data
+
    def _get_dataset_stats(self, data: TimeSeriesDataFrame) -> str:
        ts_lengths = data.num_timesteps_per_item()
        median_length = ts_lengths.median()
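Editor's note: the ``cutoff`` convention validated above is a negative offset from the end of each series. A worked example, assuming ``prediction_length=2``::

    prediction_length = 2
    cutoff = -5                # must be an int <= -prediction_length
    expected_length = -cutoff  # every series needs strictly more than 5 time steps

    # For a series of length 10, slice_by_timestep(None, cutoff + prediction_length)
    # keeps the first 10 - 3 = 7 time steps; the last 2 of those 7 steps form the
    # evaluation window, leaving 5 steps of history before it.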
@@ -324,36 +360,10 @@ class TimeSeriesPredictor:
            f"Median time series length is {median_length:.0f} (min={min_length}, max={max_length}). "
        )
 
-    def _reduce_num_val_windows_if_necessary(
-        self,
-        train_data: TimeSeriesDataFrame,
-        original_num_val_windows: int,
-        val_step_size: int,
-    ) -> int:
-        """Adjust num_val_windows based on the length of time series in train_data.
-
-        Chooses num_val_windows such that TS with median length is long enough to perform num_val_windows validations
-        (at least 1, at most `original_num_val_windows`).
-
-        In other words, find largest `num_val_windows` that satisfies
-        median_length >= min_train_length + prediction_length + (num_val_windows - 1) * val_step_size
-        """
-        median_length = train_data.num_timesteps_per_item().median()
-        num_val_windows_for_median_ts = int(
-            (median_length - self._min_train_length - self.prediction_length) // val_step_size + 1
-        )
-        new_num_val_windows = min(original_num_val_windows, max(1, num_val_windows_for_median_ts))
-        if new_num_val_windows < original_num_val_windows:
-            logger.warning(
-                f"Time series in train_data are too short for chosen num_val_windows={original_num_val_windows}. "
-                f"Reducing num_val_windows to {new_num_val_windows}."
-            )
-        return new_num_val_windows
-
    def _filter_useless_train_data(
        self,
        train_data: TimeSeriesDataFrame,
-        num_val_windows: int,
+        num_val_windows: tuple[int, ...],
        val_step_size: int,
    ) -> TimeSeriesDataFrame:
        """Remove time series from train_data that either contain all NaNs or are too short for chosen settings.
@@ -364,7 +374,8 @@ class TimeSeriesPredictor:
        In other words, this method removes from train_data all time series with only NaN values or length less than
        min_train_length + prediction_length + (num_val_windows - 1) * val_step_size
        """
-
+        total_num_val_windows = sum(num_val_windows)
+        min_length = self._min_train_length + self.prediction_length + (total_num_val_windows - 1) * val_step_size
        train_lengths = train_data.num_timesteps_per_item()
        too_short_items = train_lengths.index[train_lengths < min_length]
 
@@ -373,12 +384,14 @@ class TimeSeriesPredictor:
            f"\tRemoving {len(too_short_items)} short time series from train_data. Only series with length "
            f">= {min_length} will be used for training."
        )
-            train_data = train_data.query("item_id not in @too_short_items")
+            train_data = train_data.query("item_id not in @too_short_items")
 
-        all_nan_items = train_data.item_ids[
+        all_nan_items = train_data.item_ids[
+            train_data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
+        ]
        if len(all_nan_items) > 0:
            logger.info(f"\tRemoving {len(all_nan_items)} time series consisting of only NaN values from train_data.")
-            train_data = train_data.query("item_id not in @all_nan_items")
+            train_data = train_data.query("item_id not in @all_nan_items")
 
        if len(too_short_items) or len(all_nan_items):
            logger.info(f"\tAfter filtering, train_data has {self._get_dataset_stats(train_data)}")
@@ -390,30 +403,31 @@ class TimeSeriesPredictor:
        )
        return train_data
 
-    @apply_presets(
+    @apply_presets(get_predictor_presets())
    def fit(
        self,
-        train_data:
-        tuning_data:
-        time_limit:
-        presets:
-        hyperparameters:
-        hyperparameter_tune_kwargs:
-        excluded_model_types:
-
-
-
+        train_data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+        tuning_data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+        time_limit: int | None = None,
+        presets: str | None = None,
+        hyperparameters: str | dict[str | Type, Any] | None = None,
+        hyperparameter_tune_kwargs: str | dict | None = None,
+        excluded_model_types: list[str] | None = None,
+        ensemble_hyperparameters: dict[str, Any] | list[dict[str, Any]] | None = None,
+        num_val_windows: int | tuple[int, ...] = 1,
+        val_step_size: int | None = None,
+        refit_every_n_windows: int | None = 1,
        refit_full: bool = False,
        enable_ensemble: bool = True,
        skip_model_selection: bool = False,
-        random_seed:
-        verbosity:
+        random_seed: int | None = 123,
+        verbosity: int | None = None,
    ) -> "TimeSeriesPredictor":
        """Fit probabilistic forecasting models to the given time series dataset.
 
        Parameters
        ----------
-        train_data :
+        train_data : TimeSeriesDataFrame | pd.DataFrame | Path | str
            Training data in the :class:`~autogluon.timeseries.TimeSeriesDataFrame` format.
 
            Time series with length ``<= (num_val_windows + 1) * prediction_length`` will be ignored during training.
@@ -437,30 +451,23 @@ class TimeSeriesPredictor:
 
                data.static_features["store_id"] = data.static_features["store_id"].astype("category")
 
-            If provided data is a
-            ``
-
-        tuning_data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str], optional
+            If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
+            If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
+        tuning_data : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
            Data reserved for model selection and hyperparameter tuning, rather than training individual models. Also
            used to compute the validation scores. Note that only the last ``prediction_length`` time steps of each
            time series are used for computing the validation score.
 
            If ``tuning_data`` is provided, multi-window backtesting on training data will be disabled, the
-
+            ``num_val_windows`` will be set to ``0``, and ``refit_full`` will be set to ``False``.
 
            Leaving this argument empty and letting AutoGluon automatically generate the validation set from
            ``train_data`` is a good default.
 
-
-            the columns listed in ``known_covariates_names`` with the covariates values aligned with the target time
-            series.
-
-            If ``train_data`` has past covariates or static features, ``tuning_data`` must have also include them (with
-            same columns names and dtypes).
-
-            If provided data is a path or a pandas.DataFrame, AutoGluon will attempt to automatically convert it to a
-            ``TimeSeriesDataFrame``.
+            The names and dtypes of columns and static features in ``tuning_data`` must match the ``train_data``.
 
+            If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
+            If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
        time_limit : int, optional
            Approximately how long :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit` will run (wall-clock time in
            seconds). If not specified, :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit` will run until all models
@@ -479,33 +486,22 @@ class TimeSeriesPredictor:
 
            Available presets:
 
-            - ``"fast_training"``:
-
-            - ``"
-            - ``"high_quality"``: All ML models available in AutoGluon + additional statistical models (``NPTS``, ``AutoETS``,
-              ``DynamicOptimizedTheta``). Much more accurate than ``medium_quality``, but takes longer to train.
+            - ``"fast_training"``: Simple statistical and tree-based ML models. These models are fast to train but may not be very accurate.
+            - ``"medium_quality"``: Same models as above, plus deep learning models ``TemporalFusionTransformer`` and Chronos-Bolt (small). Produces good forecasts with reasonable training time.
+            - ``"high_quality"``: A mix of multiple DL, ML and statistical forecasting models available in AutoGluon that offers the best forecast accuracy. Much more accurate than ``medium_quality``, but takes longer to train.
            - ``"best_quality"``: Same models as in ``"high_quality"``, but performs validation with multiple backtests. Usually better than ``high_quality``, but takes even longer to train.
 
-            Available presets with the
+            Available presets with the `Chronos-Bolt <https://github.com/amazon-science/chronos-forecasting>`_ model:
 
            - ``"bolt_{model_size}"``: where model size is one of ``tiny,mini,small,base``. Uses the Chronos-Bolt pretrained model for zero-shot forecasting.
              See the documentation for ``ChronosModel`` or see `Hugging Face <https://huggingface.co/collections/amazon/chronos-models-65f1791d630a8d57cb718444>`_ for more information.
 
-
-
+            Exact definitions of these presets can be found in the source code
+            [`1 <https://github.com/autogluon/autogluon/blob/stable/timeseries/src/autogluon/timeseries/configs/presets_configs.py>`_,
+            `2 <https://github.com/autogluon/autogluon/blob/stable/timeseries/src/autogluon/timeseries/models/presets.py>`_].
 
-
-
-            Note that a GPU is required for model sizes ``small``, ``base`` and ``large``.
-            - ``"chronos"``: alias for ``"chronos_small"``.
-            - ``"chronos_ensemble"``: builds an ensemble of seasonal naive, tree-based and deep learning models with fast inference
-              and ``"chronos_small"``.
-            - ``"chronos_large_ensemble"``: builds an ensemble of seasonal naive, tree-based and deep learning models
-              with fast inference and ``"chronos_large"``.
-
-            Details for these presets can be found in ``autogluon/timeseries/configs/presets_configs.py``. If not
-            provided, user-provided values for ``hyperparameters`` and ``hyperparameter_tune_kwargs`` will be used
-            (defaulting to their default values specified below).
+            If no ``presets`` are selected, user-provided values for ``hyperparameters`` will be used (defaulting to their
+            default values specified below).
        hyperparameters : str or dict, optional
            Determines what models are trained and what hyperparameters are used by each model.
 
@@ -590,7 +586,7 @@ class TimeSeriesPredictor:
                        "scheduler": "local",
                    },
                )
-        excluded_model_types:
+        excluded_model_types: list[str], optional
            Banned subset of model types to avoid training during ``fit()``, even if present in ``hyperparameters``.
            For example, the following code will train all models included in the ``high_quality`` presets except ``DeepAR``::
 
@@ -599,13 +595,36 @@ class TimeSeriesPredictor:
                    presets="high_quality",
                    excluded_model_types=["DeepAR"],
                )
-
+        ensemble_hyperparameters : dict or list of dict, optional
+            Hyperparameters for ensemble models. Can be a single dict for one ensemble layer, or a list of dicts
+            for multiple ensemble layers (multi-layer stacking).
+
+            For single-layer ensembling (default)::
+
+                predictor.fit(
+                    ...,
+                    ensemble_hyperparameters={"WeightedEnsemble": {"ensemble_size": 10}},
+                )
+
+            For multi-layer ensembling, provide a list where each element configures one ensemble layer::
+
+                predictor.fit(
+                    ...,
+                    num_val_windows=(2, 3),
+                    ensemble_hyperparameters=[
+                        {"WeightedEnsemble": {"ensemble_size": 5}, "SimpleAverageEnsemble": {}},  # Layer 1
+                        {"PerformanceWeightedEnsemble": {}},  # Layer 2
+                    ],
+                )
+
+            When using multi-layer ensembling, ``num_val_windows`` must be a tuple of integers, and ``len(ensemble_hyperparameters)`` must match ``len(num_val_windows)``.
+        num_val_windows : int | tuple[int, ...], default = 1
            Number of backtests done on ``train_data`` for each trained model to estimate the validation performance.
-
-            of time series in ``train_data`` are long enough for the chosen number of backtests.
+            This parameter is also used to control multi-layer ensembling.
 
-            Increasing this parameter increases the training time roughly by a factor of
-            See
+            Increasing this parameter increases the training time roughly by a factor of
+            ``num_val_windows // refit_every_n_windows``. See ``refit_every_n_windows`` and ``val_step_size`` for
+            details.
 
            For example, for ``prediction_length=2``, ``num_val_windows=3`` and ``val_step_size=1`` the folds are::
 
@@ -616,19 +635,41 @@ class TimeSeriesPredictor:
 
            where ``x`` are the train time steps and ``y`` are the validation time steps.
 
-            This
+            This parameter can also be used to control how many of the backtesting windows are reserved for training
+            multiple layers of ensemble models. By default, AutoGluon-TimeSeries uses only a single layer of ensembles
+            trained on the backtest windows specified by the ``num_val_windows`` parameter. However, the
+            ``ensemble_hyperparameters`` argument can be used to specify multiple layers of ensembles. In this case,
+            a tuple of integers can be provided in ``num_val_windows`` to control how many of the backtesting windows
+            will be used to train which ensemble layers.
+
+            For example, if ``len(ensemble_hyperparameters) == 2``, a 2-tuple ``num_val_windows=(2, 3)`` is analogous
+            to ``num_val_windows=5``, except the first layer of ensemble models will be trained on the first two
+            backtest windows, and the second layer will be trained on the latter three. Validation scores of all models
+            will be computed on the last three windows.
+
+            If ``len(ensemble_hyperparameters) == 1``, then ``num_val_windows=(5,)`` has the same effect as
+            ``num_val_windows=5``.
+
+            If ``tuning_data`` is provided and ``len(ensemble_hyperparameters) == 1``, then this parameter is ignored.
+            Validation and ensemble training will be performed on ``tuning_data``.
+
+            If ``tuning_data`` is provided and ``len(ensemble_hyperparameters) > 1``, then this method expects that
+            ``len(num_val_windows) > 1``. In this case, the last element of ``num_val_windows`` will be ignored. The
+            last layer of ensemble training will be performed on ``tuning_data``. Validation scores will likewise be
+            computed on ``tuning_data``.
+
        val_step_size : int or None, default = None
            Step size between consecutive validation windows. If set to ``None``, defaults to ``prediction_length``
            provided when creating the predictor.
 
-
+            If ``tuning_data`` is provided and ``len(ensemble_hyperparameters) == 1``, then this parameter is ignored.
        refit_every_n_windows: int or None, default = 1
            When performing cross validation, each model will be retrained every ``refit_every_n_windows`` validation
-            windows, where the number of validation windows is specified by
-            default setting where
+            windows, where the number of validation windows is specified by ``num_val_windows``. Note that in the
+            default setting where ``num_val_windows=1``, this argument has no effect.
 
            If set to ``None``, models will only be fit once for the first (oldest) validation window. By default,
-
+            ``refit_every_n_windows=1``, i.e., all models will be refit for each validation window.
        refit_full : bool, default = False
            If True, after training is complete, AutoGluon will attempt to re-train all models using all of training
            data (including the data initially reserved for validation). This argument has no effect if ``tuning_data``
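Editor's note: to make the window accounting for tuple-valued ``num_val_windows`` concrete, a sketch of how a 2-tuple splits five backtest windows (pure illustration, not library code)::

    num_val_windows = (2, 3)             # two ensemble layers
    total = sum(num_val_windows)         # 5 windows overall, as with num_val_windows=5

    # windows are ordered from oldest to newest
    layer_1_windows = list(range(0, 2))  # first 2 windows train the layer-1 ensembles
    layer_2_windows = list(range(2, 5))  # last 3 windows train the layer-2 ensembles
    # validation scores for all models are computed on the last 3 windows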
@@ -649,12 +690,15 @@ class TimeSeriesPredictor:
 
        """
        time_start = time.time()
-        if self.
-            raise AssertionError(
+        if self.is_fit:
+            raise AssertionError(
+                "Predictor is already fit! To fit additional models create a new `TimeSeriesPredictor`."
+            )
 
        if verbosity is None:
            verbosity = self.verbosity
        set_logger_verbosity(verbosity, logger=logger)
+        warn_if_mlflow_autologging_is_enabled(logger=logger)
 
        logger.info("Beginning AutoGluon training..." + (f" Time limit = {time_limit}s" if time_limit else ""))
        logger.info(f"AutoGluon will save models to '{self.path}'")
@@ -668,7 +712,8 @@ class TimeSeriesPredictor:
            target=self.target,
            known_covariates_names=self.known_covariates_names,
            eval_metric=self.eval_metric,
-            eval_metric_seasonal_period=self.
+            eval_metric_seasonal_period=self.eval_metric.seasonal_period,
+            horizon_weight=self.eval_metric.horizon_weight,
            quantile_levels=self.quantile_levels,
            freq=self.freq,
            time_limit=time_limit,
@@ -695,39 +740,29 @@ class TimeSeriesPredictor:
        if val_step_size is None:
            val_step_size = self.prediction_length
 
-
-        num_val_windows
-
-
+        num_val_windows, ensemble_hyperparameters = self._validate_and_normalize_validation_and_ensemble_inputs(
+            num_val_windows=num_val_windows,
+            ensemble_hyperparameters=ensemble_hyperparameters,
+            val_step_size=val_step_size,
+            median_timeseries_length=train_data.num_timesteps_per_item().median(),
+            tuning_data_provided=tuning_data is not None,
+        )
 
        if tuning_data is not None:
            tuning_data = self._check_and_prepare_data_frame(tuning_data, name="tuning_data")
-            self.
+            tuning_data = self._check_and_prepare_data_frame_for_evaluation(tuning_data, name="tuning_data")
            logger.info(f"Provided tuning_data has {self._get_dataset_stats(tuning_data)}")
-            # TODO: Use num_val_windows to perform multi-window backtests on tuning_data
-            if num_val_windows > 0:
-                logger.warning(
-                    "\tSetting num_val_windows = 0 (disabling backtesting on train_data) because tuning_data is provided."
-                )
-                num_val_windows = 0
 
-        if num_val_windows
-            raise ValueError("Please set num_val_windows >= 1 or provide custom tuning_data")
-
-        if num_val_windows <= 1 and refit_every_n_windows is not None and refit_every_n_windows > 1:
+        if sum(num_val_windows) <= 1 and refit_every_n_windows is not None and refit_every_n_windows > 1:
            logger.warning(
-                f"\trefit_every_n_windows provided as {refit_every_n_windows} but num_val_windows is set to
-                "
+                f"\trefit_every_n_windows provided as {refit_every_n_windows} but num_val_windows is set to "
+                f"{num_val_windows}. refit_every_n_windows will have no effect."
            )
 
        if not skip_model_selection:
-
-
-            )
-
-            val_splitter = ExpandingWindowSplitter(
-                prediction_length=self.prediction_length, num_val_windows=num_val_windows, val_step_size=val_step_size
-            )
+            # When tuning_data is provided, ignore the last element of num_val_windows for filtering purposes
+            filter_num_val_windows = num_val_windows[:-1] if tuning_data is not None else num_val_windows
+            train_data = self._filter_useless_train_data(train_data, filter_num_val_windows, val_step_size)
 
        time_left = None if time_limit is None else time_limit - (time.time() - time_start)
        self._learner.fit(
@@ -736,9 +771,11 @@ class TimeSeriesPredictor:
            val_data=tuning_data,
            hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
            excluded_model_types=excluded_model_types,
+            ensemble_hyperparameters=ensemble_hyperparameters,
            time_limit=time_left,
            verbosity=verbosity,
-
+            num_val_windows=num_val_windows,
+            val_step_size=val_step_size,
            refit_every_n_windows=refit_every_n_windows,
            skip_model_selection=skip_model_selection,
            enable_ensemble=enable_ensemble,
@@ -753,40 +790,148 @@ class TimeSeriesPredictor:
        self.save()
        return self
 
-    def
+    def _validate_and_normalize_validation_and_ensemble_inputs(
+        self,
+        num_val_windows: int | tuple[int, ...],
+        ensemble_hyperparameters: dict[str, Any] | list[dict[str, Any]] | None,
+        val_step_size: int,
+        median_timeseries_length: float,
+        tuning_data_provided: bool,
+    ) -> tuple[tuple[int, ...], list[dict[str, Any]] | None]:
+        """Validate and normalize num_val_windows and ensemble_hyperparameters for multi-layer ensembling."""
+        original_num_val_windows = num_val_windows if isinstance(num_val_windows, tuple) else (num_val_windows,)
+
+        if ensemble_hyperparameters is not None:
+            if isinstance(ensemble_hyperparameters, dict):
+                ensemble_hyperparameters = [ensemble_hyperparameters]
+
+            if len(ensemble_hyperparameters) != len(original_num_val_windows):
+                raise ValueError(
+                    f"Length mismatch: num_val_windows has {len(original_num_val_windows)} layers but "
+                    f"ensemble_hyperparameters has {len(ensemble_hyperparameters)} layers. "
+                    f"These must match for multi-layer ensembling."
+                )
+
+        num_val_windows = self._normalize_num_val_windows_input(num_val_windows, tuning_data_provided)
+        num_val_windows = self._reduce_num_val_windows_if_necessary(
+            num_val_windows, val_step_size, median_timeseries_length, tuning_data_provided
+        )
+
+        if ensemble_hyperparameters is not None and len(num_val_windows) < len(ensemble_hyperparameters):
+            logger.warning(
+                f"Time series too short: reducing ensemble layers from {len(ensemble_hyperparameters)} to "
+                f"{len(num_val_windows)}. Only the first {len(num_val_windows)} ensemble layer(s) will be trained."
+            )
+            ensemble_hyperparameters = ensemble_hyperparameters[: len(num_val_windows)]
+
+        return num_val_windows, ensemble_hyperparameters
+
+    def _normalize_num_val_windows_input(
+        self,
+        num_val_windows: int | tuple[int, ...],
+        tuning_data_provided: bool,
+    ) -> tuple[int, ...]:
+        if isinstance(num_val_windows, int):
+            num_val_windows = (num_val_windows,)
+        if not isinstance(num_val_windows, tuple):
+            raise TypeError(f"num_val_windows must be int or tuple[int, ...], got {type(num_val_windows)}")
+        if len(num_val_windows) == 0:
+            raise ValueError("num_val_windows tuple cannot be empty")
+        if tuning_data_provided:
+            num_val_windows = num_val_windows[:-1] + (1,)
+            logger.warning(
+                f"\tTuning data is provided. Setting num_val_windows = {num_val_windows}. Validation scores will"
+                " be computed on a single window of tuning_data."
+            )
+        if not all(isinstance(n, int) and n > 0 for n in num_val_windows):
+            raise ValueError("All elements of num_val_windows must be positive integers.")
+        return num_val_windows
+
+    def _reduce_num_val_windows_if_necessary(
+        self,
+        num_val_windows: tuple[int, ...],
+        val_step_size: int,
+        median_time_series_length: float,
+        tuning_data_provided: bool,
+    ) -> tuple[int, ...]:
+        """Adjust num_val_windows based on the length of time series in train_data.
+
+        Chooses num_val_windows such that TS with median length is long enough to perform num_val_windows validations
+        (at least 1, at most `original_num_val_windows`).
+
+        In other words, find largest `num_val_windows` that satisfies
+        median_length >= min_train_length + prediction_length + (num_val_windows - 1) * val_step_size
+
+        If tuning_data is provided, the last element of `num_val_windows` is ignored when computing the number of
+        requested validation windows.
+        """
+        num_val_windows_for_median_ts = int(
+            (median_time_series_length - self._min_train_length - self.prediction_length) // val_step_size + 1
+        )
+        max_allowed = max(1, num_val_windows_for_median_ts)
+        total_requested = sum(num_val_windows) if not tuning_data_provided else sum(num_val_windows[:-1])
+
+        if max_allowed >= total_requested:
+            return num_val_windows
+
+        logger.warning(
+            f"Time series in train_data are too short for chosen num_val_windows={num_val_windows}. "
+            f"Reducing num_val_windows to {max_allowed} total windows."
+        )
+
+        result = list(num_val_windows)
+
+        # Starting from the last group of windows, reduce number of windows in each group by 1,
+        # until sum(num_val_windows) <= max_allowed is satisfied.
+        for i in range(len(result) - 1, -1, -1):
+            while result[i] > 1 and sum(result) > max_allowed:
+                result[i] -= 1
+            if sum(result) <= max_allowed:
+                break
+
+        # It is possible that the above for loop reduced the number of windows in each group to 1
+        # (i.e. result = [1] * len(num_val_windows)), but still sum(result) > max_allowed. In this
+        # case we set result = [1] * max_allowed
+        if sum(result) > max_allowed:
+            result = [1] * max_allowed
+
+        return tuple(result)
+
+    def model_names(self) -> list[str]:
        """Returns the list of model names trained by this predictor object."""
+        self._assert_is_fit("model_names")
        return self._trainer.get_model_names()
 
    def predict(
        self,
-        data:
-        known_covariates:
-        model:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+        known_covariates: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+        model: str | None = None,
        use_cache: bool = True,
-        random_seed:
+        random_seed: int | None = 123,
    ) -> TimeSeriesDataFrame:
        """Return quantile and mean forecasts for the given dataset, starting from the end of each time series.
 
        Parameters
        ----------
-        data :
-
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
+            Historical time series data for which the forecast needs to be made.
 
-
-
+            The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+            the predictor.
 
-            If
-
-
-            If provided data is an instance of pandas DataFrame, AutoGluon will attempt to automatically convert it
-            to a ``TimeSeriesDataFrame``.
-        known_covariates : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str], optional
+            If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
+            If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
+        known_covariates : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
            If ``known_covariates_names`` were specified when creating the predictor, it is necessary to provide the
-            values of the known covariates for each time series during the forecast horizon.
+            values of the known covariates for each time series during the forecast horizon. Specifically:
+
+            - Must contain all columns listed in ``known_covariates_names``.
+            - Must include all ``item_id`` values present in the input ``data``.
+            - Must include ``timestamp`` values for the full forecast horizon (i.e., ``prediction_length`` time steps) following the end of each series in the input ``data``.
 
-
-
-            - The ``timestamp`` index must include the values for ``prediction_length`` many time steps into the future from the end of each time series in ``data``
+            You can use :meth:`autogluon.timeseries.TimeSeriesPredictor.make_future_data_frame` to generate a template
+            containing the required ``item_id`` and ``timestamp`` combinations for the ``known_covariates`` dataframe.
 
            See example below.
        model : str, optional
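Editor's note: a usage sketch for the ``known_covariates`` workflow documented above, using ``make_future_data_frame`` to build the index template; the ``promotion`` column is a placeholder for whatever names appear in ``known_covariates_names``::

    # Template with the required (item_id, timestamp) combinations for the horizon
    known_covariates = predictor.make_future_data_frame(data)

    # Fill in future values for every column listed in known_covariates_names
    known_covariates["promotion"] = 0

    predictions = predictor.predict(data, known_covariates=known_covariates)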
@@ -827,6 +972,7 @@ class TimeSeriesPredictor:
        B       2020-03-04     17.1
                2020-03-05      8.3
        """
+        self._assert_is_fit("predict")
        # Save original item_id order to return predictions in the same order as input data
        data = self._to_data_frame(data)
        original_item_id_order = data.item_ids
@@ -840,16 +986,212 @@ class TimeSeriesPredictor:
            use_cache=use_cache,
            random_seed=random_seed,
        )
-        return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=ITEMID))
+        return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID))
+
+    @overload
+    def backtest_predictions(
+        self,
+        data: TimeSeriesDataFrame | None = None,
+        *,
+        model: str | None = None,
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
+        use_cache: bool = True,
+    ) -> list[TimeSeriesDataFrame]: ...
+
+    @overload
+    def backtest_predictions(
+        self,
+        data: TimeSeriesDataFrame | None = None,
+        *,
+        model: list[str],
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
+        use_cache: bool = True,
+    ) -> dict[str, list[TimeSeriesDataFrame]]: ...
+
+    def backtest_predictions(
+        self,
+        data: TimeSeriesDataFrame | None = None,
+        *,
+        model: str | list[str] | None = None,
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
+        use_cache: bool = True,
+    ) -> list[TimeSeriesDataFrame] | dict[str, list[TimeSeriesDataFrame]]:
+        """Return predictions for multiple validation windows.
+
+        When ``data=None``, returns the predictions that were saved during training. Otherwise, generates new
+        predictions by splitting ``data`` into multiple windows using an expanding window strategy.
+
+        The corresponding target values for each window can be obtained using
+        :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_targets`.
+
+        Parameters
+        ----------
+        data : TimeSeriesDataFrame, optional
+            Time series data to generate predictions for. If ``None``, returns the predictions that were saved
+            during training on ``train_data``.
+
+            If provided, all time series in ``data`` must have length at least
+            ``prediction_length + (num_val_windows - 1) * val_step_size + 1``.
+
+            The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+            the predictor.
+        model : str, list[str], or None, default = None
+            Name of the model(s) to generate predictions with. By default, the best model during training
+            (with highest validation score) will be used.
+
+            - If ``str``: Returns predictions for a single model as a list.
+            - If ``list[str]``: Returns predictions for multiple models as a dict mapping model names to lists.
+            - If ``None``: Uses the best model.
+        num_val_windows : int, optional
+            Number of validation windows to generate. If ``None``, uses the ``num_val_windows`` value from training
+            configuration when ``data=None``, otherwise defaults to 1.
+
+            For example, with ``prediction_length=2``, ``num_val_windows=3``, and ``val_step_size=1``, the validation
+            windows are::
+
+                |-------------------|
+                | x x x x x y y - - |
+                | x x x x x x y y - |
+                | x x x x x x x y y |
+
+            where ``x`` denotes training time steps and ``y`` denotes validation time steps for each window.
+        val_step_size : int, optional
+            Number of time steps between the start of consecutive validation windows. If ``None``, defaults to
+            ``prediction_length``.
+        use_cache : bool, default = True
+            If True, will attempt to use cached predictions. If False, cached predictions will be ignored.
+            This argument is ignored if ``cache_predictions`` was set to False when creating the ``TimeSeriesPredictor``.
+
+        Returns
+        -------
+        list[TimeSeriesDataFrame] or dict[str, list[TimeSeriesDataFrame]]
+            Predictions for each validation window.
+
+            - If ``model`` is a ``str`` or ``None``: Returns a list of length ``num_val_windows``, where each element
+              contains the predictions for one validation window.
+            - If ``model`` is a ``list[str]``: Returns a dict mapping each model name to a list of predictions for
+              each validation window.
+
+        Examples
+        --------
+        Make predictions on new data with the best model
+
+        >>> predictor.backtest_predictions(test_data, num_val_windows=2)
+
+        Load validation predictions for all models that were saved during training
+
+        >>> predictor.backtest_predictions(model=predictor.model_names())
+
+        See Also
+        --------
+        backtest_targets
+            Return target values aligned with predictions.
+        evaluate
+            Evaluate forecast accuracy on a hold-out set.
+        predict
+            Generate forecasts for future time steps.
+        """
+        self._assert_is_fit("backtest_predictions")
+        if data is not None:
+            data = self._check_and_prepare_data_frame(data)
+
+        if model is None:
+            model_names = [self.model_best]
+        elif isinstance(model, str):
+            model_names = [model]
+        else:
+            model_names = model
+
+        result = self._learner.backtest_predictions(
+            data=data,
+            model_names=model_names,
+            num_val_windows=num_val_windows,
+            val_step_size=val_step_size,
+            use_cache=use_cache,
+        )
+
+        if isinstance(model, list):
+            return result
+        else:
+            return result[model_names[0]]
+
+    def backtest_targets(
+        self,
+        data: TimeSeriesDataFrame | None = None,
+        *,
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
|
|
1127
|
+
) -> list[TimeSeriesDataFrame]:
|
|
1128
|
+
"""Return target values for each validation window.
|
|
1129
|
+
|
|
1130
|
+
Returns the actual target values corresponding to each validation window used in
|
|
1131
|
+
:meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`. The returned targets are aligned
|
|
1132
|
+
with the predictions, making it easy to compute custom evaluation metrics or analyze forecast errors.
|
|
1133
|
+
|
|
1134
|
+
Parameters
|
|
1135
|
+
----------
|
|
1136
|
+
data : TimeSeriesDataFrame, optional
|
|
1137
|
+
Time series data to extract targets from. If ``None``, returns the targets from the validation windows
|
|
1138
|
+
used during training.
|
|
1139
|
+
|
|
1140
|
+
If provided, all time series in ``data`` must have length at least
|
|
1141
|
+
``prediction_length + (num_val_windows - 1) * val_step_size + 1``.
|
|
1142
|
+
|
|
1143
|
+
The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
|
|
1144
|
+
the predictor.
|
|
1145
|
+
num_val_windows : int, optional
|
|
1146
|
+
Number of validation windows to extract targets for. If ``None``, uses the ``num_val_windows`` value from
|
|
1147
|
+
training configuration when ``data=None``, otherwise defaults to 1.
|
|
1148
|
+
|
|
1149
|
+
This should match the ``num_val_windows`` argument passed to
|
|
1150
|
+
:meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`.
|
|
1151
|
+
val_step_size : int, optional
|
|
1152
|
+
Number of time steps between the start of consecutive validation windows. If ``None``, defaults to
|
|
1153
|
+
``prediction_length``.
|
|
1154
|
+
|
|
1155
|
+
This should match the ``val_step_size`` argument passed to
|
|
1156
|
+
:meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`.
|
|
1157
|
+
|
|
1158
|
+
Returns
|
|
1159
|
+
-------
|
|
1160
|
+
list[TimeSeriesDataFrame]
|
|
1161
|
+
Target values for each validation window. Returns a list of length ``num_val_windows``,
|
|
1162
|
+
where each element contains the full time series data for one validation window.
|
|
1163
|
+
Each dataframe includes both historical context and the last ``prediction_length`` time steps
|
|
1164
|
+
that represent the target values to compare against predictions.
|
|
1165
|
+
|
|
1166
|
+
The returned targets are aligned with the output of
|
|
1167
|
+
:meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`, so ``targets[i]`` corresponds
|
|
1168
|
+
to ``predictions[i]`` for the i-th validation window.
|
|
1169
|
+
|
|
1170
|
+
See Also
|
|
1171
|
+
--------
|
|
1172
|
+
backtest_predictions
|
|
1173
|
+
Return predictions for multiple validation windows.
|
|
1174
|
+
evaluate
|
|
1175
|
+
Evaluate forecast accuracy on a hold-out set.
|
|
1176
|
+
"""
|
|
1177
|
+
self._assert_is_fit("backtest_targets")
|
|
1178
|
+
if data is not None:
|
|
1179
|
+
data = self._check_and_prepare_data_frame(data)
|
|
1180
|
+
return self._learner.backtest_targets(
|
|
1181
|
+
data=data,
|
|
1182
|
+
num_val_windows=num_val_windows,
|
|
1183
|
+
val_step_size=val_step_size,
|
|
1184
|
+
)
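Taken together, the two new methods support custom backtest analysis. A minimal sketch of how they pair up (``predictor`` is a fitted ``TimeSeriesPredictor``; ``test_data`` and the default ``target`` column name are illustrative assumptions, not part of the diff):

    # Aligned predictions and targets for two expanding validation windows.
    predictions = predictor.backtest_predictions(test_data, num_val_windows=2)
    targets = predictor.backtest_targets(test_data, num_val_windows=2)
    for preds, full_series in zip(predictions, targets):
        # Targets include history; the last `prediction_length` steps are the ground truth.
        y_true = full_series.groupby(level="item_id").tail(predictor.prediction_length)
        mae = (y_true["target"] - preds["mean"]).abs().mean()  # assumes the default target column name
        print(f"window MAE: {mae:.3f}")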

     def evaluate(
         self,
-        data:
-        model:
-        metrics:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+        model: str | None = None,
+        metrics: str | TimeSeriesScorer | list[str | TimeSeriesScorer] | None = None,
+        cutoff: int | None = None,
         display: bool = False,
         use_cache: bool = True,
-    ) ->
+    ) -> dict[str, float]:
         """Evaluate the forecast accuracy for given dataset.

         This method measures the forecast accuracy using the last ``self.prediction_length`` time steps of each time
@@ -863,27 +1205,31 @@ class TimeSeriesPredictor:

         Parameters
         ----------
-        data :
-            The data to evaluate the best model on.
-            ``data`` will be held out for prediction and forecast accuracy will
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
+            The data to evaluate the best model on. If a ``cutoff`` is not provided, the last ``prediction_length``
+            time steps of each time series in ``data`` will be held out for prediction and forecast accuracy will
+            be calculated on these time steps. When a ``cutoff`` is provided, the ``-cutoff``-th to the
+            ``-cutoff + prediction_length``-th time steps of each time series are used for evaluation.

-            Must include both
-            ``prediction_length + 1``).
+            Must include both historical and future data (i.e., length of all time series in ``data`` must be at least
+            ``prediction_length + 1``, if ``cutoff`` is not provided, ``-cutoff + 1`` otherwise).

-
-
+            The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+            the predictor.

-            If
-
-
-            If provided data is an instance of pandas DataFrame, AutoGluon will attempt to automatically convert it
-            to a ``TimeSeriesDataFrame``.
+            If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
+            If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
         model : str, optional
             Name of the model that you would like to evaluate. By default, the best model during training
             (with highest validation score) will be used.
-        metrics : str, TimeSeriesScorer or
+        metrics : str, TimeSeriesScorer or list[str | TimeSeriesScorer], optional
             Metric or a list of metrics to compute scores with. Defaults to ``self.eval_metric``. Supports both
             metric names as strings and custom metrics based on TimeSeriesScorer.
+        cutoff : int, optional
+            A *negative* integer less than or equal to ``-1 * prediction_length`` denoting the time step in ``data``
+            where the forecast evaluation starts, i.e., time series are evaluated from the ``-cutoff``-th to the
+            ``-cutoff + prediction_length``-th time step. Defaults to ``-1 * prediction_length``, using the last
+            ``prediction_length`` time steps of each time series for evaluation.
         display : bool, default = False
             If True, the scores will be printed.
         use_cache : bool, default = True
@@ -892,13 +1238,15 @@ class TimeSeriesPredictor:

         Returns
         -------
-        scores_dict :
+        scores_dict : dict[str, float]
             Dictionary where keys = metrics, values = performance along each metric. For consistency, error metrics
             will have their signs flipped to obey this convention. For example, negative MAPE values will be reported.
             To get the ``eval_metric`` score, do ``output[predictor.eval_metric.name]``.
         """
+        self._assert_is_fit("evaluate")
         data = self._check_and_prepare_data_frame(data)
-        self.
+        data = self._check_and_prepare_data_frame_for_evaluation(data, cutoff=cutoff)
+
         scores_dict = self._learner.evaluate(data, model=model, metrics=metrics, use_cache=use_cache)
         if display:
             logger.info("Evaluations on test data:")
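A short sketch of the new ``cutoff`` argument (``test_data`` is an illustrative hold-out set):

    # Evaluate on an earlier window instead of the default final `prediction_length` steps.
    scores = predictor.evaluate(test_data, cutoff=-3 * predictor.prediction_length)
    # Error metrics are sign-flipped, so higher is always better.
    print(scores[predictor.eval_metric.name])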
@@ -907,15 +1255,15 @@ class TimeSeriesPredictor:

     def feature_importance(
         self,
-        data:
-        model:
-        metric:
-        features:
-        time_limit:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+        model: str | None = None,
+        metric: str | TimeSeriesScorer | None = None,
+        features: list[str] | None = None,
+        time_limit: float | None = None,
         method: Literal["naive", "permutation"] = "permutation",
         subsample_size: int = 50,
-        num_iterations:
-        random_seed:
+        num_iterations: int | None = None,
+        random_seed: int | None = 123,
         relative_scores: bool = False,
         include_confidence_band: bool = True,
         confidence_level: float = 0.99,
@@ -943,15 +1291,11 @@ class TimeSeriesPredictor:
            item, will be held out for prediction and forecast accuracy will be calculated on these time steps.
            More accurate feature importances will be obtained from new data that was held-out during ``fit()``.

-
-
-            This data must contain the label column with the same column name as specified during ``fit()``.
+            The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+            the predictor.

-            If
-
-
-            If provided data is an instance of pandas DataFrame, AutoGluon will attempt to automatically convert it
-            to a ``TimeSeriesDataFrame``. If str or Path is passed, ``data`` will be loaded using the str value as the file path.
+            If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
+            If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.

            If ``data`` is not provided, then validation (tuning) data provided during training (or the held out data used for
            validation if ``tuning_data`` was not explicitly provided ``fit()``) will be used.
@@ -961,7 +1305,7 @@ class TimeSeriesPredictor:
        metric : str or TimeSeriesScorer, optional
            Metric to be used for computing feature importance. If None, the ``eval_metric`` specified during initialization of
            the ``TimeSeriesPredictor`` will be used.
-        features :
+        features : list[str], optional
            List of feature names that feature importances are calculated for and returned. By default, all feature importances
            will be returned.
        method : {"permutation", "naive"}, default = "permutation"
@@ -977,12 +1321,12 @@ class TimeSeriesPredictor:
            permutation importance.

        subsample_size : int, default = 50
-            The number of items to sample from
-            the feature importance scores. Runtime linearly scales with
+            The number of items to sample from ``data`` when computing feature importance. Larger values increase the accuracy of
+            the feature importance scores. Runtime linearly scales with ``subsample_size``.
        time_limit : float, optional
            Time in seconds to limit the calculation of feature importance. If None, feature importance will calculate without early stopping.
            If ``method="permutation"``, a minimum of 1 full shuffle set will always be evaluated. If a shuffle set evaluation takes longer than
-            ``time_limit``, the method will take the length of a shuffle set evaluation to return regardless of the
+            ``time_limit``, the method will take the length of a shuffle set evaluation to return regardless of the ``time_limit``.
        num_iterations : int, optional
            The number of different iterations of the data that are evaluated. If ``method="permutation"``, this will be interpreted
            as the number of shuffle sets (equivalent to ``num_shuffle_sets`` in :meth:`TabularPredictor.feature_importance`). If ``method="naive"``, the
@@ -1016,9 +1360,10 @@ class TimeSeriesPredictor:
            'importance': The estimated feature importance score.
            'stddev': The standard deviation of the feature importance score. If NaN, then not enough ``num_iterations`` were used.
        """
+        self._assert_is_fit("feature_importance")
        if data is not None:
            data = self._check_and_prepare_data_frame(data)
-            self.
+            data = self._check_and_prepare_data_frame_for_evaluation(data)

        fi_df = self._learner.get_feature_importance(
            data=data,
@@ -1034,7 +1379,7 @@ class TimeSeriesPredictor:
            include_confidence_band=include_confidence_band,
            confidence_level=confidence_level,
        )
-        return fi_df
+        return fi_df.sort_values("importance", ascending=False)
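Since ``feature_importance`` now returns the dataframe sorted by importance, the most influential covariates can be read off directly. A minimal sketch (``test_data`` is illustrative):

    fi = predictor.feature_importance(test_data, method="permutation", num_iterations=5)
    # Rows are sorted by the `importance` column in descending order.
    print(fi.head())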

     @classmethod
     def _load_version_file(cls, path: str) -> str:
@@ -1062,12 +1407,12 @@ class TimeSeriesPredictor:
         return version

     @classmethod
-    def load(cls, path:
+    def load(cls, path: str | Path, require_version_match: bool = True) -> "TimeSeriesPredictor":
         """Load an existing ``TimeSeriesPredictor`` from given ``path``.

         .. warning::

-            :meth:`autogluon.timeseries.TimeSeriesPredictor.load` uses
+            :meth:`autogluon.timeseries.TimeSeriesPredictor.load` uses ``pickle`` module implicitly, which is known to
             be insecure. It is possible to construct malicious pickle data which will execute arbitrary code during
             unpickling. Never load data that could have come from an untrusted source, or that could have been tampered
             with. **Only load data you trust.**
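A one-line sketch of the extended signature (the path is a placeholder; the semantics of ``require_version_match=False`` are inferred from the parameter name and would presumably allow loading artifacts saved by a different autogluon version):

    predictor = TimeSeriesPredictor.load("./ag_models/", require_version_match=True)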
@@ -1139,22 +1484,21 @@ class TimeSeriesPredictor:
         self._learner = tmp_learner
         self._save_version_file()

-    def info(self) ->
+    def info(self) -> dict[str, Any]:
         """Returns a dictionary of objects each describing an attribute of the training process and trained models."""
         return self._learner.get_info(include_model_info=True)

     @property
     def model_best(self) -> str:
         """Returns the name of the best model from trainer."""
+        self._assert_is_fit("model_best")
         if self._trainer.model_best is not None:
             models = self._trainer.get_model_names()
             if self._trainer.model_best in models:
                 return self._trainer.model_best
         return self._trainer.get_model_best()

-    def persist(
-        self, models: Union[Literal["all", "best"], List[str]] = "best", with_ancestors: bool = True
-    ) -> List[str]:
+    def persist(self, models: Literal["all", "best"] | list[str] = "best", with_ancestors: bool = True) -> list[str]:
         """Persist models in memory for reduced inference latency. This is particularly important if the models are being used for online
         inference where low latency is critical. If models are not persisted in memory, they are loaded from disk every time they are
         asked to make predictions. This is especially cumbersome for large deep learning based models which have to be loaded into
@@ -1165,45 +1509,47 @@ class TimeSeriesPredictor:
        models : list of str or str, default = 'best'
            Model names of models to persist.
            If 'best' then the model with the highest validation score is persisted (this is the model used for prediction by default).
-            If 'all' then all models are persisted. Valid models are listed in this
+            If 'all' then all models are persisted. Valid models are listed in this ``predictor`` by calling ``predictor.model_names()``.
        with_ancestors : bool, default = True
            If True, all ancestor models of the provided models will also be persisted.
-            If False, ensemble models will not have the models they depend on persisted unless those models were specified in
+            If False, ensemble models will not have the models they depend on persisted unless those models were specified in ``models``.
            This will slow down inference as the ancestor models will still need to be loaded from disk for each predict call.
            Only relevant for ensemble models.

        Returns
        -------
-        list_of_models :
+        list_of_models : list[str]
            List of persisted model names.
        """
+        self._assert_is_fit("persist")
        return self._learner.persist_trainer(models=models, with_ancestors=with_ancestors)

-    def unpersist(self) ->
+    def unpersist(self) -> list[str]:
        """Unpersist models in memory for reduced memory usage. If models are not persisted in memory, they are loaded from
        disk every time they are asked to make predictions.

        Note: Another way to reset the predictor and unpersist models is to reload the predictor from disk
-        via
+        via ``predictor = TimeSeriesPredictor.load(predictor.path)``.

        Returns
        -------
-        list_of_models :
+        list_of_models : list[str]
            List of unpersisted model names.
        """
        return self._learner.unpersist_trainer()
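A minimal low-latency serving sketch using the methods above (``recent_data`` is illustrative):

    predictor.persist(models="best", with_ancestors=True)  # load once, keep in memory
    forecast = predictor.predict(recent_data)              # no per-call disk loads
    predictor.unpersist()                                  # release memory afterwards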

     def leaderboard(
         self,
-        data:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+        cutoff: int | None = None,
         extra_info: bool = False,
-        extra_metrics:
+        extra_metrics: list[str | TimeSeriesScorer] | None = None,
         display: bool = False,
         use_cache: bool = True,
         **kwargs,
     ) -> pd.DataFrame:
         """Return a leaderboard showing the performance of every trained model, the output is a
-        pandas
+        pandas dataframe with columns:

         * ``model``: The name of the model.
         * ``score_test``: The test score of the model on ``data``, if provided. Computed according to ``eval_metric``.
@@ -1222,33 +1568,35 @@ class TimeSeriesPredictor:

         Parameters
         ----------
-        data :
-            dataset used for additional evaluation. Must include both
-            time series in ``data`` must be at least ``prediction_length + 1``
-
-
-
-
-
-
-
-            ``
-
-
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
+            dataset used for additional evaluation. Must include both historical and future data (i.e., length of all
+            time series in ``data`` must be at least ``prediction_length + 1``, if ``cutoff`` is not provided,
+            ``-cutoff + 1`` otherwise).
+
+            The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+            the predictor.
+
+            If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
+            If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
+        cutoff : int, optional
+            A *negative* integer less than or equal to ``-1 * prediction_length`` denoting the time step in ``data``
+            where the forecast evaluation starts, i.e., time series are evaluated from the ``-cutoff``-th to the
+            ``-cutoff + prediction_length``-th time step. Defaults to ``-1 * prediction_length``, using the last
+            ``prediction_length`` time steps of each time series for evaluation.
         extra_info : bool, default = False
-            If True, the leaderboard will contain an additional column
-            by each model during training. An empty dictionary
+            If True, the leaderboard will contain an additional column ``hyperparameters`` with the hyperparameters used
+            by each model during training. An empty dictionary ``{}`` means that the model was trained with default
             hyperparameters.
-        extra_metrics :
+        extra_metrics : list[str | TimeSeriesScorer], optional
            A list of metrics to calculate scores for and include in the output DataFrame.

-            Only valid when
-            calculate the
+            Only valid when ``data`` is specified. The scores refer to the scores on ``data`` (same data as used to
+            calculate the ``score_test`` column).

-            This list can contain any values which would also be valid for
+            This list can contain any values which would also be valid for ``eval_metric`` when creating a :class:`~autogluon.timeseries.TimeSeriesPredictor`.

-            For each provided
-            the value of the metric computed on
+            For each provided ``metric``, a column with name ``str(metric)`` will be added to the leaderboard, containing
+            the value of the metric computed on ``data``.
        display : bool, default = False
            If True, the leaderboard DataFrame will be printed.
        use_cache : bool, default = True
@@ -1261,6 +1609,7 @@ class TimeSeriesPredictor:
            The leaderboard containing information on all models and in order of best model to worst in terms of
            test performance.
        """
+        self._assert_is_fit("leaderboard")
        if "silent" in kwargs:
            # keep `silent` logic for backwards compatibility
            assert isinstance(kwargs["silent"], bool)
@@ -1270,10 +1619,12 @@ class TimeSeriesPredictor:
            raise TypeError(f"TimeSeriesPredictor.leaderboard() got an unexpected keyword argument '{key}'")
        if data is None and extra_metrics is not None:
            raise ValueError("`extra_metrics` is only valid when `data` is specified.")
+        if data is None and cutoff is not None:
+            raise ValueError("`cutoff` is only valid when `data` is specified.")

        if data is not None:
            data = self._check_and_prepare_data_frame(data)
-            self.
+            data = self._check_and_prepare_data_frame_for_evaluation(data, cutoff=cutoff)

        leaderboard = self._learner.leaderboard(
            data, extra_info=extra_info, extra_metrics=extra_metrics, use_cache=use_cache
@@ -1283,7 +1634,45 @@ class TimeSeriesPredictor:
            print(leaderboard)
        return leaderboard
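A sketch combining the new ``cutoff`` argument with ``extra_metrics`` (``test_data`` is illustrative; ``MASE`` and ``WQL`` are standard autogluon.timeseries metric names):

    lb = predictor.leaderboard(
        test_data,
        cutoff=-2 * predictor.prediction_length,  # score an earlier evaluation window
        extra_metrics=["MASE", "WQL"],            # added as extra columns named str(metric)
    )
    print(lb[["model", "score_test", "MASE", "WQL"]])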

-    def
+    def make_future_data_frame(self, data: TimeSeriesDataFrame | pd.DataFrame | Path | str) -> pd.DataFrame:
+        """Generate a dataframe with the ``item_id`` and ``timestamp`` values corresponding to the forecast horizon.
+
+        Parameters
+        ----------
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
+            Historical time series data.
+
+        Returns
+        -------
+        forecast_horizon : pd.DataFrame
+            Data frame with columns ``item_id`` and ``timestamp`` corresponding to the forecast horizon. For each item ID
+            in ``data``, ``forecast_horizon`` will contain the timestamps for the next ``prediction_length`` time steps,
+            following the end of each series in the input data.
+
+        Examples
+        --------
+        >>> print(data)
+                            target
+        item_id timestamp
+        A       2024-01-01       0
+                2024-01-02       1
+                2024-01-03       2
+        B       2024-04-07       3
+                2024-04-08       4
+        >>> predictor = TimeSeriesPredictor(prediction_length=2, freq="D")
+        >>> print(predictor.make_future_data_frame(data))
+          item_id  timestamp
+        0       A 2024-01-04
+        0       A 2024-01-05
+        1       B 2024-04-09
+        1       B 2024-04-10
+        """
+        if self.freq is None:
+            raise ValueError("Please fit the predictor before calling `make_future_data_frame`")
+        data = self._check_and_prepare_data_frame(data)
+        return make_future_data_frame(data, prediction_length=self.prediction_length, freq=self.freq)
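This makes the ``known_covariates`` workflow in ``predict`` mechanical. A sketch under the assumptions that ``promotion`` was declared as a known covariate at training time and that ``predict`` accepts the filled template directly:

    future = predictor.make_future_data_frame(data)
    future["promotion"] = 0  # fill in the future covariate values
    predictions = predictor.predict(data, known_covariates=future)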
+
+    def fit_summary(self, verbosity: int = 1) -> dict[str, Any]:
         """Output summary of information about models produced during
         :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`.

@@ -1294,10 +1683,11 @@ class TimeSeriesPredictor:

         Returns
         -------
-        summary_dict :
+        summary_dict : dict[str, Any]
             Dict containing various detailed information. We do not recommend directly printing this dict as it may
             be very large.
         """
+        self._assert_is_fit("fit_summary")
         # TODO: HPO-specific information currently not reported in fit_summary
         # TODO: Revisit after ray tune integration

@@ -1318,7 +1708,7 @@ class TimeSeriesPredictor:
         model_hyperparams = {}
         for model_name in self.model_names():
             model_obj = self._trainer.load_model(model_name)
-            model_hyperparams[model_name] = model_obj.
+            model_hyperparams[model_name] = model_obj.get_hyperparameters()

         results["model_hyperparams"] = model_hyperparams
         results["leaderboard"] = self._learner.leaderboard()
@@ -1333,7 +1723,7 @@ class TimeSeriesPredictor:
         print("****************** End of fit() summary ******************")
         return results

-    def refit_full(self, model: str = "all", set_best_to_refit_full: bool = True) ->
+    def refit_full(self, model: str = "all", set_best_to_refit_full: bool = True) -> dict[str, str]:
         """Retrain model on all of the data (training + validation).

         This method can only be used if no ``tuning_data`` was passed to :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`.
@@ -1358,6 +1748,7 @@ class TimeSeriesPredictor:
         ``predictor.predict(data)`` is called will be the refit_full version instead of the original version of the
         model. Has no effect if ``model`` is not the best model.
         """
+        self._assert_is_fit("refit_full")
         logger.warning(
             "\tWARNING: refit_full functionality for TimeSeriesPredictor is experimental "
             "and is not yet supported by all models."
@@ -1395,11 +1786,6 @@ class TimeSeriesPredictor:
         )
         return refit_full_dict
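A sketch of the refit flow (that the returned ``dict[str, str]`` maps original to refit model names is an assumption based on the annotation, not stated in this diff):

    refit_map = predictor.refit_full(model="all", set_best_to_refit_full=True)
    print(refit_map)  # presumably maps each model to its refit counterpart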

-    def __dir__(self) -> List[str]:
-        # This hides method from IPython autocomplete, but not VSCode autocomplete
-        deprecated = ["score", "get_model_best", "get_model_names"]
-        return [d for d in super().__dir__() if d not in deprecated]
-
     def _simulation_artifact(self, test_data: TimeSeriesDataFrame) -> dict:
         """[Advanced] Computes and returns the necessary information to perform offline ensemble simulation."""

@@ -1409,28 +1795,29 @@ class TimeSeriesPredictor:
             return cast(TimeSeriesDataFrame, ts_df[[self.target]])

         test_data = self._check_and_prepare_data_frame(test_data)
-        self.
+        test_data = self._check_and_prepare_data_frame_for_evaluation(test_data, name="test_data")
         test_data = self._learner.feature_generator.transform(test_data)

         trainer = self._trainer
         train_data = trainer.load_train_data()
         val_data = trainer.load_val_data()
-        base_model_names = trainer.get_model_names(
-        pred_proba_dict_val:
+        base_model_names = trainer.get_model_names(layer=0)
+        pred_proba_dict_val: dict[str, list[TimeSeriesDataFrame]] = {
             model_name: trainer._get_model_oof_predictions(model_name)
             for model_name in base_model_names
             if "_FULL" not in model_name
         }

         past_data, known_covariates = test_data.get_model_inputs_for_scoring(
-            prediction_length=self.prediction_length,
+            prediction_length=self.prediction_length,
+            known_covariates_names=trainer.covariate_metadata.known_covariates,
         )
         pred_proba_dict_test, _ = trainer.get_model_pred_dict(
             base_model_names, data=past_data, known_covariates=known_covariates
         )

-        y_val:
-            select_target(df) for df in trainer.
+        y_val: list[TimeSeriesDataFrame] = [
+            select_target(df) for df in trainer._get_validation_windows(train_data=train_data, val_data=val_data)
         ]
         y_test: TimeSeriesDataFrame = select_target(test_data)

@@ -1442,34 +1829,35 @@ class TimeSeriesPredictor:
             target=self.target,
             prediction_length=self.prediction_length,
             eval_metric=self.eval_metric.name,
-            eval_metric_seasonal_period=self.
+            eval_metric_seasonal_period=self.eval_metric.seasonal_period,
+            horizon_weight=self.eval_metric.horizon_weight,
             quantile_levels=self.quantile_levels,
         )
         return simulation_dict

     def plot(
         self,
-        data:
-        predictions:
-        quantile_levels:
-        item_ids:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+        predictions: TimeSeriesDataFrame | None = None,
+        quantile_levels: list[float] | None = None,
+        item_ids: list[str | int] | None = None,
         max_num_item_ids: int = 8,
-        max_history_length:
-        point_forecast_column:
-        matplotlib_rc_params:
+        max_history_length: int | None = None,
+        point_forecast_column: str | None = None,
+        matplotlib_rc_params: dict | None = None,
     ):
-        """Plot
+        """Plot historical time series values and the forecasts.

         Parameters
         ----------
-        data :
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
             Observed time series data.
         predictions : TimeSeriesDataFrame, optional
             Predictions generated by calling :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict`.
-        quantile_levels :
+        quantile_levels : list[float], optional
             Quantile levels for which to plot the prediction intervals. Defaults to lowest & highest quantile levels
             available in ``predictions``.
-        item_ids :
+        item_ids : list[str | int], optional
             If provided, plots will only be generated for time series with these item IDs. By default (if set to
             ``None``), item IDs are selected randomly. In either case, plots are generated for at most
             ``max_num_item_ids`` time series.
@@ -1481,8 +1869,8 @@ class TimeSeriesPredictor:
             Name of the column in ``predictions`` that will be plotted as the point forecast. Defaults to ``"0.5"``,
             if this column is present in ``predictions``, otherwise ``"mean"``.
         matplotlib_rc_params : dict, optional
-            Dictionary describing the plot style that will be passed to
-            See
+            Dictionary describing the plot style that will be passed to `matplotlib.pyplot.rc_context <https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.rc_context.html>`_.
+            See `matplotlib documentation <https://matplotlib.org/stable/users/explain/customizing.html#the-default-matplotlibrc-file>`_ for the list of available options.
         """
         import matplotlib.pyplot as plt

@@ -1552,7 +1940,7 @@ class TimeSeriesPredictor:
         for q in quantile_levels:
             ax.fill_between(forecast.index, point_forecast, forecast[str(q)], color="C1", alpha=0.2)
         if len(axes) > len(item_ids):
-            axes[len(item_ids)].set_axis_off()
-        handles, labels = axes[0].get_legend_handles_labels()
+            axes[len(item_ids)].set_axis_off()  # type: ignore
+        handles, labels = axes[0].get_legend_handles_labels()  # type: ignore
         fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.0), ncols=len(handles))
         return fig
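A closing sketch of the plotting API with the documented styling hook (the item IDs, rc params, and output path are illustrative):

    predictions = predictor.predict(data)
    fig = predictor.plot(
        data,
        predictions=predictions,
        item_ids=["A", "B"],
        max_history_length=200,
        matplotlib_rc_params={"figure.dpi": 120},  # forwarded to matplotlib.pyplot.rc_context
    )
    fig.savefig("forecasts.png", bbox_inches="tight")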