autogluon.timeseries 1.4.1b20250906__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.timeseries might be problematic.
- autogluon/timeseries/configs/hyperparameter_presets.py +2 -2
- autogluon/timeseries/dataset/ts_dataframe.py +97 -86
- autogluon/timeseries/learner.py +68 -35
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +5 -5
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +4 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
- autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
- autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +125 -87
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +68 -36
- autogluon/timeseries/models/ensemble/__init__.py +34 -2
- autogluon/timeseries/models/ensemble/abstract.py +5 -42
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
- autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +6 -16
- autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
- autogluon/timeseries/models/gluonts/abstract.py +25 -25
- autogluon/timeseries/models/gluonts/dataset.py +11 -11
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +15 -18
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +1 -1
- autogluon/timeseries/models/local/statsforecast.py +12 -12
- autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
- autogluon/timeseries/models/toto/model.py +236 -0
- autogluon/timeseries/predictor.py +301 -103
- autogluon/timeseries/regressor.py +27 -30
- autogluon/timeseries/splitter.py +3 -27
- autogluon/timeseries/trainer/ensemble_composer.py +439 -0
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +300 -275
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/covariate_scaler.py +8 -8
- autogluon/timeseries/transforms/target_scaler.py +15 -15
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +18 -14
- autogluon/timeseries/utils/forecast.py +6 -7
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +39 -22
- autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20250906-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.4.1b20250906.dist-info/RECORD +0 -75
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
@@ -5,7 +5,7 @@ import os
 import pprint
 import time
 from pathlib import Path
-from typing import Any, Literal,
+from typing import Any, Literal, Type, cast, overload
 
 import numpy as np
 import pandas as pd
@@ -22,10 +22,9 @@ from autogluon.core.utils.loaders import load_pkl, load_str
 from autogluon.core.utils.savers import save_pkl, save_str
 from autogluon.timeseries import __version__ as current_ag_version
 from autogluon.timeseries.configs import get_predictor_presets
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.learner import TimeSeriesLearner
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
-from autogluon.timeseries.splitter import ExpandingWindowSplitter
 from autogluon.timeseries.trainer import TimeSeriesTrainer
 from autogluon.timeseries.utils.forecast import make_future_data_frame
 
@@ -67,7 +66,7 @@ class TimeSeriesPredictor:
 
         If ``freq`` is provided when creating the predictor, all data passed to the predictor will be automatically
         resampled at this frequency.
-    eval_metric :
+    eval_metric : str | TimeSeriesScorer, default = "WQL"
        Metric by which predictions will be ultimately evaluated on future test data. AutoGluon tunes hyperparameters
        in order to improve this metric on validation data, and ranks models (on validation data) according to this
        metric.
@@ -125,7 +124,7 @@ class TimeSeriesPredictor:
        debug messages from AutoGluon and all logging in dependencies (GluonTS, PyTorch Lightning, AutoGluon-Tabular, etc.)
    log_to_file: bool, default = True
        Whether to save the logs into a file for later reference
-    log_file_path:
+    log_file_path: str | Path, default = "auto"
        File path to save the logs.
        If auto, logs will be saved under ``predictor_path/logs/predictor_log.txt``.
        Will be ignored if ``log_to_file`` is set to False
@@ -146,20 +145,20 @@ class TimeSeriesPredictor:
 
     def __init__(
         self,
-        target:
-        known_covariates_names:
+        target: str | None = None,
+        known_covariates_names: list[str] | None = None,
         prediction_length: int = 1,
-        freq:
-        eval_metric:
-        eval_metric_seasonal_period:
-        horizon_weight:
-        path:
+        freq: str | None = None,
+        eval_metric: str | TimeSeriesScorer | None = None,
+        eval_metric_seasonal_period: int | None = None,
+        horizon_weight: list[float] | None = None,
+        path: str | Path | None = None,
         verbosity: int = 2,
         log_to_file: bool = True,
-        log_file_path:
-        quantile_levels:
+        log_file_path: str | Path = "auto",
+        quantile_levels: list[float] | None = None,
         cache_predictions: bool = True,
-        label:
+        label: str | None = None,
         **kwargs,
     ):
         self.verbosity = verbosity
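For orientation, a minimal construction under the new `__init__` signature. This is a sketch, not output of the package: the column name, frequency, and path are illustrative, and `eval_metric` defaults to "WQL" per the docstring above.

    from autogluon.timeseries import TimeSeriesPredictor

    predictor = TimeSeriesPredictor(
        target="demand",              # hypothetical target column name
        prediction_length=24,
        freq="h",                     # all input data is resampled to hourly frequency
        eval_metric="WQL",
        quantile_levels=[0.1, 0.5, 0.9],
        path="ag_models",             # hypothetical output directory
    )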
@@ -221,20 +220,6 @@ class TimeSeriesPredictor:
             ensemble_model_type=kwargs.pop("ensemble_model_type", None),
         )
 
-        if "ignore_time_index" in kwargs:
-            raise TypeError(
-                "`ignore_time_index` argument to TimeSeriesPredictor.__init__() has been deprecated.\n"
-                "If your data has irregular timestamps, please either 1) specify the desired regular frequency when "
-                "creating the predictor as `TimeSeriesPredictor(freq=...)` or 2) manually convert timestamps to "
-                "regular frequency with `data.convert_frequency(freq=...)`."
-            )
-        for k in ["learner_type", "learner_kwargs"]:
-            if k in kwargs:
-                val = kwargs.pop(k)
-                logger.warning(
-                    f"Passing `{k}` to TimeSeriesPredictor has been deprecated and will be removed in v1.4. "
-                    f"The provided value {val} will be ignored."
-                )
         if len(kwargs) > 0:
             for key in kwargs:
                 raise TypeError(f"TimeSeriesPredictor.__init__() got an unexpected keyword argument '{key}'")
@@ -243,7 +228,16 @@ class TimeSeriesPredictor:
     def _trainer(self) -> TimeSeriesTrainer:
         return self._learner.load_trainer()  # noqa
 
-
+    @property
+    def is_fit(self) -> bool:
+        return self._learner.is_fit
+
+    def _assert_is_fit(self, method_name: str) -> None:
+        """Check if predictor is fit and raise AssertionError with informative message if not."""
+        if not self.is_fit:
+            raise AssertionError(f"Predictor is not fit. Call `.fit` before calling `.{method_name}`. ")
+
+    def _setup_log_to_file(self, log_to_file: bool, log_file_path: str | Path) -> None:
         if log_to_file:
             if log_file_path == "auto":
                 log_file_path = os.path.join(self.path, "logs", self._predictor_log_file_name)
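The `is_fit` property added here is also usable from user code. A minimal sketch, assuming `predictor` and `train_data` already exist:

    # `_assert_is_fit` is the internal guard that public methods such as
    # predict() and leaderboard() now call before doing any work.
    if not predictor.is_fit:
        predictor.fit(train_data)
    predictions = predictor.predict(train_data)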
@@ -253,7 +247,7 @@ class TimeSeriesPredictor:
 
     def _to_data_frame(
         self,
-        data:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
         name: str = "data",
     ) -> TimeSeriesDataFrame:
         if isinstance(data, TimeSeriesDataFrame):
@@ -274,7 +268,7 @@ class TimeSeriesPredictor:
 
     def _check_and_prepare_data_frame(
         self,
-        data:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
         name: str = "data",
     ) -> TimeSeriesDataFrame:
         """Ensure that TimeSeriesDataFrame has a sorted index and a valid frequency.
@@ -283,7 +277,7 @@ class TimeSeriesPredictor:
 
         Parameters
         ----------
-        data :
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
            Data as a dataframe or path to file storing the data.
        name : str
            Name of the data that will be used in log messages (e.g., 'train_data', 'tuning_data', or 'data').
@@ -326,7 +320,7 @@ class TimeSeriesPredictor:
         return df
 
     def _check_and_prepare_data_frame_for_evaluation(
-        self, data: TimeSeriesDataFrame, cutoff:
+        self, data: TimeSeriesDataFrame, cutoff: int | None = None, name: str = "data"
     ) -> TimeSeriesDataFrame:
         """
         Make sure that provided evaluation data includes both historical and future time series values.
@@ -417,7 +411,9 @@ class TimeSeriesPredictor:
             )
             train_data = train_data.query("item_id not in @too_short_items")
 
-        all_nan_items = train_data.item_ids[
+        all_nan_items = train_data.item_ids[
+            train_data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
+        ]
         if len(all_nan_items) > 0:
             logger.info(f"\tRemoving {len(all_nan_items)} time series consisting of only NaN values from train_data.")
             train_data = train_data.query("item_id not in @all_nan_items")
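The reshaped `all_nan_items` expression relies on a standard pandas idiom: group the target column's NaN mask by the item level of the index and test whether it is all-True per item. A self-contained sketch of the same idiom on toy data (not AutoGluon internals):

    import numpy as np
    import pandas as pd

    idx = pd.MultiIndex.from_product(
        [["A", "B"], pd.date_range("2024-01-01", periods=3)],
        names=["item_id", "timestamp"],
    )
    df = pd.DataFrame({"target": [1.0, 2.0, 3.0, np.nan, np.nan, np.nan]}, index=idx)

    # True for items whose target is NaN at every timestamp (here: only "B")
    all_nan = df["target"].isna().groupby("item_id", sort=False).all()
    print(all_nan[all_nan].index.tolist())  # ['B']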
@@ -435,27 +431,27 @@ class TimeSeriesPredictor:
     @apply_presets(get_predictor_presets())
     def fit(
         self,
-        train_data:
-        tuning_data:
-        time_limit:
-        presets:
-        hyperparameters:
-        hyperparameter_tune_kwargs:
-        excluded_model_types:
+        train_data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+        tuning_data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+        time_limit: int | None = None,
+        presets: str | None = None,
+        hyperparameters: str | dict[str | Type, Any] | None = None,
+        hyperparameter_tune_kwargs: str | dict | None = None,
+        excluded_model_types: list[str] | None = None,
         num_val_windows: int = 1,
-        val_step_size:
-        refit_every_n_windows:
+        val_step_size: int | None = None,
+        refit_every_n_windows: int | None = 1,
         refit_full: bool = False,
         enable_ensemble: bool = True,
         skip_model_selection: bool = False,
-        random_seed:
-        verbosity:
+        random_seed: int | None = 123,
+        verbosity: int | None = None,
     ) -> "TimeSeriesPredictor":
         """Fit probabilistic forecasting models to the given time series dataset.
 
         Parameters
         ----------
-        train_data :
+        train_data : TimeSeriesDataFrame | pd.DataFrame | Path | str
            Training data in the :class:`~autogluon.timeseries.TimeSeriesDataFrame` format.
 
            Time series with length ``<= (num_val_windows + 1) * prediction_length`` will be ignored during training.
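A minimal `fit` call under the retyped signature; the preset name is one of AutoGluon's documented presets, and all values are illustrative:

    predictor = predictor.fit(
        train_data,
        presets="medium_quality",
        time_limit=600,              # seconds
        num_val_windows=3,
        val_step_size=24,
        refit_every_n_windows=1,
    )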
@@ -481,7 +477,7 @@ class TimeSeriesPredictor:
 
            If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
            If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
-        tuning_data :
+        tuning_data : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
            Data reserved for model selection and hyperparameter tuning, rather than training individual models. Also
            used to compute the validation scores. Note that only the last ``prediction_length`` time steps of each
            time series are used for computing the validation score.
@@ -673,8 +669,10 @@ class TimeSeriesPredictor:
 
         """
         time_start = time.time()
-        if self.
-            raise AssertionError(
+        if self.is_fit:
+            raise AssertionError(
+                "Predictor is already fit! To fit additional models create a new `TimeSeriesPredictor`."
+            )
 
         if verbosity is None:
             verbosity = self.verbosity
@@ -731,11 +729,11 @@ class TimeSeriesPredictor:
             tuning_data = self._check_and_prepare_data_frame_for_evaluation(tuning_data, name="tuning_data")
             logger.info(f"Provided tuning_data has {self._get_dataset_stats(tuning_data)}")
             # TODO: Use num_val_windows to perform multi-window backtests on tuning_data
-            if num_val_windows >
+            if num_val_windows > 1:
                 logger.warning(
                     "\tSetting num_val_windows = 0 (disabling backtesting on train_data) because tuning_data is provided."
                 )
-                num_val_windows =
+                num_val_windows = 1
 
         if num_val_windows == 0 and tuning_data is None:
             raise ValueError("Please set num_val_windows >= 1 or provide custom tuning_data")
@@ -748,13 +746,11 @@ class TimeSeriesPredictor:
 
         if not skip_model_selection:
             train_data = self._filter_useless_train_data(
-                train_data,
+                train_data,
+                num_val_windows=0 if tuning_data is not None else num_val_windows,
+                val_step_size=val_step_size,
             )
 
-        val_splitter = ExpandingWindowSplitter(
-            prediction_length=self.prediction_length, num_val_windows=num_val_windows, val_step_size=val_step_size
-        )
-
         time_left = None if time_limit is None else time_limit - (time.time() - time_start)
         self._learner.fit(
             train_data=train_data,
@@ -764,7 +760,8 @@ class TimeSeriesPredictor:
             excluded_model_types=excluded_model_types,
             time_limit=time_left,
             verbosity=verbosity,
-
+            num_val_windows=(num_val_windows,) if isinstance(num_val_windows, int) else num_val_windows,
+            val_step_size=val_step_size,
             refit_every_n_windows=refit_every_n_windows,
             skip_model_selection=skip_model_selection,
             enable_ensemble=enable_ensemble,
@@ -781,21 +778,22 @@ class TimeSeriesPredictor:
 
     def model_names(self) -> list[str]:
         """Returns the list of model names trained by this predictor object."""
+        self._assert_is_fit("model_names")
         return self._trainer.get_model_names()
 
     def predict(
         self,
-        data:
-        known_covariates:
-        model:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+        known_covariates: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+        model: str | None = None,
         use_cache: bool = True,
-        random_seed:
+        random_seed: int | None = 123,
     ) -> TimeSeriesDataFrame:
         """Return quantile and mean forecasts for the given dataset, starting from the end of each time series.
 
         Parameters
         ----------
-        data :
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
            Historical time series data for which the forecast needs to be made.
 
            The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
@@ -803,7 +801,7 @@ class TimeSeriesPredictor:
 
            If provided data is a ``pandas.DataFrame``, AutoGluon will attempt to convert it to a ``TimeSeriesDataFrame``.
            If a ``str`` or a ``Path`` is provided, AutoGluon will attempt to load this file.
-        known_covariates :
+        known_covariates : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
            If ``known_covariates_names`` were specified when creating the predictor, it is necessary to provide the
            values of the known covariates for each time series during the forecast horizon. Specifically:
 
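A sketch of the corresponding `predict` call with known covariates. It assumes a hypothetical covariate column `promotion` and uses `make_future_data_frame` (retyped in a later hunk) to build the item_id/timestamp frame that the covariates must cover:

    future = predictor.make_future_data_frame(data)   # item_id + timestamp over the forecast horizon
    future["promotion"] = 0                           # hypothetical known covariate
    predictions = predictor.predict(data, known_covariates=future)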
@@ -853,6 +851,7 @@ class TimeSeriesPredictor:
         B       2020-03-04    17.1
                 2020-03-05     8.3
         """
+        self._assert_is_fit("predict")
         # Save original item_id order to return predictions in the same order as input data
         data = self._to_data_frame(data)
         original_item_id_order = data.item_ids
@@ -866,14 +865,209 @@ class TimeSeriesPredictor:
             use_cache=use_cache,
             random_seed=random_seed,
         )
-        return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=ITEMID))
+        return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID))
+
+    @overload
+    def backtest_predictions(
+        self,
+        data: TimeSeriesDataFrame | None = None,
+        *,
+        model: str | None = None,
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
+        use_cache: bool = True,
+    ) -> list[TimeSeriesDataFrame]: ...
+
+    @overload
+    def backtest_predictions(
+        self,
+        data: TimeSeriesDataFrame | None = None,
+        *,
+        model: list[str],
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
+        use_cache: bool = True,
+    ) -> dict[str, list[TimeSeriesDataFrame]]: ...
+
+    def backtest_predictions(
+        self,
+        data: TimeSeriesDataFrame | None = None,
+        *,
+        model: str | list[str] | None = None,
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
+        use_cache: bool = True,
+    ) -> list[TimeSeriesDataFrame] | dict[str, list[TimeSeriesDataFrame]]:
+        """Return predictions for multiple validation windows.
+
+        When ``data=None``, returns the predictions that were saved during training. Otherwise, generates new
+        predictions by splitting ``data`` into multiple windows using an expanding window strategy.
+
+        The corresponding target values for each window can be obtained using
+        :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_targets`.
+
+        Parameters
+        ----------
+        data : TimeSeriesDataFrame, optional
+            Time series data to generate predictions for. If ``None``, returns the predictions that were saved
+            during training on ``train_data``.
+
+            If provided, all time series in ``data`` must have length at least
+            ``prediction_length + (num_val_windows - 1) * val_step_size + 1``.
+
+            The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+            the predictor.
+        model : str, list[str], or None, default = None
+            Name of the model(s) to generate predictions with. By default, the best model during training
+            (with highest validation score) will be used.
+
+            - If ``str``: Returns predictions for a single model as a list.
+            - If ``list[str]``: Returns predictions for multiple models as a dict mapping model names to lists.
+            - If ``None``: Uses the best model.
+        num_val_windows : int, optional
+            Number of validation windows to generate. If ``None``, uses the ``num_val_windows`` value from training
+            configuration when ``data=None``, otherwise defaults to 1.
+
+            For example, with ``prediction_length=2``, ``num_val_windows=3``, and ``val_step_size=1``, the validation
+            windows are::
+
+                |-------------------|
+                | x x x x x y y - - |
+                | x x x x x x y y - |
+                | x x x x x x x y y |
+
+            where ``x`` denotes training time steps and ``y`` denotes validation time steps for each window.
+        val_step_size : int, optional
+            Number of time steps between the start of consecutive validation windows. If ``None``, defaults to
+            ``prediction_length``.
+        use_cache : bool, default = True
+            If True, will attempt to use cached predictions. If False, cached predictions will be ignored.
+            This argument is ignored if ``cache_predictions`` was set to False when creating the ``TimeSeriesPredictor``.
+
+        Returns
+        -------
+        list[TimeSeriesDataFrame] or dict[str, list[TimeSeriesDataFrame]]
+            Predictions for each validation window.
+
+            - If ``model`` is a ``str`` or ``None``: Returns a list of length ``num_val_windows``, where each element
+              contains the predictions for one validation window.
+            - If ``model`` is a ``list[str]``: Returns a dict mapping each model name to a list of predictions for
+              each validation window.
+
+        Examples
+        --------
+        Make predictions on new data with the best model
+
+        >>> predictor.backtest_predictions(test_data, num_val_windows=2)
+
+        Load validation predictions for all models that were saved during training
+
+        >>> predictor.backtest_predictions(model=predictor.model_names())
+
+        See Also
+        --------
+        backtest_targets
+            Return target values aligned with predictions.
+        evaluate
+            Evaluate forecast accuracy on a hold-out set.
+        predict
+            Generate forecasts for future time steps.
+        """
+        self._assert_is_fit("backtest_predictions")
+        if data is not None:
+            data = self._check_and_prepare_data_frame(data)
+
+        if model is None:
+            model_names = [self.model_best]
+        elif isinstance(model, str):
+            model_names = [model]
+        else:
+            model_names = model
+
+        result = self._learner.backtest_predictions(
+            data=data,
+            model_names=model_names,
+            num_val_windows=num_val_windows,
+            val_step_size=val_step_size,
+            use_cache=use_cache,
+        )
+
+        if isinstance(model, list):
+            return result
+        else:
+            return result[model_names[0]]
+
+    def backtest_targets(
+        self,
+        data: TimeSeriesDataFrame | None = None,
+        *,
+        num_val_windows: int | None = None,
+        val_step_size: int | None = None,
+    ) -> list[TimeSeriesDataFrame]:
+        """Return target values for each validation window.
+
+        Returns the actual target values corresponding to each validation window used in
+        :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`. The returned targets are aligned
+        with the predictions, making it easy to compute custom evaluation metrics or analyze forecast errors.
+
+        Parameters
+        ----------
+        data : TimeSeriesDataFrame, optional
+            Time series data to extract targets from. If ``None``, returns the targets from the validation windows
+            used during training.
+
+            If provided, all time series in ``data`` must have length at least
+            ``prediction_length + (num_val_windows - 1) * val_step_size + 1``.
+
+            The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
+            the predictor.
+        num_val_windows : int, optional
+            Number of validation windows to extract targets for. If ``None``, uses the ``num_val_windows`` value from
+            training configuration when ``data=None``, otherwise defaults to 1.
+
+            This should match the ``num_val_windows`` argument passed to
+            :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`.
+        val_step_size : int, optional
+            Number of time steps between the start of consecutive validation windows. If ``None``, defaults to
+            ``prediction_length``.
+
+            This should match the ``val_step_size`` argument passed to
+            :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`.
+
+        Returns
+        -------
+        list[TimeSeriesDataFrame]
+            Target values for each validation window. Returns a list of length ``num_val_windows``,
+            where each element contains the full time series data for one validation window.
+            Each dataframe includes both historical context and the last ``prediction_length`` time steps
+            that represent the target values to compare against predictions.
+
+            The returned targets are aligned with the output of
+            :meth:`~autogluon.timeseries.TimeSeriesPredictor.backtest_predictions`, so ``targets[i]`` corresponds
+            to ``predictions[i]`` for the i-th validation window.
+
+        See Also
+        --------
+        backtest_predictions
+            Return predictions for multiple validation windows.
+        evaluate
+            Evaluate forecast accuracy on a hold-out set.
+        """
+        self._assert_is_fit("backtest_targets")
+        if data is not None:
+            data = self._check_and_prepare_data_frame(data)
+        return self._learner.backtest_targets(
+            data=data,
+            num_val_windows=num_val_windows,
+            val_step_size=val_step_size,
+        )
 
     def evaluate(
         self,
-        data:
-        model:
-        metrics:
-        cutoff:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+        model: str | None = None,
+        metrics: str | TimeSeriesScorer | list[str | TimeSeriesScorer] | None = None,
+        cutoff: int | None = None,
         display: bool = False,
         use_cache: bool = True,
     ) -> dict[str, float]:
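Together, the two new methods enable window-by-window custom evaluation. A hedged sketch, assuming the documented alignment (`targets[i]` corresponds to `predictions[i]`, each targets frame ends with the `prediction_length` held-out steps) and the "mean" point-forecast column that AutoGluon predictions carry:

    preds = predictor.backtest_predictions(test_data, num_val_windows=3)
    targets = predictor.backtest_targets(test_data, num_val_windows=3)

    for i, (p, t) in enumerate(zip(preds, targets)):
        # The last `prediction_length` steps of each item are the held-out targets.
        y_true = t[predictor.target].groupby(level="item_id").tail(predictor.prediction_length)
        mae = (p["mean"] - y_true).abs().mean()
        print(f"window {i}: MAE = {mae:.3f}")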
@@ -890,7 +1084,7 @@ class TimeSeriesPredictor:
 
         Parameters
         ----------
-        data :
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
            The data to evaluate the best model on. If a ``cutoff`` is not provided, the last ``prediction_length``
            time steps of each time series in ``data`` will be held out for prediction and forecast accuracy will
            be calculated on these time steps. When a ``cutoff`` is provided, the ``-cutoff``-th to the
@@ -907,7 +1101,7 @@ class TimeSeriesPredictor:
        model : str, optional
            Name of the model that you would like to evaluate. By default, the best model during training
            (with highest validation score) will be used.
-        metrics : str, TimeSeriesScorer or list[
+        metrics : str, TimeSeriesScorer or list[str | TimeSeriesScorer], optional
            Metric or a list of metrics to compute scores with. Defaults to ``self.eval_metric``. Supports both
            metric names as strings and custom metrics based on TimeSeriesScorer.
        cutoff : int, optional
@@ -928,7 +1122,7 @@ class TimeSeriesPredictor:
            will have their signs flipped to obey this convention. For example, negative MAPE values will be reported.
            To get the ``eval_metric`` score, do ``output[predictor.eval_metric.name]``.
         """
-
+        self._assert_is_fit("evaluate")
         data = self._check_and_prepare_data_frame(data)
         data = self._check_and_prepare_data_frame_for_evaluation(data, cutoff=cutoff)
 
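For reference, an `evaluate` call exercising the retyped parameters; the metric names are standard AutoGluon metrics and the cutoff value is illustrative:

    scores = predictor.evaluate(
        test_data,
        metrics=["WQL", "MASE"],  # multiple metrics in one call
        cutoff=-48,               # score the window starting 48 steps before the series end
    )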
@@ -940,15 +1134,15 @@ class TimeSeriesPredictor:
 
     def feature_importance(
         self,
-        data:
-        model:
-        metric:
-        features:
-        time_limit:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+        model: str | None = None,
+        metric: str | TimeSeriesScorer | None = None,
+        features: list[str] | None = None,
+        time_limit: float | None = None,
         method: Literal["naive", "permutation"] = "permutation",
         subsample_size: int = 50,
-        num_iterations:
-        random_seed:
+        num_iterations: int | None = None,
+        random_seed: int | None = 123,
         relative_scores: bool = False,
         include_confidence_band: bool = True,
         confidence_level: float = 0.99,
@@ -1045,6 +1239,7 @@ class TimeSeriesPredictor:
            'importance': The estimated feature importance score.
            'stddev': The standard deviation of the feature importance score. If NaN, then not enough ``num_iterations`` were used.
         """
+        self._assert_is_fit("feature_importance")
         if data is not None:
             data = self._check_and_prepare_data_frame(data)
             data = self._check_and_prepare_data_frame_for_evaluation(data)
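A sketch of `feature_importance` with the retyped arguments; per the docstring above, the result is a dataframe with 'importance' and 'stddev' columns:

    importance = predictor.feature_importance(
        test_data,
        method="permutation",
        subsample_size=50,
        num_iterations=5,
    )
    print(importance.sort_values("importance", ascending=False))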
@@ -1091,7 +1286,7 @@ class TimeSeriesPredictor:
         return version
 
     @classmethod
-    def load(cls, path:
+    def load(cls, path: str | Path, require_version_match: bool = True) -> "TimeSeriesPredictor":
         """Load an existing ``TimeSeriesPredictor`` from given ``path``.
 
         .. warning::
@@ -1175,15 +1370,14 @@ class TimeSeriesPredictor:
     @property
     def model_best(self) -> str:
         """Returns the name of the best model from trainer."""
+        self._assert_is_fit("model_best")
         if self._trainer.model_best is not None:
             models = self._trainer.get_model_names()
             if self._trainer.model_best in models:
                 return self._trainer.model_best
         return self._trainer.get_model_best()
 
-    def persist(
-        self, models: Union[Literal["all", "best"], list[str]] = "best", with_ancestors: bool = True
-    ) -> list[str]:
+    def persist(self, models: Literal["all", "best"] | list[str] = "best", with_ancestors: bool = True) -> list[str]:
         """Persist models in memory for reduced inference latency. This is particularly important if the models are being used for online
         inference where low latency is critical. If models are not persisted in memory, they are loaded from disk every time they are
         asked to make predictions. This is especially cumbersome for large deep learning based models which have to be loaded into
@@ -1206,6 +1400,7 @@ class TimeSeriesPredictor:
        list_of_models : list[str]
            List of persisted model names.
         """
+        self._assert_is_fit("persist")
         return self._learner.persist_trainer(models=models, with_ancestors=with_ancestors)
 
     def unpersist(self) -> list[str]:
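A minimal persist/unpersist round trip for low-latency serving; `request_batches` is a hypothetical stream of inference inputs:

    predictor.persist(models="best")   # keep the best model (and its ancestors) in memory
    for batch in request_batches:
        predictor.predict(batch)
    predictor.unpersist()              # release the memory when done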
@@ -1224,10 +1419,10 @@ class TimeSeriesPredictor:
 
     def leaderboard(
         self,
-        data:
-        cutoff:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str | None = None,
+        cutoff: int | None = None,
         extra_info: bool = False,
-        extra_metrics:
+        extra_metrics: list[str | TimeSeriesScorer] | None = None,
         display: bool = False,
         use_cache: bool = True,
         **kwargs,
@@ -1252,7 +1447,7 @@ class TimeSeriesPredictor:
 
         Parameters
         ----------
-        data :
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str, optional
            dataset used for additional evaluation. Must include both historical and future data (i.e., length of all
            time series in ``data`` must be at least ``prediction_length + 1``, if ``cutoff`` is not provided,
            ``-cutoff + 1`` otherwise).
@@ -1271,7 +1466,7 @@ class TimeSeriesPredictor:
            If True, the leaderboard will contain an additional column ``hyperparameters`` with the hyperparameters used
            by each model during training. An empty dictionary ``{}`` means that the model was trained with default
            hyperparameters.
-        extra_metrics : list[
+        extra_metrics : list[str | TimeSeriesScorer], optional
            A list of metrics to calculate scores for and include in the output DataFrame.
 
            Only valid when ``data`` is specified. The scores refer to the scores on ``data`` (same data as used to
@@ -1293,6 +1488,7 @@ class TimeSeriesPredictor:
            The leaderboard containing information on all models and in order of best model to worst in terms of
            test performance.
         """
+        self._assert_is_fit("leaderboard")
         if "silent" in kwargs:
             # keep `silent` logic for backwards compatibility
             assert isinstance(kwargs["silent"], bool)
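A sketch of a `leaderboard` call using the retyped arguments; note that `extra_metrics` is only valid when `data` is passed:

    lb = predictor.leaderboard(
        test_data,
        extra_metrics=["MASE"],
        extra_info=True,   # adds a `hyperparameters` column
        display=True,
    )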
@@ -1317,12 +1513,12 @@ class TimeSeriesPredictor:
             print(leaderboard)
         return leaderboard
 
-    def make_future_data_frame(self, data:
+    def make_future_data_frame(self, data: TimeSeriesDataFrame | pd.DataFrame | Path | str) -> pd.DataFrame:
         """Generate a dataframe with the ``item_id`` and ``timestamp`` values corresponding to the forecast horizon.
 
         Parameters
         ----------
-        data :
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
            Historical time series data.
 
         Returns
@@ -1370,6 +1566,7 @@ class TimeSeriesPredictor:
            Dict containing various detailed information. We do not recommend directly printing this dict as it may
            be very large.
         """
+        self._assert_is_fit("fit_summary")
         # TODO: HPO-specific information currently not reported in fit_summary
         # TODO: Revisit after ray tune integration
 
@@ -1430,6 +1627,7 @@ class TimeSeriesPredictor:
        ``predictor.predict(data)`` is called will be the refit_full version instead of the original version of the
        model. Has no effect if ``model`` is not the best model.
         """
+        self._assert_is_fit("refit_full")
         logger.warning(
             "\tWARNING: refit_full functionality for TimeSeriesPredictor is experimental "
             "and is not yet supported by all models."
@@ -1482,7 +1680,7 @@ class TimeSeriesPredictor:
         trainer = self._trainer
         train_data = trainer.load_train_data()
         val_data = trainer.load_val_data()
-        base_model_names = trainer.get_model_names(
+        base_model_names = trainer.get_model_names(layer=0)
         pred_proba_dict_val: dict[str, list[TimeSeriesDataFrame]] = {
             model_name: trainer._get_model_oof_predictions(model_name)
             for model_name in base_model_names
@@ -1498,7 +1696,7 @@ class TimeSeriesPredictor:
         )
 
         y_val: list[TimeSeriesDataFrame] = [
-            select_target(df) for df in trainer.
+            select_target(df) for df in trainer._get_validation_windows(train_data=train_data, val_data=val_data)
         ]
         y_test: TimeSeriesDataFrame = select_target(test_data)
 
@@ -1518,27 +1716,27 @@ class TimeSeriesPredictor:
 
     def plot(
         self,
-        data:
-        predictions:
-        quantile_levels:
-        item_ids:
+        data: TimeSeriesDataFrame | pd.DataFrame | Path | str,
+        predictions: TimeSeriesDataFrame | None = None,
+        quantile_levels: list[float] | None = None,
+        item_ids: list[str | int] | None = None,
         max_num_item_ids: int = 8,
-        max_history_length:
-        point_forecast_column:
-        matplotlib_rc_params:
+        max_history_length: int | None = None,
+        point_forecast_column: str | None = None,
+        matplotlib_rc_params: dict | None = None,
     ):
         """Plot historical time series values and the forecasts.
 
         Parameters
         ----------
-        data :
+        data : TimeSeriesDataFrame | pd.DataFrame | Path | str
            Observed time series data.
        predictions : TimeSeriesDataFrame, optional
            Predictions generated by calling :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict`.
        quantile_levels : list[float], optional
            Quantile levels for which to plot the prediction intervals. Defaults to lowest & highest quantile levels
            available in ``predictions``.
-        item_ids : list[
+        item_ids : list[str | int], optional
            If provided, plots will only be generated for time series with these item IDs. By default (if set to
            ``None``), item IDs are selected randomly. In either case, plots are generated for at most
            ``max_num_item_ids`` time series.
@@ -1621,7 +1819,7 @@ class TimeSeriesPredictor:
             for q in quantile_levels:
                 ax.fill_between(forecast.index, point_forecast, forecast[str(q)], color="C1", alpha=0.2)
         if len(axes) > len(item_ids):
-            axes[len(item_ids)].set_axis_off()
-        handles, labels = axes[0].get_legend_handles_labels()
+            axes[len(item_ids)].set_axis_off()  # type: ignore
+        handles, labels = axes[0].get_legend_handles_labels()  # type: ignore
         fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.0), ncols=len(handles))
         return fig
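Finally, a sketch of the retyped `plot` signature in use; per the hunk above, the method returns a matplotlib figure:

    predictions = predictor.predict(data)
    fig = predictor.plot(
        data,
        predictions=predictions,
        quantile_levels=[0.1, 0.9],
        max_num_item_ids=4,
    )
    fig.savefig("forecasts.png", bbox_inches="tight")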