autogluon.timeseries 1.2.1b20250422__py3-none-any.whl → 1.2.1b20250424__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/dataset/ts_dataframe.py +18 -3
- autogluon/timeseries/learner.py +0 -4
- autogluon/timeseries/metrics/__init__.py +1 -30
- autogluon/timeseries/metrics/abstract.py +0 -10
- autogluon/timeseries/metrics/point.py +41 -131
- autogluon/timeseries/metrics/quantile.py +15 -36
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +178 -129
- autogluon/timeseries/models/chronos/model.py +3 -2
- autogluon/timeseries/models/ensemble/__init__.py +3 -2
- autogluon/timeseries/models/ensemble/abstract.py +139 -0
- autogluon/timeseries/models/ensemble/basic.py +88 -0
- autogluon/timeseries/models/ensemble/{greedy_ensemble.py → greedy.py} +67 -61
- autogluon/timeseries/models/presets.py +0 -4
- autogluon/timeseries/predictor.py +51 -26
- autogluon/timeseries/trainer.py +35 -27
- autogluon/timeseries/utils/features.py +4 -1
- autogluon/timeseries/utils/warning_filters.py +1 -1
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/METADATA +5 -4
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/RECORD +28 -27
- autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -86
- /autogluon.timeseries-1.2.1b20250422-py3.9-nspkg.pth → /autogluon.timeseries-1.2.1b20250424-py3.9-nspkg.pth +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/zip-safe +0 -0
@@ -8,7 +8,6 @@ import time
|
|
8
8
|
from abc import ABC, abstractmethod
|
9
9
|
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
|
10
10
|
|
11
|
-
import numpy as np
|
12
11
|
import pandas as pd
|
13
12
|
from typing_extensions import Self
|
14
13
|
|
@@ -33,7 +32,8 @@ logger = logging.getLogger(__name__)
|
|
33
32
|
|
34
33
|
|
35
34
|
class TimeSeriesModelBase(ModelBase, ABC):
|
36
|
-
"""Abstract class for all `Model` objects in autogluon.timeseries
|
35
|
+
"""Abstract base class for all `Model` objects in autogluon.timeseries, including both
|
36
|
+
forecasting models and forecast combination/ensemble models.
|
37
37
|
|
38
38
|
Parameters
|
39
39
|
----------
|
@@ -60,9 +60,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
|
|
60
60
|
eval_metric_seasonal_period : int, optional
|
61
61
|
Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
|
62
62
|
``None``, in which case the seasonal period is computed based on the data frequency.
|
63
|
-
horizon_weight : np.ndarray, optional
|
64
|
-
Weight assigned to each time step in the forecast horizon when computing the metric. If provided, this list
|
65
|
-
must contain `prediction_length` non-negative values, with `sum(horizon_weight) = prediction_length`.
|
66
63
|
hyperparameters : dict, default = None
|
67
64
|
Hyperparameters that will be used by the model (can be search spaces instead of fixed values).
|
68
65
|
If None, model defaults are used. This is identical to passing an empty dictionary.
|
@@ -92,7 +89,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
|
|
92
89
|
quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
|
93
90
|
eval_metric: Union[str, TimeSeriesScorer, None] = None,
|
94
91
|
eval_metric_seasonal_period: Optional[int] = None,
|
95
|
-
horizon_weight: Optional[np.ndarray] = None,
|
96
92
|
):
|
97
93
|
self.name = name or re.sub(r"Model$", "", self.__class__.__name__)
|
98
94
|
|
@@ -109,7 +105,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
|
|
109
105
|
|
110
106
|
self.eval_metric: TimeSeriesScorer = check_get_evaluation_metric(eval_metric)
|
111
107
|
self.eval_metric_seasonal_period = eval_metric_seasonal_period
|
112
|
-
self.horizon_weight = horizon_weight
|
113
108
|
self.target: str = target
|
114
109
|
self.covariate_metadata = covariate_metadata or CovariateMetadata()
|
115
110
|
|
@@ -140,11 +135,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
|
|
140
135
|
)
|
141
136
|
self.val_score: Optional[float] = None # Score with eval_metric (Validation data)
|
142
137
|
|
143
|
-
self.target_scaler: Optional[TargetScaler]
|
144
|
-
self.covariate_scaler: Optional[CovariateScaler]
|
145
|
-
self.covariate_regressor: Optional[CovariateRegressor]
|
146
|
-
self._initialize_transforms_and_regressor()
|
147
|
-
|
148
138
|
def __repr__(self) -> str:
|
149
139
|
return self.name
|
150
140
|
|
@@ -255,21 +245,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
|
|
255
245
|
self._oof_predictions = self.load_oof_predictions(self.path)
|
256
246
|
return self._oof_predictions
|
257
247
|
|
258
|
-
def _initialize_transforms_and_regressor(self) -> None:
|
259
|
-
self.target_scaler = get_target_scaler(self.get_hyperparameters().get("target_scaler"), target=self.target)
|
260
|
-
self.covariate_scaler = get_covariate_scaler(
|
261
|
-
self.get_hyperparameters().get("covariate_scaler"),
|
262
|
-
covariate_metadata=self.covariate_metadata,
|
263
|
-
use_static_features=self.supports_static_features,
|
264
|
-
use_known_covariates=self.supports_known_covariates,
|
265
|
-
use_past_covariates=self.supports_past_covariates,
|
266
|
-
)
|
267
|
-
self.covariate_regressor = get_covariate_regressor(
|
268
|
-
self.get_hyperparameters().get("covariate_regressor"),
|
269
|
-
target=self.target,
|
270
|
-
covariate_metadata=self.covariate_metadata,
|
271
|
-
)
|
272
|
-
|
273
248
|
def _get_default_hyperparameters(self) -> dict:
|
274
249
|
return {}
|
275
250
|
|
@@ -309,79 +284,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
|
|
309
284
|
else:
|
310
285
|
raise
|
311
286
|
|
312
|
-
@property
|
313
|
-
def allowed_hyperparameters(self) -> List[str]:
|
314
|
-
"""List of hyperparameters allowed by the model."""
|
315
|
-
return ["target_scaler", "covariate_regressor"]
|
316
|
-
|
317
|
-
def _score_with_predictions(
|
318
|
-
self,
|
319
|
-
data: TimeSeriesDataFrame,
|
320
|
-
predictions: TimeSeriesDataFrame,
|
321
|
-
metric: Optional[str] = None,
|
322
|
-
) -> float:
|
323
|
-
"""Compute the score measuring how well the predictions align with the data."""
|
324
|
-
eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
|
325
|
-
return eval_metric.score(
|
326
|
-
data=data,
|
327
|
-
predictions=predictions,
|
328
|
-
prediction_length=self.prediction_length,
|
329
|
-
target=self.target,
|
330
|
-
seasonal_period=self.eval_metric_seasonal_period,
|
331
|
-
horizon_weight=self.horizon_weight,
|
332
|
-
)
|
333
|
-
|
334
|
-
def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float: # type: ignore
|
335
|
-
"""Return the evaluation scores for given metric and dataset. The last
|
336
|
-
`self.prediction_length` time steps of each time series in the input data set
|
337
|
-
will be held out and used for computing the evaluation score. Time series
|
338
|
-
models always return higher-is-better type scores.
|
339
|
-
|
340
|
-
Parameters
|
341
|
-
----------
|
342
|
-
data: TimeSeriesDataFrame
|
343
|
-
Dataset used for scoring.
|
344
|
-
metric: str
|
345
|
-
String identifier of evaluation metric to use, from one of
|
346
|
-
`autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
|
347
|
-
|
348
|
-
Other Parameters
|
349
|
-
----------------
|
350
|
-
num_samples: int
|
351
|
-
Number of samples to use for making evaluation predictions if the probabilistic
|
352
|
-
forecasts are generated by forward sampling from the fitted model.
|
353
|
-
|
354
|
-
Returns
|
355
|
-
-------
|
356
|
-
score: float
|
357
|
-
The computed forecast evaluation score on the last `self.prediction_length`
|
358
|
-
time steps of each time series.
|
359
|
-
"""
|
360
|
-
past_data, known_covariates = data.get_model_inputs_for_scoring(
|
361
|
-
prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
|
362
|
-
)
|
363
|
-
predictions = self.predict(past_data, known_covariates=known_covariates)
|
364
|
-
return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
|
365
|
-
|
366
|
-
def score_and_cache_oof(
|
367
|
-
self,
|
368
|
-
val_data: TimeSeriesDataFrame,
|
369
|
-
store_val_score: bool = False,
|
370
|
-
store_predict_time: bool = False,
|
371
|
-
**predict_kwargs,
|
372
|
-
) -> None:
|
373
|
-
"""Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
|
374
|
-
past_data, known_covariates = val_data.get_model_inputs_for_scoring(
|
375
|
-
prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
|
376
|
-
)
|
377
|
-
predict_start_time = time.time()
|
378
|
-
oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
|
379
|
-
self._oof_predictions = [oof_predictions]
|
380
|
-
if store_predict_time:
|
381
|
-
self.predict_time = time.time() - predict_start_time
|
382
|
-
if store_val_score:
|
383
|
-
self.val_score = self._score_with_predictions(val_data, oof_predictions)
|
384
|
-
|
385
287
|
def _is_gpu_available(self) -> bool:
|
386
288
|
return False
|
387
289
|
|
@@ -398,16 +300,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
|
|
398
300
|
def _get_model_base(self) -> Self:
|
399
301
|
return self
|
400
302
|
|
401
|
-
def preprocess( # type: ignore
|
402
|
-
self,
|
403
|
-
data: TimeSeriesDataFrame,
|
404
|
-
known_covariates: Optional[TimeSeriesDataFrame] = None,
|
405
|
-
is_train: bool = False,
|
406
|
-
**kwargs,
|
407
|
-
) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
|
408
|
-
"""Method that implements model-specific preprocessing logic."""
|
409
|
-
return data, known_covariates
|
410
|
-
|
411
303
|
def persist(self) -> Self:
|
412
304
|
"""Ask the model to persist its assets in memory, i.e., to predict with low latency. In practice
|
413
305
|
this is used for pretrained models that have to lazy-load model parameters to device memory at
|
@@ -434,8 +326,112 @@ class TimeSeriesModelBase(ModelBase, ABC):
|
|
434
326
|
"can_use_val_data": False,
|
435
327
|
}
|
436
328
|
|
329
|
+
def get_params(self) -> dict:
|
330
|
+
"""Get the constructor parameters required for cloning this model object"""
|
331
|
+
# We only use the user-provided hyperparameters for cloning. We cannot use the output of get_hyperparameters()
|
332
|
+
# since it may contain search spaces that won't be converted to concrete values during HPO
|
333
|
+
hyperparameters = self._hyperparameters.copy()
|
334
|
+
if self._extra_ag_args:
|
335
|
+
hyperparameters[AG_ARGS_FIT] = self._extra_ag_args.copy()
|
336
|
+
|
337
|
+
return dict(
|
338
|
+
path=self.path_root,
|
339
|
+
name=self.name,
|
340
|
+
eval_metric=self.eval_metric,
|
341
|
+
hyperparameters=hyperparameters,
|
342
|
+
freq=self.freq,
|
343
|
+
prediction_length=self.prediction_length,
|
344
|
+
quantile_levels=self.quantile_levels,
|
345
|
+
covariate_metadata=self.covariate_metadata,
|
346
|
+
target=self.target,
|
347
|
+
)
|
348
|
+
|
349
|
+
def convert_to_refit_full_via_copy(self) -> Self:
|
350
|
+
# save the model as a new model on disk
|
351
|
+
previous_name = self.name
|
352
|
+
self.rename(self.name + REFIT_FULL_SUFFIX)
|
353
|
+
refit_model_path = self.path
|
354
|
+
self.save(path=self.path, verbose=False)
|
355
|
+
|
356
|
+
self.rename(previous_name)
|
357
|
+
|
358
|
+
refit_model = self.load(path=refit_model_path, verbose=False)
|
359
|
+
refit_model.val_score = None
|
360
|
+
refit_model.predict_time = None
|
361
|
+
|
362
|
+
return refit_model
|
363
|
+
|
364
|
+
def convert_to_refit_full_template(self) -> Self:
|
365
|
+
"""After calling this function, returned model should be able to be fit without `val_data`."""
|
366
|
+
params = copy.deepcopy(self.get_params())
|
367
|
+
|
368
|
+
if "hyperparameters" not in params:
|
369
|
+
params["hyperparameters"] = dict()
|
370
|
+
|
371
|
+
if AG_ARGS_FIT not in params["hyperparameters"]:
|
372
|
+
params["hyperparameters"][AG_ARGS_FIT] = dict()
|
373
|
+
|
374
|
+
params["name"] = params["name"] + REFIT_FULL_SUFFIX
|
375
|
+
template = self.__class__(**params)
|
376
|
+
|
377
|
+
return template
|
378
|
+
|
437
379
|
|
438
380
|
class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
|
381
|
+
"""Abstract base class for all time series models that take historical data as input and
|
382
|
+
make predictions for the forecast horizon.
|
383
|
+
"""
|
384
|
+
|
385
|
+
def __init__(
|
386
|
+
self,
|
387
|
+
path: Optional[str] = None,
|
388
|
+
name: Optional[str] = None,
|
389
|
+
hyperparameters: Optional[Dict[str, Any]] = None,
|
390
|
+
freq: Optional[str] = None,
|
391
|
+
prediction_length: int = 1,
|
392
|
+
covariate_metadata: Optional[CovariateMetadata] = None,
|
393
|
+
target: str = "target",
|
394
|
+
quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
|
395
|
+
eval_metric: Union[str, TimeSeriesScorer, None] = None,
|
396
|
+
eval_metric_seasonal_period: Optional[int] = None,
|
397
|
+
):
|
398
|
+
super().__init__(
|
399
|
+
path=path,
|
400
|
+
name=name,
|
401
|
+
hyperparameters=hyperparameters,
|
402
|
+
freq=freq,
|
403
|
+
prediction_length=prediction_length,
|
404
|
+
covariate_metadata=covariate_metadata,
|
405
|
+
target=target,
|
406
|
+
quantile_levels=quantile_levels,
|
407
|
+
eval_metric=eval_metric,
|
408
|
+
eval_metric_seasonal_period=eval_metric_seasonal_period,
|
409
|
+
)
|
410
|
+
self.target_scaler: Optional[TargetScaler]
|
411
|
+
self.covariate_scaler: Optional[CovariateScaler]
|
412
|
+
self.covariate_regressor: Optional[CovariateRegressor]
|
413
|
+
self._initialize_transforms_and_regressor()
|
414
|
+
|
415
|
+
def _initialize_transforms_and_regressor(self) -> None:
|
416
|
+
self.target_scaler = get_target_scaler(self.get_hyperparameters().get("target_scaler"), target=self.target)
|
417
|
+
self.covariate_scaler = get_covariate_scaler(
|
418
|
+
self.get_hyperparameters().get("covariate_scaler"),
|
419
|
+
covariate_metadata=self.covariate_metadata,
|
420
|
+
use_static_features=self.supports_static_features,
|
421
|
+
use_known_covariates=self.supports_known_covariates,
|
422
|
+
use_past_covariates=self.supports_past_covariates,
|
423
|
+
)
|
424
|
+
self.covariate_regressor = get_covariate_regressor(
|
425
|
+
self.get_hyperparameters().get("covariate_regressor"),
|
426
|
+
target=self.target,
|
427
|
+
covariate_metadata=self.covariate_metadata,
|
428
|
+
)
|
429
|
+
|
430
|
+
@property
|
431
|
+
def allowed_hyperparameters(self) -> List[str]:
|
432
|
+
"""List of hyperparameters allowed by the model."""
|
433
|
+
return ["target_scaler", "covariate_regressor"]
|
434
|
+
|
439
435
|
def fit(
|
440
436
|
self,
|
441
437
|
train_data: TimeSeriesDataFrame,
|
@@ -556,7 +552,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
|
|
556
552
|
"as hyperparameters when initializing or use `hyperparameter_tune` instead."
|
557
553
|
)
|
558
554
|
|
559
|
-
def predict(
|
555
|
+
def predict(
|
560
556
|
self,
|
561
557
|
data: TimeSeriesDataFrame,
|
562
558
|
known_covariates: Optional[TimeSeriesDataFrame] = None,
|
@@ -654,24 +650,6 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
|
|
654
650
|
|
655
651
|
return template
|
656
652
|
|
657
|
-
def get_params(self) -> dict:
|
658
|
-
"""Get the constructor parameters required for cloning this model object"""
|
659
|
-
hyperparameters = self.get_hyperparameters().copy()
|
660
|
-
if self._extra_ag_args:
|
661
|
-
hyperparameters[AG_ARGS_FIT] = self._extra_ag_args.copy()
|
662
|
-
|
663
|
-
return dict(
|
664
|
-
path=self.path_root,
|
665
|
-
name=self.name,
|
666
|
-
eval_metric=self.eval_metric,
|
667
|
-
hyperparameters=hyperparameters,
|
668
|
-
freq=self.freq,
|
669
|
-
prediction_length=self.prediction_length,
|
670
|
-
quantile_levels=self.quantile_levels,
|
671
|
-
covariate_metadata=self.covariate_metadata,
|
672
|
-
target=self.target,
|
673
|
-
)
|
674
|
-
|
675
653
|
def get_forecast_horizon_index(self, data: TimeSeriesDataFrame) -> pd.MultiIndex:
|
676
654
|
"""For each item in the dataframe, get timestamps for the next `prediction_length` time steps into the future."""
|
677
655
|
return pd.MultiIndex.from_frame(
|
@@ -716,3 +694,74 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
|
|
716
694
|
"""
|
717
695
|
params = self._hyperparameters.copy()
|
718
696
|
return params
|
697
|
+
|
698
|
+
def _score_with_predictions(
|
699
|
+
self,
|
700
|
+
data: TimeSeriesDataFrame,
|
701
|
+
predictions: TimeSeriesDataFrame,
|
702
|
+
metric: Optional[str] = None,
|
703
|
+
) -> float:
|
704
|
+
"""Compute the score measuring how well the predictions align with the data."""
|
705
|
+
eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
|
706
|
+
return eval_metric.score(
|
707
|
+
data=data,
|
708
|
+
predictions=predictions,
|
709
|
+
prediction_length=self.prediction_length,
|
710
|
+
target=self.target,
|
711
|
+
seasonal_period=self.eval_metric_seasonal_period,
|
712
|
+
)
|
713
|
+
|
714
|
+
def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float:
|
715
|
+
"""Return the evaluation scores for given metric and dataset. The last
|
716
|
+
`self.prediction_length` time steps of each time series in the input data set
|
717
|
+
will be held out and used for computing the evaluation score. Time series
|
718
|
+
models always return higher-is-better type scores.
|
719
|
+
|
720
|
+
Parameters
|
721
|
+
----------
|
722
|
+
data: TimeSeriesDataFrame
|
723
|
+
Dataset used for scoring.
|
724
|
+
metric: str
|
725
|
+
String identifier of evaluation metric to use, from one of
|
726
|
+
`autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
|
727
|
+
|
728
|
+
Returns
|
729
|
+
-------
|
730
|
+
score: float
|
731
|
+
The computed forecast evaluation score on the last `self.prediction_length`
|
732
|
+
time steps of each time series.
|
733
|
+
"""
|
734
|
+
past_data, known_covariates = data.get_model_inputs_for_scoring(
|
735
|
+
prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
|
736
|
+
)
|
737
|
+
predictions = self.predict(past_data, known_covariates=known_covariates)
|
738
|
+
return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
|
739
|
+
|
740
|
+
def score_and_cache_oof(
|
741
|
+
self,
|
742
|
+
val_data: TimeSeriesDataFrame,
|
743
|
+
store_val_score: bool = False,
|
744
|
+
store_predict_time: bool = False,
|
745
|
+
**predict_kwargs,
|
746
|
+
) -> None:
|
747
|
+
"""Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
|
748
|
+
past_data, known_covariates = val_data.get_model_inputs_for_scoring(
|
749
|
+
prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
|
750
|
+
)
|
751
|
+
predict_start_time = time.time()
|
752
|
+
oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
|
753
|
+
self._oof_predictions = [oof_predictions]
|
754
|
+
if store_predict_time:
|
755
|
+
self.predict_time = time.time() - predict_start_time
|
756
|
+
if store_val_score:
|
757
|
+
self.val_score = self._score_with_predictions(val_data, oof_predictions)
|
758
|
+
|
759
|
+
def preprocess(
|
760
|
+
self,
|
761
|
+
data: TimeSeriesDataFrame,
|
762
|
+
known_covariates: Optional[TimeSeriesDataFrame] = None,
|
763
|
+
is_train: bool = False,
|
764
|
+
**kwargs,
|
765
|
+
) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
|
766
|
+
"""Method that implements model-specific preprocessing logic."""
|
767
|
+
return data, known_covariates
|
@@ -196,8 +196,9 @@ class ChronosModel(AbstractTimeSeriesModel):
|
|
196
196
|
name = name if name is not None else "Chronos"
|
197
197
|
if not isinstance(model_path_input, Space):
|
198
198
|
# we truncate the name to avoid long path errors on Windows
|
199
|
-
|
200
|
-
|
199
|
+
model_path_suffix = "[" + str(model_path_input).replace("/", "__").replace(os.path.sep, "__")[-50:] + "]"
|
200
|
+
if model_path_suffix not in name:
|
201
|
+
name += model_path_suffix
|
201
202
|
|
202
203
|
super().__init__(
|
203
204
|
path=path,
|
@@ -1,2 +1,3 @@
|
|
1
|
-
from .
|
2
|
-
from .
|
1
|
+
from .abstract import AbstractTimeSeriesEnsembleModel
|
2
|
+
from .greedy import GreedyEnsemble
|
3
|
+
from .basic import SimpleAverageEnsemble, PerformanceWeightedEnsemble
|
@@ -0,0 +1,139 @@
|
|
1
|
+
import functools
|
2
|
+
import logging
|
3
|
+
from abc import ABC, abstractmethod
|
4
|
+
from typing import Dict, List, Optional
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
from typing_extensions import final
|
8
|
+
|
9
|
+
from autogluon.core.utils.exceptions import TimeLimitExceeded
|
10
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
11
|
+
from autogluon.timeseries.models.abstract import TimeSeriesModelBase
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class AbstractTimeSeriesEnsembleModel(TimeSeriesModelBase, ABC):
|
17
|
+
"""Abstract class for time series ensemble models."""
|
18
|
+
|
19
|
+
@property
|
20
|
+
@abstractmethod
|
21
|
+
def model_names(self) -> List[str]:
|
22
|
+
"""Names of base models included in the ensemble."""
|
23
|
+
pass
|
24
|
+
|
25
|
+
@final
|
26
|
+
def fit(
|
27
|
+
self,
|
28
|
+
predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
|
29
|
+
data_per_window: List[TimeSeriesDataFrame],
|
30
|
+
model_scores: Optional[Dict[str, float]] = None,
|
31
|
+
time_limit: Optional[float] = None,
|
32
|
+
):
|
33
|
+
"""Fit ensemble model given predictions of candidate base models and the true data.
|
34
|
+
|
35
|
+
Parameters
|
36
|
+
----------
|
37
|
+
predictions_per_window : Dict[str, List[TimeSeriesDataFrame]]
|
38
|
+
Dictionary that maps the names of component models to their respective predictions for each validation
|
39
|
+
window.
|
40
|
+
data_per_window : List[TimeSeriesDataFrame]
|
41
|
+
Observed ground truth data used to train the ensemble for each validation window. Each entry in the list
|
42
|
+
includes both the forecast horizon (for which the predictions are given in ``predictions_per_window``), as well as the
|
43
|
+
"history".
|
44
|
+
model_scores : Optional[Dict[str, float]]
|
45
|
+
Scores (higher is better) for the models that will constitute the ensemble.
|
46
|
+
time_limit : Optional[float]
|
47
|
+
Maximum allowed time for training in seconds.
|
48
|
+
"""
|
49
|
+
if time_limit is not None and time_limit <= 0:
|
50
|
+
logger.warning(
|
51
|
+
f"\tWarning: Model has no time left to train, skipping model... (Time Left = {round(time_limit, 1)}s)"
|
52
|
+
)
|
53
|
+
raise TimeLimitExceeded
|
54
|
+
if isinstance(data_per_window, TimeSeriesDataFrame):
|
55
|
+
raise ValueError("When fitting ensemble, `data` should contain ground truth for each validation window")
|
56
|
+
num_val_windows = len(data_per_window)
|
57
|
+
for model, preds in predictions_per_window.items():
|
58
|
+
if len(preds) != num_val_windows:
|
59
|
+
raise ValueError(f"For model {model} predictions are unavailable for some validation windows")
|
60
|
+
self._fit(
|
61
|
+
predictions_per_window=predictions_per_window,
|
62
|
+
data_per_window=data_per_window,
|
63
|
+
model_scores=model_scores,
|
64
|
+
time_limit=time_limit,
|
65
|
+
)
|
66
|
+
return self
|
67
|
+
|
68
|
+
def _fit(
|
69
|
+
self,
|
70
|
+
predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
|
71
|
+
data_per_window: List[TimeSeriesDataFrame],
|
72
|
+
model_scores: Optional[Dict[str, float]] = None,
|
73
|
+
time_limit: Optional[float] = None,
|
74
|
+
):
|
75
|
+
"""Private method for `fit`. See `fit` for documentation of arguments. Apart from the model
|
76
|
+
training logic, `fit` additionally implements other logic such as keeping track of the time limit.
|
77
|
+
"""
|
78
|
+
raise NotImplementedError
|
79
|
+
|
80
|
+
@final
|
81
|
+
def predict(self, data: Dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
|
82
|
+
if not set(self.model_names).issubset(set(data.keys())):
|
83
|
+
raise ValueError(
|
84
|
+
f"Set of models given for prediction in {self.name} differ from those provided during initialization."
|
85
|
+
)
|
86
|
+
for model_name, model_pred in data.items():
|
87
|
+
if model_pred is None:
|
88
|
+
raise RuntimeError(f"{self.name} cannot predict because base model {model_name} failed.")
|
89
|
+
|
90
|
+
# Make sure that all predictions have same shape
|
91
|
+
assert len(set(pred.shape for pred in data.values())) == 1
|
92
|
+
|
93
|
+
return self._predict(data=data, **kwargs)
|
94
|
+
|
95
|
+
@abstractmethod
|
96
|
+
def _predict(self, data: Dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
|
97
|
+
pass
|
98
|
+
|
99
|
+
@abstractmethod
|
100
|
+
def remap_base_models(self, model_refit_map: Dict[str, str]) -> None:
|
101
|
+
"""Update names of the base models based on the mapping in model_refit_map.
|
102
|
+
|
103
|
+
This method should be called after performing refit_full to point to the refitted base models, if necessary.
|
104
|
+
"""
|
105
|
+
pass
|
106
|
+
|
107
|
+
|
108
|
+
class AbstractWeightedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
|
109
|
+
"""Abstract class for weighted ensembles which assign one (global) weight per model."""
|
110
|
+
|
111
|
+
def __init__(self, name: Optional[str] = None, **kwargs):
|
112
|
+
if name is None:
|
113
|
+
name = "WeightedEnsemble"
|
114
|
+
super().__init__(name=name, **kwargs)
|
115
|
+
self.model_to_weight: Dict[str, float] = {}
|
116
|
+
|
117
|
+
@property
|
118
|
+
def model_names(self) -> List[str]:
|
119
|
+
return list(self.model_to_weight.keys())
|
120
|
+
|
121
|
+
@property
|
122
|
+
def model_weights(self) -> np.ndarray:
|
123
|
+
return np.array(list(self.model_to_weight.values()), dtype=np.float64)
|
124
|
+
|
125
|
+
def _predict(self, data: Dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
|
126
|
+
weighted_predictions = [data[model_name] * weight for model_name, weight in self.model_to_weight.items()]
|
127
|
+
return functools.reduce(lambda x, y: x + y, weighted_predictions)
|
128
|
+
|
129
|
+
def get_info(self) -> dict:
|
130
|
+
info = super().get_info()
|
131
|
+
info["model_weights"] = self.model_to_weight.copy()
|
132
|
+
return info
|
133
|
+
|
134
|
+
def remap_base_models(self, model_refit_map: Dict[str, str]) -> None:
|
135
|
+
updated_weights = {}
|
136
|
+
for model, weight in self.model_to_weight.items():
|
137
|
+
model_full_name = model_refit_map.get(model, model)
|
138
|
+
updated_weights[model_full_name] = weight
|
139
|
+
self.model_to_weight = updated_weights
|
@@ -0,0 +1,88 @@
|
|
1
|
+
from typing import Dict, List, Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
|
5
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
6
|
+
|
7
|
+
from .abstract import AbstractWeightedTimeSeriesEnsembleModel
|
8
|
+
|
9
|
+
|
10
|
+
class SimpleAverageEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
|
11
|
+
"""Constructs a weighted ensemble using a simple average of the constituent models' predictions."""
|
12
|
+
|
13
|
+
def __init__(self, name: Optional[str] = None, **kwargs):
|
14
|
+
if name is None:
|
15
|
+
name = "SimpleAverageEnsemble"
|
16
|
+
super().__init__(name=name, **kwargs)
|
17
|
+
|
18
|
+
def _fit(
|
19
|
+
self,
|
20
|
+
predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
|
21
|
+
data_per_window: List[TimeSeriesDataFrame],
|
22
|
+
model_scores: Optional[Dict[str, float]] = None,
|
23
|
+
time_limit: Optional[float] = None,
|
24
|
+
):
|
25
|
+
self.model_to_weight = {}
|
26
|
+
num_models = len(predictions_per_window)
|
27
|
+
for model_name in predictions_per_window.keys():
|
28
|
+
self.model_to_weight[model_name] = 1.0 / num_models
|
29
|
+
|
30
|
+
|
31
|
+
class PerformanceWeightedEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
|
32
|
+
"""Constructs a weighted ensemble, where the weights are assigned in proportion to the
|
33
|
+
(inverse) validation scores.
|
34
|
+
|
35
|
+
Other Parameters
|
36
|
+
----------------
|
37
|
+
weight_scheme: Literal["sq", "inv", "sqrt"], default = "sqrt"
|
38
|
+
Method used to compute the weights as a function of the validation scores.
|
39
|
+
- "sqrt" computes weights in proportion to `sqrt(1 / S)`. This is the default.
|
40
|
+
- "inv" computes weights in proportion to `(1 / S)`.
|
41
|
+
- "sq" computes the weights in proportion to `(1 / S)^2` as outlined in [PC2020]_.
|
42
|
+
|
43
|
+
References
|
44
|
+
----------
|
45
|
+
.. [PC2020] Pawlikowski, Maciej, and Agata Chorowska.
|
46
|
+
"Weighted ensemble of statistical models." International Journal of Forecasting
|
47
|
+
36.1 (2020): 93-97.
|
48
|
+
"""
|
49
|
+
|
50
|
+
def __init__(self, name: Optional[str] = None, **kwargs):
|
51
|
+
if name is None:
|
52
|
+
name = "PerformanceWeightedEnsemble"
|
53
|
+
super().__init__(name=name, **kwargs)
|
54
|
+
|
55
|
+
def _get_default_hyperparameters(self) -> Dict:
|
56
|
+
return {"weight_scheme": "sqrt"}
|
57
|
+
|
58
|
+
def _fit(
|
59
|
+
self,
|
60
|
+
predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
|
61
|
+
data_per_window: List[TimeSeriesDataFrame],
|
62
|
+
model_scores: Optional[Dict[str, float]] = None,
|
63
|
+
time_limit: Optional[float] = None,
|
64
|
+
):
|
65
|
+
assert model_scores is not None
|
66
|
+
|
67
|
+
weight_scheme = self.get_hyperparameters()["weight_scheme"]
|
68
|
+
|
69
|
+
# drop non-finite scores (NaN and +/-inf)
|
70
|
+
model_scores = {k: v for k, v in model_scores.items() if np.isfinite(v)}
|
71
|
+
assert len(model_scores) > 0, (
|
72
|
+
"All models have NaN scores. At least one model must score successfully to fit an ensemble"
|
73
|
+
)
|
74
|
+
assert all(s <= 0 for s in model_scores.values()), (
|
75
|
+
"All model scores must be negative, in higher-is-better format."
|
76
|
+
)
|
77
|
+
|
78
|
+
score_transform = {
|
79
|
+
"sq": lambda x: np.square(np.reciprocal(x)),
|
80
|
+
"inv": lambda x: np.reciprocal(x),
|
81
|
+
"sqrt": lambda x: np.sqrt(np.reciprocal(x)),
|
82
|
+
}[weight_scheme]
|
83
|
+
|
84
|
+
self.model_to_weight = {
|
85
|
+
model_name: score_transform(-model_scores[model_name] + 1e-5) for model_name in model_scores.keys()
|
86
|
+
}
|
87
|
+
total_weight = sum(self.model_to_weight.values())
|
88
|
+
self.model_to_weight = {k: v / total_weight for k, v in self.model_to_weight.items()}
|