autogluon.timeseries 1.2.1b20250422__py3-none-any.whl → 1.2.1b20250424__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. autogluon/timeseries/dataset/ts_dataframe.py +18 -3
  2. autogluon/timeseries/learner.py +0 -4
  3. autogluon/timeseries/metrics/__init__.py +1 -30
  4. autogluon/timeseries/metrics/abstract.py +0 -10
  5. autogluon/timeseries/metrics/point.py +41 -131
  6. autogluon/timeseries/metrics/quantile.py +15 -36
  7. autogluon/timeseries/models/abstract/__init__.py +2 -2
  8. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +178 -129
  9. autogluon/timeseries/models/chronos/model.py +3 -2
  10. autogluon/timeseries/models/ensemble/__init__.py +3 -2
  11. autogluon/timeseries/models/ensemble/abstract.py +139 -0
  12. autogluon/timeseries/models/ensemble/basic.py +88 -0
  13. autogluon/timeseries/models/ensemble/{greedy_ensemble.py → greedy.py} +67 -61
  14. autogluon/timeseries/models/presets.py +0 -4
  15. autogluon/timeseries/predictor.py +51 -26
  16. autogluon/timeseries/trainer.py +35 -27
  17. autogluon/timeseries/utils/features.py +4 -1
  18. autogluon/timeseries/utils/warning_filters.py +1 -1
  19. autogluon/timeseries/version.py +1 -1
  20. {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/METADATA +5 -4
  21. {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/RECORD +28 -27
  22. autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -86
  23. /autogluon.timeseries-1.2.1b20250422-py3.9-nspkg.pth → /autogluon.timeseries-1.2.1b20250424-py3.9-nspkg.pth +0 -0
  24. {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/LICENSE +0 -0
  25. {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/NOTICE +0 -0
  26. {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/WHEEL +0 -0
  27. {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/namespace_packages.txt +0 -0
  28. {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/top_level.txt +0 -0
  29. {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/zip-safe +0 -0
@@ -8,7 +8,6 @@ import time
8
8
  from abc import ABC, abstractmethod
9
9
  from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
10
10
 
11
- import numpy as np
12
11
  import pandas as pd
13
12
  from typing_extensions import Self
14
13
 
@@ -33,7 +32,8 @@ logger = logging.getLogger(__name__)
33
32
 
34
33
 
35
34
  class TimeSeriesModelBase(ModelBase, ABC):
36
- """Abstract class for all `Model` objects in autogluon.timeseries.
35
+ """Abstract base class for all `Model` objects in autogluon.timeseries, including both
36
+ forecasting models and forecast combination/ensemble models.
37
37
 
38
38
  Parameters
39
39
  ----------
@@ -60,9 +60,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
60
60
  eval_metric_seasonal_period : int, optional
61
61
  Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
62
62
  ``None``, in which case the seasonal period is computed based on the data frequency.
63
- horizon_weight : np.ndarray, optional
64
- Weight assigned to each time step in the forecast horizon when computing the metric. If provided, this list
65
- must contain `prediction_length` non-negative values, with `sum(horizon_weight) = prediction_length`.
66
63
  hyperparameters : dict, default = None
67
64
  Hyperparameters that will be used by the model (can be search spaces instead of fixed values).
68
65
  If None, model defaults are used. This is identical to passing an empty dictionary.
@@ -92,7 +89,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
92
89
  quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
93
90
  eval_metric: Union[str, TimeSeriesScorer, None] = None,
94
91
  eval_metric_seasonal_period: Optional[int] = None,
95
- horizon_weight: Optional[np.ndarray] = None,
96
92
  ):
97
93
  self.name = name or re.sub(r"Model$", "", self.__class__.__name__)
98
94
 
@@ -109,7 +105,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
109
105
 
110
106
  self.eval_metric: TimeSeriesScorer = check_get_evaluation_metric(eval_metric)
111
107
  self.eval_metric_seasonal_period = eval_metric_seasonal_period
112
- self.horizon_weight = horizon_weight
113
108
  self.target: str = target
114
109
  self.covariate_metadata = covariate_metadata or CovariateMetadata()
115
110
 
@@ -140,11 +135,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
140
135
  )
141
136
  self.val_score: Optional[float] = None # Score with eval_metric (Validation data)
142
137
 
143
- self.target_scaler: Optional[TargetScaler]
144
- self.covariate_scaler: Optional[CovariateScaler]
145
- self.covariate_regressor: Optional[CovariateRegressor]
146
- self._initialize_transforms_and_regressor()
147
-
148
138
  def __repr__(self) -> str:
149
139
  return self.name
150
140
 
@@ -255,21 +245,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
255
245
  self._oof_predictions = self.load_oof_predictions(self.path)
256
246
  return self._oof_predictions
257
247
 
258
- def _initialize_transforms_and_regressor(self) -> None:
259
- self.target_scaler = get_target_scaler(self.get_hyperparameters().get("target_scaler"), target=self.target)
260
- self.covariate_scaler = get_covariate_scaler(
261
- self.get_hyperparameters().get("covariate_scaler"),
262
- covariate_metadata=self.covariate_metadata,
263
- use_static_features=self.supports_static_features,
264
- use_known_covariates=self.supports_known_covariates,
265
- use_past_covariates=self.supports_past_covariates,
266
- )
267
- self.covariate_regressor = get_covariate_regressor(
268
- self.get_hyperparameters().get("covariate_regressor"),
269
- target=self.target,
270
- covariate_metadata=self.covariate_metadata,
271
- )
272
-
273
248
  def _get_default_hyperparameters(self) -> dict:
274
249
  return {}
275
250
 
@@ -309,79 +284,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
309
284
  else:
310
285
  raise
311
286
 
312
- @property
313
- def allowed_hyperparameters(self) -> List[str]:
314
- """List of hyperparameters allowed by the model."""
315
- return ["target_scaler", "covariate_regressor"]
316
-
317
- def _score_with_predictions(
318
- self,
319
- data: TimeSeriesDataFrame,
320
- predictions: TimeSeriesDataFrame,
321
- metric: Optional[str] = None,
322
- ) -> float:
323
- """Compute the score measuring how well the predictions align with the data."""
324
- eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
325
- return eval_metric.score(
326
- data=data,
327
- predictions=predictions,
328
- prediction_length=self.prediction_length,
329
- target=self.target,
330
- seasonal_period=self.eval_metric_seasonal_period,
331
- horizon_weight=self.horizon_weight,
332
- )
333
-
334
- def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float: # type: ignore
335
- """Return the evaluation scores for given metric and dataset. The last
336
- `self.prediction_length` time steps of each time series in the input data set
337
- will be held out and used for computing the evaluation score. Time series
338
- models always return higher-is-better type scores.
339
-
340
- Parameters
341
- ----------
342
- data: TimeSeriesDataFrame
343
- Dataset used for scoring.
344
- metric: str
345
- String identifier of evaluation metric to use, from one of
346
- `autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
347
-
348
- Other Parameters
349
- ----------------
350
- num_samples: int
351
- Number of samples to use for making evaluation predictions if the probabilistic
352
- forecasts are generated by forward sampling from the fitted model.
353
-
354
- Returns
355
- -------
356
- score: float
357
- The computed forecast evaluation score on the last `self.prediction_length`
358
- time steps of each time series.
359
- """
360
- past_data, known_covariates = data.get_model_inputs_for_scoring(
361
- prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
362
- )
363
- predictions = self.predict(past_data, known_covariates=known_covariates)
364
- return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
365
-
366
- def score_and_cache_oof(
367
- self,
368
- val_data: TimeSeriesDataFrame,
369
- store_val_score: bool = False,
370
- store_predict_time: bool = False,
371
- **predict_kwargs,
372
- ) -> None:
373
- """Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
374
- past_data, known_covariates = val_data.get_model_inputs_for_scoring(
375
- prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
376
- )
377
- predict_start_time = time.time()
378
- oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
379
- self._oof_predictions = [oof_predictions]
380
- if store_predict_time:
381
- self.predict_time = time.time() - predict_start_time
382
- if store_val_score:
383
- self.val_score = self._score_with_predictions(val_data, oof_predictions)
384
-
385
287
  def _is_gpu_available(self) -> bool:
386
288
  return False
387
289
 
@@ -398,16 +300,6 @@ class TimeSeriesModelBase(ModelBase, ABC):
398
300
  def _get_model_base(self) -> Self:
399
301
  return self
400
302
 
401
- def preprocess( # type: ignore
402
- self,
403
- data: TimeSeriesDataFrame,
404
- known_covariates: Optional[TimeSeriesDataFrame] = None,
405
- is_train: bool = False,
406
- **kwargs,
407
- ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
408
- """Method that implements model-specific preprocessing logic."""
409
- return data, known_covariates
410
-
411
303
  def persist(self) -> Self:
412
304
  """Ask the model to persist its assets in memory, i.e., to predict with low latency. In practice
413
305
  this is used for pretrained models that have to lazy-load model parameters to device memory at
@@ -434,8 +326,112 @@ class TimeSeriesModelBase(ModelBase, ABC):
434
326
  "can_use_val_data": False,
435
327
  }
436
328
 
329
+ def get_params(self) -> dict:
330
+ """Get the constructor parameters required for cloning this model object"""
331
+ # We only use the user-provided hyperparameters for cloning. We cannot use the output of get_hyperparameters()
332
+ # since it may contain search spaces that won't be converted to concrete values during HPO
333
+ hyperparameters = self._hyperparameters.copy()
334
+ if self._extra_ag_args:
335
+ hyperparameters[AG_ARGS_FIT] = self._extra_ag_args.copy()
336
+
337
+ return dict(
338
+ path=self.path_root,
339
+ name=self.name,
340
+ eval_metric=self.eval_metric,
341
+ hyperparameters=hyperparameters,
342
+ freq=self.freq,
343
+ prediction_length=self.prediction_length,
344
+ quantile_levels=self.quantile_levels,
345
+ covariate_metadata=self.covariate_metadata,
346
+ target=self.target,
347
+ )
348
+
349
+ def convert_to_refit_full_via_copy(self) -> Self:
350
+ # save the model as a new model on disk
351
+ previous_name = self.name
352
+ self.rename(self.name + REFIT_FULL_SUFFIX)
353
+ refit_model_path = self.path
354
+ self.save(path=self.path, verbose=False)
355
+
356
+ self.rename(previous_name)
357
+
358
+ refit_model = self.load(path=refit_model_path, verbose=False)
359
+ refit_model.val_score = None
360
+ refit_model.predict_time = None
361
+
362
+ return refit_model
363
+
364
+ def convert_to_refit_full_template(self) -> Self:
365
+ """After calling this function, returned model should be able to be fit without `val_data`."""
366
+ params = copy.deepcopy(self.get_params())
367
+
368
+ if "hyperparameters" not in params:
369
+ params["hyperparameters"] = dict()
370
+
371
+ if AG_ARGS_FIT not in params["hyperparameters"]:
372
+ params["hyperparameters"][AG_ARGS_FIT] = dict()
373
+
374
+ params["name"] = params["name"] + REFIT_FULL_SUFFIX
375
+ template = self.__class__(**params)
376
+
377
+ return template
378
+
437
379
 
438
380
  class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
381
+ """Abstract base class for all time series models that take historical data as input and
382
+ make predictions for the forecast horizon.
383
+ """
384
+
385
+ def __init__(
386
+ self,
387
+ path: Optional[str] = None,
388
+ name: Optional[str] = None,
389
+ hyperparameters: Optional[Dict[str, Any]] = None,
390
+ freq: Optional[str] = None,
391
+ prediction_length: int = 1,
392
+ covariate_metadata: Optional[CovariateMetadata] = None,
393
+ target: str = "target",
394
+ quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
395
+ eval_metric: Union[str, TimeSeriesScorer, None] = None,
396
+ eval_metric_seasonal_period: Optional[int] = None,
397
+ ):
398
+ super().__init__(
399
+ path=path,
400
+ name=name,
401
+ hyperparameters=hyperparameters,
402
+ freq=freq,
403
+ prediction_length=prediction_length,
404
+ covariate_metadata=covariate_metadata,
405
+ target=target,
406
+ quantile_levels=quantile_levels,
407
+ eval_metric=eval_metric,
408
+ eval_metric_seasonal_period=eval_metric_seasonal_period,
409
+ )
410
+ self.target_scaler: Optional[TargetScaler]
411
+ self.covariate_scaler: Optional[CovariateScaler]
412
+ self.covariate_regressor: Optional[CovariateRegressor]
413
+ self._initialize_transforms_and_regressor()
414
+
415
+ def _initialize_transforms_and_regressor(self) -> None:
416
+ self.target_scaler = get_target_scaler(self.get_hyperparameters().get("target_scaler"), target=self.target)
417
+ self.covariate_scaler = get_covariate_scaler(
418
+ self.get_hyperparameters().get("covariate_scaler"),
419
+ covariate_metadata=self.covariate_metadata,
420
+ use_static_features=self.supports_static_features,
421
+ use_known_covariates=self.supports_known_covariates,
422
+ use_past_covariates=self.supports_past_covariates,
423
+ )
424
+ self.covariate_regressor = get_covariate_regressor(
425
+ self.get_hyperparameters().get("covariate_regressor"),
426
+ target=self.target,
427
+ covariate_metadata=self.covariate_metadata,
428
+ )
429
+
430
+ @property
431
+ def allowed_hyperparameters(self) -> List[str]:
432
+ """List of hyperparameters allowed by the model."""
433
+ return ["target_scaler", "covariate_regressor"]
434
+
439
435
  def fit(
440
436
  self,
441
437
  train_data: TimeSeriesDataFrame,
@@ -556,7 +552,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
556
552
  "as hyperparameters when initializing or use `hyperparameter_tune` instead."
557
553
  )
558
554
 
559
- def predict( # type: ignore
555
+ def predict(
560
556
  self,
561
557
  data: TimeSeriesDataFrame,
562
558
  known_covariates: Optional[TimeSeriesDataFrame] = None,
@@ -654,24 +650,6 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
654
650
 
655
651
  return template
656
652
 
657
- def get_params(self) -> dict:
658
- """Get the constructor parameters required for cloning this model object"""
659
- hyperparameters = self.get_hyperparameters().copy()
660
- if self._extra_ag_args:
661
- hyperparameters[AG_ARGS_FIT] = self._extra_ag_args.copy()
662
-
663
- return dict(
664
- path=self.path_root,
665
- name=self.name,
666
- eval_metric=self.eval_metric,
667
- hyperparameters=hyperparameters,
668
- freq=self.freq,
669
- prediction_length=self.prediction_length,
670
- quantile_levels=self.quantile_levels,
671
- covariate_metadata=self.covariate_metadata,
672
- target=self.target,
673
- )
674
-
675
653
  def get_forecast_horizon_index(self, data: TimeSeriesDataFrame) -> pd.MultiIndex:
676
654
  """For each item in the dataframe, get timestamps for the next `prediction_length` time steps into the future."""
677
655
  return pd.MultiIndex.from_frame(
@@ -716,3 +694,74 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
716
694
  """
717
695
  params = self._hyperparameters.copy()
718
696
  return params
697
+
698
+ def _score_with_predictions(
699
+ self,
700
+ data: TimeSeriesDataFrame,
701
+ predictions: TimeSeriesDataFrame,
702
+ metric: Optional[str] = None,
703
+ ) -> float:
704
+ """Compute the score measuring how well the predictions align with the data."""
705
+ eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
706
+ return eval_metric.score(
707
+ data=data,
708
+ predictions=predictions,
709
+ prediction_length=self.prediction_length,
710
+ target=self.target,
711
+ seasonal_period=self.eval_metric_seasonal_period,
712
+ )
713
+
714
+ def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float:
715
+ """Return the evaluation scores for given metric and dataset. The last
716
+ `self.prediction_length` time steps of each time series in the input data set
717
+ will be held out and used for computing the evaluation score. Time series
718
+ models always return higher-is-better type scores.
719
+
720
+ Parameters
721
+ ----------
722
+ data: TimeSeriesDataFrame
723
+ Dataset used for scoring.
724
+ metric: str
725
+ String identifier of evaluation metric to use, from one of
726
+ `autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
727
+
728
+ Returns
729
+ -------
730
+ score: float
731
+ The computed forecast evaluation score on the last `self.prediction_length`
732
+ time steps of each time series.
733
+ """
734
+ past_data, known_covariates = data.get_model_inputs_for_scoring(
735
+ prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
736
+ )
737
+ predictions = self.predict(past_data, known_covariates=known_covariates)
738
+ return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
739
+
740
+ def score_and_cache_oof(
741
+ self,
742
+ val_data: TimeSeriesDataFrame,
743
+ store_val_score: bool = False,
744
+ store_predict_time: bool = False,
745
+ **predict_kwargs,
746
+ ) -> None:
747
+ """Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
748
+ past_data, known_covariates = val_data.get_model_inputs_for_scoring(
749
+ prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
750
+ )
751
+ predict_start_time = time.time()
752
+ oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
753
+ self._oof_predictions = [oof_predictions]
754
+ if store_predict_time:
755
+ self.predict_time = time.time() - predict_start_time
756
+ if store_val_score:
757
+ self.val_score = self._score_with_predictions(val_data, oof_predictions)
758
+
759
+ def preprocess(
760
+ self,
761
+ data: TimeSeriesDataFrame,
762
+ known_covariates: Optional[TimeSeriesDataFrame] = None,
763
+ is_train: bool = False,
764
+ **kwargs,
765
+ ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
766
+ """Method that implements model-specific preprocessing logic."""
767
+ return data, known_covariates
@@ -196,8 +196,9 @@ class ChronosModel(AbstractTimeSeriesModel):
196
196
  name = name if name is not None else "Chronos"
197
197
  if not isinstance(model_path_input, Space):
198
198
  # we truncate the name to avoid long path errors on Windows
199
- model_path_safe = str(model_path_input).replace("/", "__").replace(os.path.sep, "__")[-50:]
200
- name += f"[{model_path_safe}]"
199
+ model_path_suffix = "[" + str(model_path_input).replace("/", "__").replace(os.path.sep, "__")[-50:] + "]"
200
+ if model_path_suffix not in name:
201
+ name += model_path_suffix
201
202
 
202
203
  super().__init__(
203
204
  path=path,
@@ -1,2 +1,3 @@
1
- from .abstract_timeseries_ensemble import AbstractTimeSeriesEnsembleModel
2
- from .greedy_ensemble import TimeSeriesGreedyEnsemble
1
+ from .abstract import AbstractTimeSeriesEnsembleModel
2
+ from .greedy import GreedyEnsemble
3
+ from .basic import SimpleAverageEnsemble, PerformanceWeightedEnsemble
@@ -0,0 +1,139 @@
1
+ import functools
2
+ import logging
3
+ from abc import ABC, abstractmethod
4
+ from typing import Dict, List, Optional
5
+
6
+ import numpy as np
7
+ from typing_extensions import final
8
+
9
+ from autogluon.core.utils.exceptions import TimeLimitExceeded
10
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
11
+ from autogluon.timeseries.models.abstract import TimeSeriesModelBase
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class AbstractTimeSeriesEnsembleModel(TimeSeriesModelBase, ABC):
17
+ """Abstract class for time series ensemble models."""
18
+
19
+ @property
20
+ @abstractmethod
21
+ def model_names(self) -> List[str]:
22
+ """Names of base models included in the ensemble."""
23
+ pass
24
+
25
+ @final
26
+ def fit(
27
+ self,
28
+ predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
29
+ data_per_window: List[TimeSeriesDataFrame],
30
+ model_scores: Optional[Dict[str, float]] = None,
31
+ time_limit: Optional[float] = None,
32
+ ):
33
+ """Fit ensemble model given predictions of candidate base models and the true data.
34
+
35
+ Parameters
36
+ ----------
37
+ predictions_per_window : Dict[str, List[TimeSeriesDataFrame]]
38
+ Dictionary that maps the names of component models to their respective predictions for each validation
39
+ window.
40
+ data_per_window : List[TimeSeriesDataFrame]
41
+ Observed ground truth data used to train the ensemble for each validation window. Each entry in the list
42
+ includes both the forecast horizon (for which the predictions are given in ``predictions_per_window``), as well as the
43
+ "history".
44
+ model_scores : Optional[Dict[str, float]]
45
+ Scores (higher is better) for the models that will constitute the ensemble.
46
+ time_limit : Optional[float]
47
+ Maximum allowed time for training in seconds.
48
+ """
49
+ if time_limit is not None and time_limit <= 0:
50
+ logger.warning(
51
+ f"\tWarning: Model has no time left to train, skipping model... (Time Left = {round(time_limit, 1)}s)"
52
+ )
53
+ raise TimeLimitExceeded
54
+ if isinstance(data_per_window, TimeSeriesDataFrame):
55
+ raise ValueError("When fitting ensemble, `data` should contain ground truth for each validation window")
56
+ num_val_windows = len(data_per_window)
57
+ for model, preds in predictions_per_window.items():
58
+ if len(preds) != num_val_windows:
59
+ raise ValueError(f"For model {model} predictions are unavailable for some validation windows")
60
+ self._fit(
61
+ predictions_per_window=predictions_per_window,
62
+ data_per_window=data_per_window,
63
+ model_scores=model_scores,
64
+ time_limit=time_limit,
65
+ )
66
+ return self
67
+
68
+ def _fit(
69
+ self,
70
+ predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
71
+ data_per_window: List[TimeSeriesDataFrame],
72
+ model_scores: Optional[Dict[str, float]] = None,
73
+ time_limit: Optional[float] = None,
74
+ ):
75
+ """Private method for `fit`. See `fit` for documentation of arguments. Apart from the model
76
+ training logic, `fit` additionally implements other logic such as keeping track of the time limit.
77
+ """
78
+ raise NotImplementedError
79
+
80
+ @final
81
+ def predict(self, data: Dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
82
+ if not set(self.model_names).issubset(set(data.keys())):
83
+ raise ValueError(
84
+ f"Set of models given for prediction in {self.name} differ from those provided during initialization."
85
+ )
86
+ for model_name, model_pred in data.items():
87
+ if model_pred is None:
88
+ raise RuntimeError(f"{self.name} cannot predict because base model {model_name} failed.")
89
+
90
+ # Make sure that all predictions have same shape
91
+ assert len(set(pred.shape for pred in data.values())) == 1
92
+
93
+ return self._predict(data=data, **kwargs)
94
+
95
+ @abstractmethod
96
+ def _predict(self, data: Dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
97
+ pass
98
+
99
+ @abstractmethod
100
+ def remap_base_models(self, model_refit_map: Dict[str, str]) -> None:
101
+ """Update names of the base models based on the mapping in model_refit_map.
102
+
103
+ This method should be called after performing refit_full to point to the refitted base models, if necessary.
104
+ """
105
+ pass
106
+
107
+
108
+ class AbstractWeightedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
109
+ """Abstract class for weighted ensembles which assign one (global) weight per model."""
110
+
111
+ def __init__(self, name: Optional[str] = None, **kwargs):
112
+ if name is None:
113
+ name = "WeightedEnsemble"
114
+ super().__init__(name=name, **kwargs)
115
+ self.model_to_weight: Dict[str, float] = {}
116
+
117
+ @property
118
+ def model_names(self) -> List[str]:
119
+ return list(self.model_to_weight.keys())
120
+
121
+ @property
122
+ def model_weights(self) -> np.ndarray:
123
+ return np.array(list(self.model_to_weight.values()), dtype=np.float64)
124
+
125
+ def _predict(self, data: Dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
126
+ weighted_predictions = [data[model_name] * weight for model_name, weight in self.model_to_weight.items()]
127
+ return functools.reduce(lambda x, y: x + y, weighted_predictions)
128
+
129
+ def get_info(self) -> dict:
130
+ info = super().get_info()
131
+ info["model_weights"] = self.model_to_weight.copy()
132
+ return info
133
+
134
+ def remap_base_models(self, model_refit_map: Dict[str, str]) -> None:
135
+ updated_weights = {}
136
+ for model, weight in self.model_to_weight.items():
137
+ model_full_name = model_refit_map.get(model, model)
138
+ updated_weights[model_full_name] = weight
139
+ self.model_to_weight = updated_weights
@@ -0,0 +1,88 @@
1
+ from typing import Dict, List, Optional
2
+
3
+ import numpy as np
4
+
5
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
6
+
7
+ from .abstract import AbstractWeightedTimeSeriesEnsembleModel
8
+
9
+
10
+ class SimpleAverageEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
11
+ """Constructs a weighted ensemble using a simple average of the constituent models' predictions."""
12
+
13
+ def __init__(self, name: Optional[str] = None, **kwargs):
14
+ if name is None:
15
+ name = "SimpleAverageEnsemble"
16
+ super().__init__(name=name, **kwargs)
17
+
18
+ def _fit(
19
+ self,
20
+ predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
21
+ data_per_window: List[TimeSeriesDataFrame],
22
+ model_scores: Optional[Dict[str, float]] = None,
23
+ time_limit: Optional[float] = None,
24
+ ):
25
+ self.model_to_weight = {}
26
+ num_models = len(predictions_per_window)
27
+ for model_name in predictions_per_window.keys():
28
+ self.model_to_weight[model_name] = 1.0 / num_models
29
+
30
+
31
+ class PerformanceWeightedEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
32
+ """Constructs a weighted ensemble, where the weights are assigned in proportion to the
33
+ (inverse) validation scores.
34
+
35
+ Other Parameters
36
+ ----------------
37
+ weight_scheme: Literal["sq", "inv", "sqrt"], default = "sqrt"
38
+ Method used to compute the weights as a function of the validation scores.
39
+ - "sqrt" computes weights in proportion to `sqrt(1 / S)`. This is the default.
40
+ - "inv" computes weights in proportion to `(1 / S)`.
41
+ - "sq" computes the weights in proportion to `(1 / S)^2` as outlined in [PC2020]_.
42
+
43
+ References
44
+ ----------
45
+ .. [PC2020] Pawlikowski, Maciej, and Agata Chorowska.
46
+ "Weighted ensemble of statistical models." International Journal of Forecasting
47
+ 36.1 (2020): 93-97.
48
+ """
49
+
50
+ def __init__(self, name: Optional[str] = None, **kwargs):
51
+ if name is None:
52
+ name = "PerformanceWeightedEnsemble"
53
+ super().__init__(name=name, **kwargs)
54
+
55
+ def _get_default_hyperparameters(self) -> Dict:
56
+ return {"weight_scheme": "sqrt"}
57
+
58
+ def _fit(
59
+ self,
60
+ predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
61
+ data_per_window: List[TimeSeriesDataFrame],
62
+ model_scores: Optional[Dict[str, float]] = None,
63
+ time_limit: Optional[float] = None,
64
+ ):
65
+ assert model_scores is not None
66
+
67
+ weight_scheme = self.get_hyperparameters()["weight_scheme"]
68
+
69
+ # drop models with non-finite scores (NaN or +/-inf)
70
+ model_scores = {k: v for k, v in model_scores.items() if np.isfinite(v)}
71
+ assert len(model_scores) > 0, (
72
+ "All models have NaN scores. At least one model must score successfully to fit an ensemble"
73
+ )
74
+ assert all(s <= 0 for s in model_scores.values()), (
75
+ "All model scores must be negative, in higher-is-better format."
76
+ )
77
+
78
+ score_transform = {
79
+ "sq": lambda x: np.square(np.reciprocal(x)),
80
+ "inv": lambda x: np.reciprocal(x),
81
+ "sqrt": lambda x: np.sqrt(np.reciprocal(x)),
82
+ }[weight_scheme]
83
+
84
+ self.model_to_weight = {
85
+ model_name: score_transform(-model_scores[model_name] + 1e-5) for model_name in model_scores.keys()
86
+ }
87
+ total_weight = sum(self.model_to_weight.values())
88
+ self.model_to_weight = {k: v / total_weight for k, v in self.model_to_weight.items()}