autogluon.timeseries 1.4.1b20251115__py3-none-any.whl → 1.5.0b20251221__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/configs/hyperparameter_presets.py +13 -28
- autogluon/timeseries/configs/predictor_presets.py +23 -39
- autogluon/timeseries/dataset/ts_dataframe.py +32 -34
- autogluon/timeseries/learner.py +67 -33
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +4 -4
- autogluon/timeseries/models/__init__.py +2 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -50
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +30 -26
- autogluon/timeseries/models/autogluon_tabular/per_step.py +13 -11
- autogluon/timeseries/models/autogluon_tabular/transforms.py +2 -2
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +395 -0
- autogluon/timeseries/models/chronos/model.py +30 -25
- autogluon/timeseries/models/chronos/utils.py +5 -5
- autogluon/timeseries/models/ensemble/__init__.py +17 -10
- autogluon/timeseries/models/ensemble/abstract.py +13 -9
- autogluon/timeseries/models/ensemble/array_based/__init__.py +2 -2
- autogluon/timeseries/models/ensemble/array_based/abstract.py +24 -31
- autogluon/timeseries/models/ensemble/array_based/models.py +146 -11
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +2 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +6 -5
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +44 -83
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +21 -55
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +7 -3
- autogluon/timeseries/models/ensemble/weighted/basic.py +26 -13
- autogluon/timeseries/models/ensemble/weighted/greedy.py +21 -144
- autogluon/timeseries/models/gluonts/abstract.py +30 -29
- autogluon/timeseries/models/gluonts/dataset.py +9 -9
- autogluon/timeseries/models/gluonts/models.py +0 -7
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +13 -16
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +7 -1
- autogluon/timeseries/models/local/statsforecast.py +13 -13
- autogluon/timeseries/models/multi_window/multi_window_model.py +38 -23
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +6 -6
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +4 -9
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +2 -3
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +10 -10
- autogluon/timeseries/models/toto/_internal/dataset.py +2 -2
- autogluon/timeseries/models/toto/_internal/forecaster.py +8 -8
- autogluon/timeseries/models/toto/dataloader.py +4 -4
- autogluon/timeseries/models/toto/hf_pretrained_model.py +97 -16
- autogluon/timeseries/models/toto/model.py +30 -17
- autogluon/timeseries/predictor.py +531 -136
- autogluon/timeseries/regressor.py +18 -23
- autogluon/timeseries/splitter.py +2 -2
- autogluon/timeseries/trainer/ensemble_composer.py +323 -129
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +235 -145
- autogluon/timeseries/trainer/utils.py +3 -4
- autogluon/timeseries/transforms/covariate_scaler.py +7 -7
- autogluon/timeseries/transforms/target_scaler.py +8 -8
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +22 -9
- autogluon/timeseries/utils/forecast.py +1 -2
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/METADATA +23 -21
- autogluon_timeseries-1.5.0b20251221.dist-info/RECORD +103 -0
- autogluon_timeseries-1.4.1b20251115.dist-info/RECORD +0 -96
- /autogluon.timeseries-1.4.1b20251115-py3.9-nspkg.pth → /autogluon.timeseries-1.5.0b20251221-py3.11-nspkg.pth +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/WHEEL +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/licenses/LICENSE +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/licenses/NOTICE +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/namespace_packages.txt +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/top_level.txt +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/ensemble/abstract.py

@@ -1,6 +1,5 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import Optional
 
 from typing_extensions import final
 
@@ -12,7 +11,12 @@ logger = logging.getLogger(__name__)
 
 
 class AbstractTimeSeriesEnsembleModel(TimeSeriesModelBase, ABC):
-    """Abstract class for time series ensemble models."""
+    """Abstract base class for time series ensemble models that combine predictions from multiple base models.
+
+    Ensemble training process operates on validation predictions from base models rather than raw time series
+    data. This allows the ensemble to learn optimal combination strategies based on each model's performance
+    across different validation windows and time series patterns.
+    """
 
     @property
     @abstractmethod
@@ -25,8 +29,8 @@ class AbstractTimeSeriesEnsembleModel(TimeSeriesModelBase, ABC):
         self,
         predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
         data_per_window: list[TimeSeriesDataFrame],
-        model_scores: Optional[dict[str, float]] = None,
-        time_limit: Optional[float] = None,
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
     ):
         """Fit ensemble model given predictions of candidate base models and the true data.
 
@@ -50,7 +54,7 @@ class AbstractTimeSeriesEnsembleModel(TimeSeriesModelBase, ABC):
             )
             raise TimeLimitExceeded
         if isinstance(data_per_window, TimeSeriesDataFrame):
-            raise ValueError("When fitting ensemble, `data` should contain ground truth for each validation window")
+            raise ValueError("When fitting ensemble, ``data`` should contain ground truth for each validation window")
         num_val_windows = len(data_per_window)
         for model, preds in predictions_per_window.items():
             if len(preds) != num_val_windows:
@@ -67,11 +71,11 @@ class AbstractTimeSeriesEnsembleModel(TimeSeriesModelBase, ABC):
         self,
         predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
         data_per_window: list[TimeSeriesDataFrame],
-        model_scores: Optional[dict[str, float]] = None,
-        time_limit: Optional[float] = None,
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
     ) -> None:
-        """Private method for `fit`. See `fit` for documentation of arguments. Apart from the model
-        training logic, `fit` additionally implements other logic such as keeping track of the time limit.
+        """Private method for ``fit``. See ``fit`` for documentation of arguments. Apart from the model
+        training logic, ``fit`` additionally implements other logic such as keeping track of the time limit.
         """
         raise NotImplementedError
 
autogluon/timeseries/models/ensemble/array_based/__init__.py

@@ -1,3 +1,3 @@
-from .models import MedianEnsemble, PerQuantileTabularEnsemble, TabularEnsemble
+from .models import LinearStackerEnsemble, MedianEnsemble, PerQuantileTabularEnsemble, TabularEnsemble
 
-__all__ = ["MedianEnsemble", "PerQuantileTabularEnsemble", "TabularEnsemble"]
+__all__ = ["LinearStackerEnsemble", "MedianEnsemble", "PerQuantileTabularEnsemble", "TabularEnsemble"]
autogluon/timeseries/models/ensemble/array_based/abstract.py

@@ -1,9 +1,7 @@
-import os
 from abc import ABC, abstractmethod
-from typing import Any, Optional, Sequence, Union
+from typing import Any, Sequence
 
 import numpy as np
-from typing_extensions import Self
 
 from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics.abstract import TimeSeriesScorer
@@ -14,15 +12,19 @@ from .regressor import EnsembleRegressor
 
 
 class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
-    """Abstract base class for
-
+    """Abstract base class for ensemble models that operate on multi-dimensional arrays of base model predictions.
+
+    Array-based ensembles convert time series predictions into structured numpy arrays for efficient processing
+    and enable sophisticated combination strategies beyond simple weighted averaging. Array-based ensembles also
+    support isotonization in quantile forecasts--ensuring quantile crossing does not occur. They also have built-in
+    failed model detection and filtering capabilities.
 
     Other Parameters
     ----------------
-    isotonization: str, default = "sort"
+    isotonization : str, default = "sort"
         The isotonization method to use (i.e. the algorithm to prevent quantile non-crossing).
         Currently only "sort" is supported.
-    detect_and_ignore_failures: bool, default = True
+    detect_and_ignore_failures : bool, default = True
         Whether to detect and ignore "failed models", defined as models which have a loss that is larger
         than 10x the median loss of all the models. This can be very important for the regression-based
         ensembles, as moving the weight from such a "failed model" to zero can require a long training
@@ -31,15 +33,15 @@ class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
 
     def __init__(
        self,
-        path: Optional[str] = None,
-        name: Optional[str] = None,
-        hyperparameters: Optional[dict[str, Any]] = None,
-        freq: Optional[str] = None,
+        path: str | None = None,
+        name: str | None = None,
+        hyperparameters: dict[str, Any] | None = None,
+        freq: str | None = None,
         prediction_length: int = 1,
-        covariate_metadata: Optional[CovariateMetadata] = None,
+        covariate_metadata: CovariateMetadata | None = None,
         target: str = "target",
         quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
-        eval_metric: Optional[Union[str, TimeSeriesScorer]] = None,
+        eval_metric: str | TimeSeriesScorer | None = None,
     ):
         super().__init__(
             path=path,
@@ -52,7 +54,7 @@ class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
             quantile_levels=quantile_levels,
             eval_metric=eval_metric,
         )
-        self.ensemble_regressor: Optional[EnsembleRegressor] = None
+        self.ensemble_regressor: EnsembleRegressor | None = None
         self._model_names: list[str] = []
 
     def _get_default_hyperparameters(self) -> dict[str, Any]:
@@ -61,15 +63,6 @@ class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
             "detect_and_ignore_failures": True,
         }
 
-    @classmethod
-    def load(cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True) -> Self:
-        model = super().load(path=path, reset_paths=reset_paths, load_oof=load_oof, verbose=verbose)
-
-        if reset_paths and model.ensemble_regressor is not None:
-            model.ensemble_regressor.set_path(os.path.join(model.path, "ensemble_regressor"))
-
-        return model
-
     @staticmethod
     def to_array(df: TimeSeriesDataFrame) -> np.ndarray:
         """Given a TimeSeriesDataFrame object, return a single array composing the values contained
@@ -78,8 +71,8 @@ class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
         Parameters
         ----------
         df
-            TimeSeriesDataFrame to convert to an array. Must contain exactly `prediction_length`
-            values for each item. The columns of `df` can correspond to ground truth values
+            TimeSeriesDataFrame to convert to an array. Must contain exactly ``prediction_length``
+            values for each item. The columns of ``df`` can correspond to ground truth values
             or predictions (in which case, these will be the mean or quantile forecasts).
 
         Returns
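
For illustration, a minimal sketch of the long-to-array conversion that the ``to_array`` contract describes: a TimeSeriesDataFrame stores one row per (item_id, timestamp) pair, so a frame with exactly ``prediction_length`` rows per item can be reshaped to (num_items, prediction_length, num_columns). All names and sizes below are hypothetical, not taken from the diff.

import numpy as np
import pandas as pd

# Toy long-format frame: 2 items x 3 timestamps, one "mean" column.
index = pd.MultiIndex.from_product(
    [["A", "B"], pd.date_range("2024-01-01", periods=3)], names=["item_id", "timestamp"]
)
df = pd.DataFrame({"mean": np.arange(6, dtype=float)}, index=index)

# Reshape to (num_items, prediction_length, num_columns).
array = df.to_numpy().reshape(2, 3, -1)
print(array.shape)  # (2, 3, 1)
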
@@ -99,7 +92,7 @@ class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
 
     def _get_base_model_predictions(
         self,
-        predictions_per_window: Union[dict[str, list[TimeSeriesDataFrame]], dict[str, TimeSeriesDataFrame]],
+        predictions_per_window: dict[str, list[TimeSeriesDataFrame]] | dict[str, TimeSeriesDataFrame],
     ) -> tuple[np.ndarray, np.ndarray]:
         """Given a mapping from model names to a list of data frames representing
         their predictions per window, return a multidimensional array representation.
@@ -147,7 +140,7 @@ class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
         isotonized_array
             Array with same shape but quantiles sorted along last dimension
         """
-        isotonization = self.get_hyperparameters()["isotonization"]
+        isotonization = self.get_hyperparameter("isotonization")
         if isotonization == "sort":
             return np.sort(prediction_array, axis=-1)
         return prediction_array
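
The "sort" isotonization in the hunk above is easiest to see on a toy array (toy shape, not the internal five-dimensional layout):

import numpy as np

# One item, one time step, quantile levels (0.1, 0.5, 0.9): the 0.5 and 0.9
# forecasts cross (15.0 > 14.0). Sorting along the last (quantile) axis
# restores monotonicity, exactly what np.sort(prediction_array, axis=-1) does.
prediction_array = np.array([[[12.0, 15.0, 14.0]]])
isotonized = np.sort(prediction_array, axis=-1)
print(isotonized)  # [[[12. 14. 15.]]]
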
@@ -156,8 +149,8 @@ class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
         self,
         predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
         data_per_window: list[TimeSeriesDataFrame],
-        model_scores: Optional[dict[str, float]] = None,
-        time_limit: Optional[float] = None,
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
     ) -> None:
         # process inputs
         filtered_predictions = self._filter_failed_models(predictions_per_window, model_scores)
@@ -226,10 +219,10 @@ class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
     def _filter_failed_models(
         self,
         predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
-        model_scores: Optional[dict[str, float]],
+        model_scores: dict[str, float] | None,
     ) -> dict[str, list[TimeSeriesDataFrame]]:
         """Filter out failed models based on detect_and_ignore_failures setting."""
-        if not self.get_hyperparameters()["detect_and_ignore_failures"]:
+        if not self.get_hyperparameter("detect_and_ignore_failures"):
             return predictions_per_window
 
         if model_scores is None or len(model_scores) == 0:
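
A hedged sketch of the "failed model" rule the docstring describes (drop models whose loss exceeds 10x the median loss). AutoGluon's evaluation scores are higher-is-better, so losses here are negated scores; the model names are made up and the exact internal logic may differ:

import numpy as np

model_scores = {"DeepAR": -0.21, "TemporalFusionTransformer": -0.25, "FailedModel": -9.4}
losses = {name: -score for name, score in model_scores.items()}

median_loss = np.median(list(losses.values()))
kept = {name for name, loss in losses.items() if loss <= 10 * median_loss}
print(kept)  # {'DeepAR', 'TemporalFusionTransformer'}
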
autogluon/timeseries/models/ensemble/array_based/models.py

@@ -1,10 +1,12 @@
-import os
 from abc import ABC
 from typing import Any, Type
 
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
+
 from .abstract import ArrayBasedTimeSeriesEnsembleModel
 from .regressor import (
     EnsembleRegressor,
+    LinearStackerEnsembleRegressor,
     MedianEnsembleRegressor,
     PerQuantileTabularEnsembleRegressor,
     TabularEnsembleRegressor,
@@ -12,6 +14,21 @@ from .regressor import (
 
 
 class MedianEnsemble(ArrayBasedTimeSeriesEnsembleModel):
+    """Robust ensemble that computes predictions as the element-wise median of base model mean
+    and quantile forecasts, providing robustness to outlier predictions.
+
+    Other Parameters
+    ----------------
+    isotonization : str, default = "sort"
+        The isotonization method to use (i.e. the algorithm to prevent quantile non-crossing).
+        Currently only "sort" is supported.
+    detect_and_ignore_failures : bool, default = True
+        Whether to detect and ignore "failed models", defined as models which have a loss that is larger
+        than 10x the median loss of all the models. This can be very important for the regression-based
+        ensembles, as moving the weight from such a "failed model" to zero can require a long training
+        time.
+    """
+
     def _get_ensemble_regressor(self) -> MedianEnsembleRegressor:
         return MedianEnsembleRegressor()
 
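
The median combination documented above is robust by construction; a minimal numpy sketch with toy values:

import numpy as np

# Three models' mean forecasts for one series over three steps; model_c is a
# wild outlier, but the element-wise median across the model axis ignores it.
model_a = np.array([10.0, 11.0, 12.0])
model_b = np.array([11.0, 12.0, 13.0])
model_c = np.array([50.0, 60.0, 70.0])

ensemble = np.median(np.stack([model_a, model_b, model_c], axis=-1), axis=-1)
print(ensemble)  # [11. 12. 13.]
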
@@ -21,30 +38,148 @@ class BaseTabularEnsemble(ArrayBasedTimeSeriesEnsembleModel, ABC):
 
     def _get_default_hyperparameters(self) -> dict[str, Any]:
         default_hps = super()._get_default_hyperparameters()
-        default_hps.update(
-            {
-                "tabular_hyperparameters": {"GBM": {}},
-            }
-        )
+        default_hps.update({"model_name": "CAT", "model_hyperparameters": {}})
         return default_hps
 
     def _get_ensemble_regressor(self):
+        hyperparameters = self.get_hyperparameters()
         return self.ensemble_regressor_type(
-            path=os.path.join(self.path, "ensemble_regressor"),
             quantile_levels=list(self.quantile_levels),
-
+            model_name=hyperparameters["model_name"],
+            model_hyperparameters=hyperparameters["model_hyperparameters"],
         )
 
 
 class TabularEnsemble(BaseTabularEnsemble):
-    """
+    """Tabular ensemble that uses a single AutoGluon-Tabular model to learn ensemble combinations.
+
+    This ensemble trains a single tabular model (such as gradient boosting machines) to predict all
+    quantiles simultaneously from base model predictions. The tabular model learns complex non-linear
+    patterns in how base models should be combined, potentially capturing interactions and conditional
+    dependencies that simple weighted averages cannot represent.
+
+    Other Parameters
+    ----------------
+    model_name : str, default = "CAT"
+        Name of the AutoGluon-Tabular model to use for ensemble learning. Model name should be registered
+        in AutoGluon-Tabular model registry.
+    model_hyperparameters : dict, default = {}
+        Hyperparameters to pass to the underlying AutoGluon-Tabular model.
+    isotonization : str, default = "sort"
+        The isotonization method to use (i.e. the algorithm to prevent quantile non-crossing).
+        Currently only "sort" is supported.
+    detect_and_ignore_failures : bool, default = True
+        Whether to detect and ignore "failed models", defined as models which have a loss that is larger
+        than 10x the median loss of all the models. This can be very important for the regression-based
+        ensembles, as moving the weight from such a "failed model" to zero can require a long training
+        time.
+    """
 
     ensemble_regressor_type = TabularEnsembleRegressor
 
 
 class PerQuantileTabularEnsemble(BaseTabularEnsemble):
-    """
-
+    """Tabular ensemble using separate AutoGluon-Tabular models for each quantile and mean forecast.
+
+    This ensemble trains dedicated tabular models for each quantile level plus a separate model
+    for the mean prediction. Each model specializes in learning optimal combinations for its
+    specific target, allowing for quantile-specific ensemble strategies that can capture different
+    model behaviors across the prediction distribution.
+
+    Other Parameters
+    ----------------
+    model_name : str, default = "GBM"
+        Name of the AutoGluon-Tabular model to use for ensemble learning. Model name should be registered
+        in AutoGluon-Tabular model registry.
+    model_hyperparameters : dict, default = {}
+        Hyperparameters to pass to the underlying AutoGluon-Tabular model.
+    isotonization : str, default = "sort"
+        The isotonization method to use (i.e. the algorithm to prevent quantile non-crossing).
+        Currently only "sort" is supported.
+    detect_and_ignore_failures : bool, default = True
+        Whether to detect and ignore "failed models", defined as models which have a loss that is larger
+        than 10x the median loss of all the models. This can be very important for the regression-based
+        ensembles, as moving the weight from such a "failed model" to zero can require a long training
+        time.
     """
 
     ensemble_regressor_type = PerQuantileTabularEnsembleRegressor
+
+
+class LinearStackerEnsemble(ArrayBasedTimeSeriesEnsembleModel):
+    """Linear stacking ensemble that learns optimal linear combination weights through gradient-based
+    optimization.
+
+    Weighted combinations can be per model or per model-quantile, model-horizon, model-quantile-horizon
+    combinations. These choices are controlled by the ``weights_per`` hyperparameter.
+
+    The optimization process uses gradient descent with configurable learning rates and convergence
+    criteria, allowing for flexible training dynamics. Weight pruning can be applied to remove
+    models with negligible contributions, resulting in sparse and interpretable ensembles.
+
+    Other Parameters
+    ----------------
+    weights_per : str, default = "m"
+        Granularity of weight learning.
+
+        - "m": single weight per model
+        - "mq": single weight for each model-quantile combination
+        - "mt": single weight for each model-time step where time steps run across the prediction horizon
+        - "mtq": single weight for each model-quantile-time step combination
+    lr : float, default = 0.1
+        Learning rate for PyTorch optimizer during weight training.
+    max_epochs : int, default = 10000
+        Maximum number of training epochs for weight optimization.
+    relative_tolerance : float, default = 1e-7
+        Relative tolerance for convergence detection during training.
+    prune_below : float, default = 0.0
+        Threshold below which weights are pruned to zero for sparsity. The weights are redistributed across
+        remaining models after pruning.
+    isotonization : str, default = "sort"
+        The isotonization method to use (i.e. the algorithm to prevent quantile non-crossing).
+        Currently only "sort" is supported.
+    detect_and_ignore_failures : bool, default = True
+        Whether to detect and ignore "failed models", defined as models which have a loss that is larger
+        than 10x the median loss of all the models. This can be very important for the regression-based
+        ensembles, as moving the weight from such a "failed model" to zero can require a long training
+        time.
+    """
+
+    def _get_default_hyperparameters(self) -> dict[str, Any]:
+        default_hps = super()._get_default_hyperparameters()
+        default_hps.update(
+            {
+                "weights_per": "m",
+                "lr": 0.1,
+                "max_epochs": 10000,
+                "relative_tolerance": 1e-7,
+                "prune_below": 0.0,
+            }
+        )
+        return default_hps
+
+    def _get_ensemble_regressor(self) -> LinearStackerEnsembleRegressor:
+        hps = self.get_hyperparameters()
+        return LinearStackerEnsembleRegressor(
+            quantile_levels=list(self.quantile_levels),
+            weights_per=hps["weights_per"],
+            lr=hps["lr"],
+            max_epochs=hps["max_epochs"],
+            relative_tolerance=hps["relative_tolerance"],
+            prune_below=hps["prune_below"],
+        )
+
+    def _fit(
+        self,
+        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
+        data_per_window: list[TimeSeriesDataFrame],
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
+    ) -> None:
+        super()._fit(predictions_per_window, data_per_window, model_scores, time_limit)
+
+        assert isinstance(self.ensemble_regressor, LinearStackerEnsembleRegressor)
+
+        if self.ensemble_regressor.kept_indices is not None:
+            original_names = self._model_names
+            self._model_names = [original_names[i] for i in self.ensemble_regressor.kept_indices]
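
How ``weights_per`` translates into weight-array shapes can be read off ``_compute_weight_shape`` in the new linear_stacker.py later in this diff; a sketch with hypothetical sizes (the leading (1, 1) axes broadcast over validation windows and items):

prediction_length, num_models = 24, 5
num_outputs = 9 + 1  # nine quantile levels plus the mean column

shapes = {
    "m": (1, 1, 1, 1, num_models),
    "mt": (1, 1, prediction_length, 1, num_models),
    "mq": (1, 1, 1, num_outputs, num_models),
    "mtq": (1, 1, prediction_length, num_outputs, num_models),
}
print(shapes["mq"])  # (1, 1, 1, 10, 5)
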
autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py

@@ -1,9 +1,11 @@
 from .abstract import EnsembleRegressor, MedianEnsembleRegressor
+from .linear_stacker import LinearStackerEnsembleRegressor
 from .per_quantile_tabular import PerQuantileTabularEnsembleRegressor
 from .tabular import TabularEnsembleRegressor
 
 __all__ = [
     "EnsembleRegressor",
+    "LinearStackerEnsembleRegressor",
     "MedianEnsembleRegressor",
     "PerQuantileTabularEnsembleRegressor",
     "TabularEnsembleRegressor",
autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py

@@ -8,16 +8,13 @@ class EnsembleRegressor(ABC):
     def __init__(self, *args, **kwargs):
         pass
 
-    def set_path(self, path: str) -> None:
-        pass
-
     @abstractmethod
     def fit(
         self,
         base_model_mean_predictions: np.ndarray,
         base_model_quantile_predictions: np.ndarray,
         labels: np.ndarray,
-
+        time_limit: float | None = None,
     ) -> Self:
         """
         Parameters
@@ -33,6 +30,10 @@ class EnsembleRegressor(ABC):
         labels
             Ground truth array of shape
             (num_windows, num_items, prediction_length, 1)
+
+        time_limit
+            Approximately how long ``fit`` will run (wall-clock time in seconds). If
+            not specified, training time will not be limited.
         """
         pass
 
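
Putting the documented shapes together, the arrays passed to ``fit`` would look roughly like this (sizes hypothetical; the mean/quantile shapes are inferred from the documented labels shape and the axis=3 concatenation used by the regressors, so treat them as an assumption):

import numpy as np

num_windows, num_items, prediction_length = 2, 50, 24
num_quantiles, num_models = 9, 4

base_model_mean_predictions = np.zeros((num_windows, num_items, prediction_length, 1, num_models))
base_model_quantile_predictions = np.zeros((num_windows, num_items, prediction_length, num_quantiles, num_models))
labels = np.zeros((num_windows, num_items, prediction_length, 1))
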
@@ -72,7 +73,7 @@ class MedianEnsembleRegressor(EnsembleRegressor):
         base_model_mean_predictions: np.ndarray,
         base_model_quantile_predictions: np.ndarray,
         labels: np.ndarray,
-
+        time_limit: float | None = None,
     ) -> Self:
         return self
 
autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py

@@ -0,0 +1,186 @@
+from typing import Literal
+
+import numpy as np
+from typing_extensions import Self
+
+from autogluon.timeseries.utils.timer import Timer
+
+from .abstract import EnsembleRegressor
+
+
+class LinearStackerEnsembleRegressor(EnsembleRegressor):
+    """Linear stacker ensemble regressor using PyTorch optimization with softmax weights.
+
+    Implements weighted averaging of base model predictions with learnable weights optimized
+    via gradient descent. Uses PyTorch during training for optimization, then stores weights
+    as numpy arrays for efficient prediction.
+
+    Parameters
+    ----------
+    quantile_levels
+        List of quantile levels for quantile predictions (e.g., [0.1, 0.5, 0.9]).
+    weights_per
+        Weight configuration specifying which dimensions to learn weights for:
+
+        - "m": Per-model weights (shape: num_models), defaults to "m"
+        - "mt": Per-model and per-time weights (shape: prediction_length, num_models)
+        - "mq": Per-model and per-model-output (quantiles and mean) weights
+          (shape: num_quantiles+1, num_models)
+        - "mtq": Per-model, per-time, and per-quantile weights
+          (shape: prediction_length, num_quantiles+1, num_models)
+    lr
+        Learning rate for Adam optimizer. Defaults to 0.1.
+    max_epochs
+        Maximum number of training epochs. Defaults to 10000.
+    relative_tolerance
+        Convergence tolerance for relative loss change between epochs. Defaults to 1e-7.
+    prune_below
+        Importance threshold for model sparsification. Models with importance below this
+        threshold are dropped after weight optimization. Set to 0.0 to disable sparsification.
+        Defaults to 0.0.
+    """
+
+    def __init__(
+        self,
+        quantile_levels: list[float],
+        weights_per: Literal["m", "mt", "mq", "mtq"] = "m",
+        lr: float = 0.1,
+        max_epochs: int = 10_000,
+        relative_tolerance: float = 1e-7,
+        prune_below: float = 0.0,
+    ):
+        super().__init__()
+        self.quantile_levels = quantile_levels
+        self.weights_per = weights_per
+        self.lr = lr
+        self.max_epochs = max_epochs
+        self.relative_tolerance = relative_tolerance
+        self.prune_below = prune_below
+
+        self.weights: np.ndarray | None = None
+        self.kept_indices: list[int] | None = None
+
+    def _compute_weight_shape(self, base_model_predictions_shape: tuple) -> tuple:
+        """Compute weight tensor shape based on weights_per configuration."""
+        _, _, prediction_length, num_outputs, num_models = base_model_predictions_shape
+
+        shapes = {
+            "m": (1, 1, num_models),
+            "mt": (prediction_length, 1, num_models),
+            "mq": (1, num_outputs, num_models),
+            "mtq": (prediction_length, num_outputs, num_models),
+        }
+        try:
+            return (1, 1) + shapes[self.weights_per]
+        except KeyError:
+            raise ValueError(f"Unsupported weights_per: {self.weights_per}")
+
+    def make_weighted_average_module(self, base_model_predictions_shape: tuple):
+        import torch
+
+        class WeightedAverage(torch.nn.Module):
+            def __init__(self, shape):
+                super().__init__()
+                self.raw_weights = torch.nn.Parameter(torch.zeros(*shape, dtype=torch.float32))
+
+            def get_normalized_weights(self):
+                return torch.softmax(self.raw_weights, dim=-1)  # softmax over models
+
+            def forward(self, base_model_predictions: torch.Tensor):
+                return torch.sum(self.get_normalized_weights() * base_model_predictions, dim=-1)
+
+        return WeightedAverage(self._compute_weight_shape(base_model_predictions_shape))
+
+    def fit(
+        self,
+        base_model_mean_predictions: np.ndarray,
+        base_model_quantile_predictions: np.ndarray,
+        labels: np.ndarray,
+        time_limit: float | None = None,
+    ) -> Self:
+        import torch
+
+        def _ql(
+            labels_tensor: torch.Tensor,
+            ensemble_predictions: torch.Tensor,
+        ) -> torch.Tensor:
+            """Compute the weighted quantile loss on predictions and ground truth (labels).
+            Considering that the first dimension of predictions is the mean, we treat
+            mean predictions on the same footing as median (0.5) predictions as contribution
+            to the overall weighted quantile loss.
+            """
+            quantile_levels = torch.tensor([0.5] + self.quantile_levels, dtype=torch.float32)
+            error = labels_tensor - ensemble_predictions  # (num_windows, num_items, num_time, num_outputs)
+            quantile_loss = torch.maximum(quantile_levels * error, (quantile_levels - 1) * error)
+            return torch.mean(quantile_loss)
+
+        timer = Timer(time_limit).start()
+
+        base_model_predictions = torch.tensor(
+            np.concatenate(
+                [base_model_mean_predictions, base_model_quantile_predictions],
+                axis=3,
+            ),
+            dtype=torch.float32,
+        )
+        labels_tensor = torch.tensor(labels, dtype=torch.float32)
+
+        weighted_average = self.make_weighted_average_module(base_model_predictions.shape)
+
+        optimizer = torch.optim.Adam(weighted_average.parameters(), lr=self.lr)
+
+        prev_loss = float("inf")
+        for _ in range(self.max_epochs):
+            optimizer.zero_grad()
+
+            ensemble_predictions = weighted_average(base_model_predictions)
+
+            loss = _ql(labels_tensor, ensemble_predictions)
+            loss.backward()
+            optimizer.step()
+
+            loss_change = abs(prev_loss - loss.item()) / (loss.item() + 1e-8)
+            if loss_change < self.relative_tolerance:
+                break
+            prev_loss = loss.item()
+
+            if timer.timed_out():
+                break
+
+        with torch.no_grad():
+            self.weights = weighted_average.get_normalized_weights().detach().numpy()
+
+        assert self.weights is not None
+        if self.prune_below > 0.0:
+            importances = self.weights.mean(axis=tuple(range(self.weights.ndim - 1)))  # shape (num_models,)
+
+            mask = importances >= self.prune_below
+            if not mask.any():
+                mask[importances.argmax()] = True
+
+            if not mask.all():
+                self.kept_indices = np.where(mask)[0].tolist()
+                self.weights = self.weights[..., mask]
+                self.weights = self.weights / self.weights.sum(axis=-1, keepdims=True)
+
+        return self
+
+    def predict(
+        self,
+        base_model_mean_predictions: np.ndarray,
+        base_model_quantile_predictions: np.ndarray,
+    ) -> tuple[np.ndarray, np.ndarray]:
+        if self.weights is None:
+            raise ValueError("Model must be fitted before prediction")
+
+        all_predictions = np.concatenate([base_model_mean_predictions, base_model_quantile_predictions], axis=3)
+
+        if self.kept_indices is not None:
+            assert all_predictions.shape[-1] == len(self.kept_indices)
+
+        ensemble_pred = np.sum(self.weights * all_predictions, axis=-1)
+
+        mean_predictions = ensemble_pred[:, :, :, :1]
+        quantile_predictions = ensemble_pred[:, :, :, 1:]
+
+        return mean_predictions, quantile_predictions
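
To see the new regressor end to end, a hedged usage sketch with synthetic arrays (requires torch; in practice the inputs come from the ensemble model's array conversion rather than being hand-built, and all sizes below are made up):

import numpy as np

from autogluon.timeseries.models.ensemble.array_based.regressor import LinearStackerEnsembleRegressor

rng = np.random.default_rng(0)
num_windows, num_items, prediction_length, num_models = 2, 8, 5, 3
quantile_levels = [0.1, 0.5, 0.9]

means = rng.normal(size=(num_windows, num_items, prediction_length, 1, num_models))
quantiles = rng.normal(size=(num_windows, num_items, prediction_length, len(quantile_levels), num_models))
labels = rng.normal(size=(num_windows, num_items, prediction_length, 1))

regressor = LinearStackerEnsembleRegressor(quantile_levels=quantile_levels, weights_per="m", max_epochs=200)
regressor.fit(means, quantiles, labels)

mean_pred, quantile_pred = regressor.predict(means, quantiles)
print(mean_pred.shape, quantile_pred.shape)  # (2, 8, 5, 1) (2, 8, 5, 3)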