autogluon.timeseries 1.4.1b20251016__py3-none-any.whl → 1.4.1b20251218__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.timeseries has been flagged as potentially problematic by the registry diff service.
- autogluon/timeseries/configs/hyperparameter_presets.py +7 -21
- autogluon/timeseries/configs/predictor_presets.py +23 -39
- autogluon/timeseries/dataset/ts_dataframe.py +97 -86
- autogluon/timeseries/learner.py +70 -35
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +5 -5
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +2 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
- autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
- autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +395 -0
- autogluon/timeseries/models/chronos/model.py +126 -88
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +69 -37
- autogluon/timeseries/models/ensemble/__init__.py +36 -2
- autogluon/timeseries/models/ensemble/abstract.py +14 -46
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +25 -22
- autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
- autogluon/timeseries/models/gluonts/abstract.py +32 -31
- autogluon/timeseries/models/gluonts/dataset.py +11 -11
- autogluon/timeseries/models/gluonts/models.py +0 -7
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +15 -18
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +7 -1
- autogluon/timeseries/models/local/statsforecast.py +12 -12
- autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +6 -6
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +4 -9
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +2 -3
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +10 -10
- autogluon/timeseries/models/toto/_internal/dataset.py +2 -2
- autogluon/timeseries/models/toto/_internal/forecaster.py +8 -8
- autogluon/timeseries/models/toto/dataloader.py +4 -4
- autogluon/timeseries/models/toto/hf_pretrained_model.py +97 -16
- autogluon/timeseries/models/toto/model.py +35 -20
- autogluon/timeseries/predictor.py +527 -155
- autogluon/timeseries/regressor.py +27 -30
- autogluon/timeseries/splitter.py +3 -27
- autogluon/timeseries/trainer/ensemble_composer.py +444 -0
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +300 -278
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/covariate_scaler.py +8 -8
- autogluon/timeseries/transforms/target_scaler.py +15 -15
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +31 -14
- autogluon/timeseries/utils/forecast.py +6 -7
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251218-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/METADATA +39 -27
- autogluon_timeseries-1.4.1b20251218.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20251016-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.4.1b20251016.dist-info/RECORD +0 -90
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/zip-safe +0 -0
autogluon/timeseries/regressor.py
CHANGED
@@ -1,13 +1,13 @@
 import logging
 import time
-from typing import Any,
+from typing import Any, Protocol, overload, runtime_checkable

 import numpy as np
 import pandas as pd

 from autogluon.core.models import AbstractModel
 from autogluon.tabular.registry import ag_model_registry as tabular_ag_model_registry
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.utils.features import CovariateMetadata

 logger = logging.getLogger(__name__)
@@ -17,19 +17,19 @@ logger = logging.getLogger(__name__)
 class CovariateRegressor(Protocol):
     def is_fit(self) -> bool: ...

-    def fit(self, data: TimeSeriesDataFrame, time_limit:
+    def fit(self, data: TimeSeriesDataFrame, time_limit: float | None = None, **kwargs) -> "CovariateRegressor": ...

     def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...

     def fit_transform(
-        self, data: TimeSeriesDataFrame, time_limit:
+        self, data: TimeSeriesDataFrame, time_limit: float | None = None, **kwargs
     ) -> TimeSeriesDataFrame: ...

     def inverse_transform(
         self,
         predictions: TimeSeriesDataFrame,
         known_covariates: TimeSeriesDataFrame,
-        static_features:
+        static_features: pd.DataFrame | None,
     ) -> TimeSeriesDataFrame: ...


@@ -75,24 +75,19 @@ class GlobalCovariateRegressor(CovariateRegressor):
     def __init__(
         self,
         model_name: str = "CAT",
-        model_hyperparameters:
+        model_hyperparameters: dict[str, Any] | None = None,
         eval_metric: str = "mean_absolute_error",
         refit_during_predict: bool = False,
-        max_num_samples:
-        covariate_metadata:
+        max_num_samples: int | None = 500_000,
+        covariate_metadata: CovariateMetadata | None = None,
         target: str = "target",
-        validation_fraction:
+        validation_fraction: float | None = 0.1,
         fit_time_fraction: float = 0.5,
         include_static_features: bool = True,
         include_item_id: bool = False,
     ):
-        tabular_model_types = tabular_ag_model_registry.key_to_cls_map()
-        if model_name not in tabular_model_types:
-            raise ValueError(
-                f"Tabular model {model_name} not supported. Available models: {list(tabular_model_types)}"
-            )
         self.target = target
-        self.model_type =
+        self.model_type = tabular_ag_model_registry.key_to_cls(model_name)
         self.model_name = model_name
         self.model_hyperparameters = model_hyperparameters or {}
         self.refit_during_predict = refit_during_predict
@@ -103,14 +98,14 @@ class GlobalCovariateRegressor(CovariateRegressor):
         self.include_static_features = include_static_features
         self.include_item_id = include_item_id

-        self.model:
+        self.model: AbstractModel | None = None
         self.disabled = False
         self.covariate_metadata = covariate_metadata or CovariateMetadata()

     def is_fit(self) -> bool:
         return self.model is not None

-    def fit(self, data: TimeSeriesDataFrame, time_limit:
+    def fit(self, data: TimeSeriesDataFrame, time_limit: float | None = None, **kwargs) -> "CovariateRegressor":
         """Fit the tabular regressor on the target column using covariates as features."""
         start_time = time.monotonic()
         tabular_df = self._get_tabular_df(data, static_features=data.static_features, include_target=True)
@@ -119,9 +114,9 @@ class GlobalCovariateRegressor(CovariateRegressor):
         median_ts_length = data.num_timesteps_per_item().median()
         features_to_drop = [self.target]
         if not self.include_item_id:
-            features_to_drop += [ITEMID]
+            features_to_drop += [TimeSeriesDataFrame.ITEMID]
         if self.validation_fraction is not None:
-            grouped_df = tabular_df.groupby(ITEMID, observed=False, sort=False)
+            grouped_df = tabular_df.groupby(TimeSeriesDataFrame.ITEMID, observed=False, sort=False)
             val_size = max(int(self.validation_fraction * median_ts_length), 1)
             train_df = self._subsample_df(grouped_df.head(-val_size))
             val_df = self._subsample_df(grouped_df.tail(val_size))
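Note: the hunk above splits each item's rows into train and validation by keeping the last val_size rows per item for validation. A minimal pandas sketch of the same per-group head/tail pattern, on hypothetical toy data (not AutoGluon code):

    import pandas as pd

    # Two items with 4 and 3 timesteps each (hypothetical data).
    df = pd.DataFrame({
        "item_id": ["A", "A", "A", "A", "B", "B", "B"],
        "target": [1, 2, 3, 4, 10, 20, 30],
    })
    grouped = df.groupby("item_id", observed=False, sort=False)
    val_size = 1
    train_df = grouped.head(-val_size)  # all rows except the last val_size per item
    val_df = grouped.tail(val_size)     # the last val_size rows per item
    print(train_df["target"].tolist())  # [1, 2, 3, 10, 20]
    print(val_df["target"].tolist())    # [4, 30]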
@@ -178,7 +173,7 @@ class GlobalCovariateRegressor(CovariateRegressor):
         return data

     def fit_transform(
-        self, data: TimeSeriesDataFrame, time_limit:
+        self, data: TimeSeriesDataFrame, time_limit: float | None = None, **kwargs
     ) -> TimeSeriesDataFrame:
         if not self.is_fit() or self.refit_during_predict:
             self.fit(data=data, time_limit=time_limit, **kwargs)
@@ -188,7 +183,7 @@ class GlobalCovariateRegressor(CovariateRegressor):
         self,
         predictions: TimeSeriesDataFrame,
         known_covariates: TimeSeriesDataFrame,
-        static_features:
+        static_features: pd.DataFrame | None,
     ) -> TimeSeriesDataFrame:
         """Add the tabular regressor predictions to the target column."""
         if not self.disabled:
@@ -196,27 +191,29 @@ class GlobalCovariateRegressor(CovariateRegressor):
             predictions = predictions.assign(**{col: predictions[col] + y_pred for col in predictions.columns})
         return predictions

-    def _predict(self, data: TimeSeriesDataFrame, static_features:
+    def _predict(self, data: TimeSeriesDataFrame, static_features: pd.DataFrame | None) -> np.ndarray:
         """Construct the tabular features matrix and make predictions"""
         assert self.model is not None, "CovariateRegressor must be fit before calling predict."
         tabular_df = self._get_tabular_df(data, static_features=static_features)
         if not self.include_item_id:
-            tabular_df = tabular_df.drop(columns=[ITEMID])
+            tabular_df = tabular_df.drop(columns=[TimeSeriesDataFrame.ITEMID])
         return self.model.predict(X=tabular_df)

     def _get_tabular_df(
         self,
         data: TimeSeriesDataFrame,
-        static_features:
+        static_features: pd.DataFrame | None = None,
         include_target: bool = False,
     ) -> pd.DataFrame:
         """Construct a tabular dataframe from known covariates and static features."""
-        available_columns = [ITEMID] + self.covariate_metadata.known_covariates
+        available_columns = [TimeSeriesDataFrame.ITEMID] + self.covariate_metadata.known_covariates
         if include_target:
             available_columns += [self.target]
-        tabular_df =
+        tabular_df = (
+            pd.DataFrame(data).reset_index()[available_columns].astype({TimeSeriesDataFrame.ITEMID: "category"})
+        )
         if static_features is not None and self.include_static_features:
-            tabular_df = pd.merge(tabular_df, static_features, on=ITEMID)
+            tabular_df = pd.merge(tabular_df, static_features, on=TimeSeriesDataFrame.ITEMID)
         return tabular_df

     def _subsample_df(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -230,11 +227,11 @@ class GlobalCovariateRegressor(CovariateRegressor):
 def get_covariate_regressor(covariate_regressor: None, target: str, covariate_metadata: CovariateMetadata) -> None: ...
 @overload
 def get_covariate_regressor(
-    covariate_regressor:
+    covariate_regressor: str | dict, target: str, covariate_metadata: CovariateMetadata
 ) -> CovariateRegressor: ...
 def get_covariate_regressor(
-    covariate_regressor:
-) ->
+    covariate_regressor: str | dict | None, target: str, covariate_metadata: CovariateMetadata
+) -> CovariateRegressor | None:
     """Create a CovariateRegressor object based on the value of the `covariate_regressor` hyperparameter."""
     if covariate_regressor is None:
         return None
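Note: with the overloads above, a type checker infers None for a None spec and CovariateRegressor otherwise. A hedged usage sketch (it assumes "CAT" is a valid tabular model key, per the model_name default shown earlier in this file):

    from autogluon.timeseries.regressor import get_covariate_regressor
    from autogluon.timeseries.utils.features import CovariateMetadata

    metadata = CovariateMetadata()
    # None spec resolves to the first overload: the inferred return type is None.
    assert get_covariate_regressor(None, target="target", covariate_metadata=metadata) is None
    # str spec resolves to the second overload: the inferred return type is CovariateRegressor.
    regressor = get_covariate_regressor("CAT", target="target", covariate_metadata=metadata)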
autogluon/timeseries/splitter.py
CHANGED
@@ -1,6 +1,6 @@
-from typing import Iterator
+from typing import Iterator

-from .dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame

 __all__ = [
     "AbstractWindowSplitter",
@@ -41,7 +41,7 @@ class ExpandingWindowSplitter(AbstractWindowSplitter):
         The end of each subsequent window is moved this many time steps forward.
     """

-    def __init__(self, prediction_length: int, num_val_windows: int = 1, val_step_size:
+    def __init__(self, prediction_length: int, num_val_windows: int = 1, val_step_size: int | None = None):
         super().__init__(prediction_length=prediction_length, num_val_windows=num_val_windows)
         if val_step_size is None:
             val_step_size = prediction_length
@@ -57,27 +57,3 @@ class ExpandingWindowSplitter(AbstractWindowSplitter):
         train_data = data.slice_by_timestep(None, train_end)
         val_data = data.slice_by_timestep(None, val_end)
         yield train_data, val_data
-
-
-class AbstractTimeSeriesSplitter:
-    def __init__(self, *args, **kwargs):
-        raise ValueError(
-            "`AbstractTimeSeriesSplitter` has been deprecated. "
-            "Please use `autogluon.timeseries.splitter.ExpandingWindowSplitter` instead."
-        )
-
-
-class MultiWindowSplitter(AbstractTimeSeriesSplitter):
-    def __init__(self, *args, **kwargs):
-        raise ValueError(
-            "`MultiWindowSplitter` has been deprecated. "
-            "Please use `autogluon.timeseries.splitter.ExpandingWindowSplitter` instead."
-        )
-
-
-class LastWindowSplitter(MultiWindowSplitter):
-    def __init__(self, *args, **kwargs):
-        raise ValueError(
-            "`LastWindowSplitter` has been deprecated. "
-            "Please use `autogluon.timeseries.splitter.ExpandingWindowSplitter` instead."
-        )
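Note: with the deprecated shim classes removed, ExpandingWindowSplitter is the module's only splitter. A minimal usage sketch (it assumes data is an existing TimeSeriesDataFrame and that the generator shown above is the splitter's split method; per the __init__ shown above, val_step_size falls back to prediction_length when None):

    from autogluon.timeseries.splitter import ExpandingWindowSplitter

    splitter = ExpandingWindowSplitter(prediction_length=24, num_val_windows=3)
    for train_data, val_data in splitter.split(data):
        # each val_data window extends prediction_length steps past its train_data
        ...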
autogluon/timeseries/trainer/ensemble_composer.py
ADDED
@@ -0,0 +1,444 @@
+import logging
+import os
+import time
+import traceback
+from pathlib import Path
+from typing import Any, Iterator
+
+import networkx as nx
+import numpy as np
+from typing_extensions import Self
+
+from autogluon.timeseries import TimeSeriesDataFrame
+from autogluon.timeseries.metrics import TimeSeriesScorer
+from autogluon.timeseries.models.ensemble import (
+    AbstractTimeSeriesEnsembleModel,
+    PerformanceWeightedEnsemble,
+    get_ensemble_class,
+)
+from autogluon.timeseries.utils.timer import SplitTimer
+from autogluon.timeseries.utils.warning_filters import warning_filter
+
+from .utils import log_scores_and_times
+
+logger = logging.getLogger("autogluon.timeseries.trainer")
+
+
+class EnsembleComposer:
+    """Helper class for TimeSeriesTrainer to build multi-layer stack ensembles.
+
+    This class depends on the trainer to provide the necessary initialization parameters, training
+    and validation data, as well as having fit the base (non-ensemble) models and persisted their
+    out-of-fold predictions which will be used for ensemble training.
+
+    Parameters
+    ----------
+    path
+        Path of the calling TimeSeriesTrainer. EnsembleComposer finds the model objects and their
+        out-of-fold prediction artifacts with respect to this path. EnsembleComposer only saves
+        ensemble models and their out-of-fold predictions to this folder (i.e., does not pickle
+        itself).
+    prediction_length
+        Number of time steps to forecast.
+    eval_metric
+        Metric used to evaluate ensemble performance.
+    target
+        Name of the target column in the time series data.
+    num_windows_per_layer
+        Number of windows used for training each ensemble layer. Length must match the number of layers
+        in ensemble_hyperparameters. Example: (3, 2) means first layer uses 3 windows, second layer uses
+        2 windows.
+
+        Base models must have OOF predictions saved for all sum(num_windows_per_layer) windows, prior
+        to this class being called.
+    ensemble_hyperparameters
+        Ensemble configuration. A list of dicts, one per layer. If an ensemble model should be fitted
+        with multiple hyperparameter configurations, a list of dicts may be provided as the value.
+        Each layer's dict maps ensemble names to either a single hyperparameter dict or a list of
+        hyperparameter dicts.
+
+        Examples:
+        - ``[{"GreedyEnsemble": {}}, {"GreedyEnsemble": {}}]`` for 2 layers of greedy ensembles.
+        - ``[{"GreedyEnsemble": [{"ensemble_size": 10}, {"ensemble_size": 20}]}]`` for a single layer of
+          two greedy ensembles, with differing ensemble sizes.
+    quantile_levels
+        Quantile levels for probabilistic forecasting.
+    model_graph
+        Directed graph containing base models and their metadata (val_score, fit_time, etc.). Only
+        base models (nodes without predecessors) are used for ensemble training.
+    """
+
+    def __init__(
+        self,
+        path: str,
+        prediction_length: int,
+        eval_metric: TimeSeriesScorer,
+        target: str,
+        num_windows_per_layer: tuple[int, ...],
+        ensemble_hyperparameters: list[dict[str, dict | list[dict]]],
+        quantile_levels: list[float],
+        model_graph: nx.DiGraph,
+    ):
+        self.eval_metric = eval_metric
+        self.path = path
+        self.prediction_length = prediction_length
+        self.target = target
+        self.quantile_levels = quantile_levels
+
+        self.num_windows_per_layer = num_windows_per_layer
+        self.num_layers = len(num_windows_per_layer)
+
+        if len(ensemble_hyperparameters) != self.num_layers:
+            raise ValueError(
+                "Number of ensemble_hyperparameters must match the number of layers. "
+                f"Received {len(ensemble_hyperparameters)} ensemble_hyperparameters, "
+                f"but {self.num_layers} layers."
+            )
+        self.ensemble_hyperparameters = ensemble_hyperparameters
+
+        self.banned_model_names = list(model_graph.nodes)
+        self.model_graph = self._get_base_model_graph(source_graph=model_graph)
+
+    @staticmethod
+    def _get_base_model_graph(source_graph: nx.DiGraph) -> nx.DiGraph:
+        """Return a model graph by copying only base models (nodes without predecessors).
+
+        This ensures we start fresh for training ensembles.
+        """
+        rootset = EnsembleComposer._get_rootset(source_graph)
+
+        dst_graph = nx.DiGraph()
+        for node in rootset:
+            dst_graph.add_node(node, **source_graph.nodes[node])
+
+        return dst_graph
+
+    @staticmethod
+    def _get_rootset(graph: nx.DiGraph) -> list[str]:
+        return [n for n in graph.nodes if not list(graph.predecessors(n))]
+
+    def _load_model(self, model_name: str) -> Any:
+        """Load a model from the graph by name."""
+        attrs = self.model_graph.nodes[model_name]
+        model_path = os.path.join(self.path, *attrs["path"])
+        return attrs["type"].load(path=model_path)
+
+    def _iter_models(self, layer: int) -> Iterator[tuple[str, Any]]:
+        """Iterate over models in a specific layer of the model graph.
+
+        Parameters
+        ----------
+        layer
+            Layer index (0 for base models, 1+ for ensemble layers)
+
+        Yields
+        ------
+        model_name
+            Name of the model
+        model
+            Loaded model instance
+        """
+        rootset = self._get_rootset(self.model_graph)
+        layer_iter = nx.traversal.bfs_layers(self.model_graph, rootset)
+        for layer_idx, layer_keys in enumerate(layer_iter):
+            if layer_idx != layer:
+                continue
+
+            for model_name in layer_keys:
+                model = self._load_model(model_name)
+                yield model_name, model
+
+    def iter_ensembles(self) -> Iterator[tuple[int, AbstractTimeSeriesEnsembleModel, list[str]]]:
+        """Iterate over trained ensemble models, layer by layer. Used by the Trainer to copy the
+        fitted models in EnsembleComposer's ``model_graph``.
+
+        Yields
+        ------
+        layer_idx
+            The layer index of the ensemble.
+        model
+            The ensemble model object
+        base_model_names
+            The names of the base models that are part of the ensemble.
+        """
+        for layer_idx in range(1, self.num_layers + 1):
+            for model_name, model in self._iter_models(layer=layer_idx):
+                yield (layer_idx, model, list(self.model_graph.predecessors(model_name)))
+
+    def fit(
+        self,
+        data_per_window: list[TimeSeriesDataFrame],
+        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
+        time_limit: float | None = None,
+    ) -> Self:
+        base_model_names = [name for name, _ in self._iter_models(layer=0)]
+        if not self._can_fit_ensemble(time_limit, len(base_model_names)):
+            return self
+
+        num_ensembles = sum(
+            len(list(self.iter_layer_models_and_hps(layer))) for layer in range(1, self.num_layers + 1)
+        )
+        logger.info(f"Fitting {num_ensembles} ensemble(s), in {self.num_layers} layers.")
+
+        assert len(data_per_window) == sum(self.num_windows_per_layer)
+
+        def get_inputs_for_layer(layer_idx, model_names):
+            """Retrieve predictions from previous layer models for current layer training."""
+            if layer_idx == 1:
+                # we need base models, so we use predictions_per_window provided by the trainer,
+                # which contains base model predictions for all windows where ensembles will be
+                # trained.
+                num_windows = self.num_windows_per_layer[0]
+                inputs = {name: predictions_per_window[name][:num_windows] for name in model_names}
+            else:
+                # if layer_idx > 1, we will be relying on predictions of previously trained ensembles
+                window_start = -sum(self.num_windows_per_layer[layer_idx - 1 :])
+                window_slice = slice(
+                    window_start,
+                    window_start + self.num_windows_per_layer[layer_idx - 1] if layer_idx < self.num_layers else None,
+                )
+
+                inputs = {}
+                for model_name in model_names:
+                    oof_predictions = self._get_model_oof_predictions(model_name)
+                    inputs[model_name] = oof_predictions[window_slice]
+
+            return inputs
+
+        def get_ground_truth_for_layer(layer_idx):
+            window_start = sum(self.num_windows_per_layer[: layer_idx - 1])
+            window_end = window_start + self.num_windows_per_layer[layer_idx - 1]
+            return data_per_window[window_start:window_end]
+
+        main_loop_timer = SplitTimer(time_limit, rounds=num_ensembles).start()
+
+        # main loop over layers of ensembles
+        for layer_idx in range(1, self.num_layers + 1):
+            layer_input_model_names = [name for name, _ in self._iter_models(layer=layer_idx - 1)]
+            layer_input_model_scores = {
+                name: self.model_graph.nodes[name]["val_score"] for name in layer_input_model_names
+            }
+
+            layer_predictions_per_window = get_inputs_for_layer(layer_idx, model_names=layer_input_model_names)
+            layer_data_per_window = get_ground_truth_for_layer(layer_idx)
+
+            for ensemble_name, ensemble_hp_dict in self.iter_layer_models_and_hps(layer_idx):
+                try:
+                    # train the ensemble model
+                    time_start = time.monotonic()
+
+                    ensemble = self._fit_single_ensemble(
+                        model_name=ensemble_name,
+                        hyperparameters=ensemble_hp_dict,
+                        predictions_per_window=layer_predictions_per_window,
+                        data_per_window=layer_data_per_window,
+                        base_model_scores=layer_input_model_scores,
+                        layer_idx=layer_idx,
+                        time_limit=main_loop_timer.round_time_remaining(),
+                    )
+                    ensemble.fit_time = time.monotonic() - time_start
+
+                    # for all windows of all layers starting from this layer, predict and save predictions
+                    predictions = []
+                    predict_time = 0
+                    for pred_layer_idx in range(layer_idx, self.num_layers + 1):
+                        predict_time_start = time.monotonic()
+
+                        pred_base_predictions = get_inputs_for_layer(pred_layer_idx, ensemble.model_names)
+                        for window_idx in range(self.num_windows_per_layer[pred_layer_idx - 1]):
+                            prediction = ensemble.predict(
+                                {n: pred_base_predictions[n][window_idx] for n in ensemble.model_names}
+                            )
+                            predictions.append(prediction)
+
+                        predict_time = time.monotonic() - predict_time_start
+
+                    # record marginal prediction time per window in the last layer's data
+                    ensemble.predict_time_marginal = predict_time / self.num_windows_per_layer[-1]
+                    ensemble.cache_oof_predictions(predictions)
+
+                    # compute validation score using the last layer's validation windows
+                    last_layer_oof_predictions = ensemble.get_oof_predictions()[-self.num_windows_per_layer[-1] :]
+                    last_layer_ground_truth = get_ground_truth_for_layer(self.num_layers)
+                    score_per_fold = [
+                        self.eval_metric(data, prediction, target=self.target)
+                        for prediction, data in zip(last_layer_oof_predictions, last_layer_ground_truth)
+                    ]
+                    ensemble.val_score = float(np.mean(score_per_fold, dtype=np.float64))
+
+                    # add model to the graph, compute predict time, and save
+                    self._add_model(ensemble, base_models=ensemble.model_names)
+                    ensemble.predict_time = self._calculate_predict_time(ensemble)
+                    self.model_graph.nodes[ensemble.name]["predict_time"] = ensemble.predict_time
+                    ensemble.save()
+
+                    # log performance
+                    log_scores_and_times(
+                        ensemble.val_score,
+                        ensemble.fit_time,
+                        ensemble.predict_time,
+                        eval_metric_name=self.eval_metric.name_with_sign,
+                    )
+
+                    # check time and advance round
+                    if main_loop_timer.timed_out():
+                        logger.warning(
+                            "Time limit exceeded during ensemble training, will stop training new ensembles."
+                        )
+                        return self
+
+                except Exception as err:  # noqa
+                    logger.error(
+                        f"\tWarning: Exception caused {ensemble_name} to fail during training... Skipping this model."
+                    )
+                    logger.error(f"\t{err}")
+                    logger.debug(traceback.format_exc())
+
+                finally:
+                    main_loop_timer.next_round()
+
+        return self
+
+    def iter_layer_models_and_hps(self, layer_idx: int):
+        layer_hps = self.ensemble_hyperparameters[layer_idx - 1]
+
+        for model_name, hps in layer_hps.items():
+            if isinstance(hps, list):
+                # If a list is provided, create one ensemble per hyperparameter dict
+                for hp in hps:
+                    yield model_name, hp
+            else:
+                yield model_name, hps
+
+    def _fit_single_ensemble(
+        self,
+        model_name: str,
+        hyperparameters: dict,
+        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
+        data_per_window: list[TimeSeriesDataFrame],
+        base_model_scores: dict[str, float],
+        layer_idx: int,
+        time_limit: float | None = None,
+    ) -> AbstractTimeSeriesEnsembleModel:
+        ensemble_class = get_ensemble_class(model_name)
+
+        # TODO: remove this after PerformanceWeightedEnsemble is removed. This is a temporary fix
+        # to make sure PerformanceWeightedEnsemble is not fit on the validation scores of future
+        # out-of-fold splits.
+        if layer_idx < self.num_layers and ensemble_class is PerformanceWeightedEnsemble:
+            raise RuntimeError(
+                "PerformanceWeightedEnsemble is not supported for multi-layer stack ensembles, except "
+                "when it's used in the last layer of the ensemble."
+            )
+
+        ensemble: AbstractTimeSeriesEnsembleModel = ensemble_class(
+            eval_metric=self.eval_metric,
+            target=self.target,
+            prediction_length=self.prediction_length,
+            path=self.path,
+            freq=data_per_window[0].freq,
+            quantile_levels=self.quantile_levels,
+            hyperparameters=hyperparameters,
+        )
+
+        # update name to prevent name collisions
+        old_name = ensemble.name
+        ensemble.name = self._get_ensemble_model_name(ensemble.name, layer_idx)
+        if ensemble.name != old_name:
+            path_obj = Path(ensemble.path)
+            ensemble.path = str(path_obj.parent / ensemble.name)
+
+        fit_log_message = f"Training ensemble model {ensemble.name}. "
+        if time_limit is not None:
+            fit_log_message += f"Training for up to {time_limit:.1f}s."
+        logger.info(fit_log_message)
+
+        with warning_filter():
+            ensemble.fit(
+                predictions_per_window=predictions_per_window,
+                data_per_window=data_per_window,
+                model_scores=base_model_scores,
+                time_limit=time_limit,
+            )
+
+        return ensemble
+
+    def _get_model_oof_predictions(self, model_name: str) -> list[TimeSeriesDataFrame]:
+        model_attrs = self.model_graph.nodes[model_name]
+        model_path = os.path.join(self.path, *model_attrs["path"])
+        return model_attrs["type"].load_oof_predictions(path=model_path)
+
+    def _add_model(self, model, base_models: list[str]):
+        self.model_graph.add_node(
+            model.name,
+            path=os.path.relpath(model.path, self.path).split(os.sep),
+            type=type(model),
+            fit_time=model.fit_time,
+            predict_time=model.predict_time,
+            val_score=model.val_score,
+        )
+        for base_model in base_models:
+            self.model_graph.add_edge(base_model, model.name)
+        self.banned_model_names.append(model.name)
+
+    def _can_fit_ensemble(
+        self,
+        time_limit: float | None,
+        num_models_available_for_ensemble: int,
+    ) -> bool:
+        if time_limit is not None and time_limit <= 0:
+            logger.info(f"Not fitting ensemble due to lack of time remaining. Time left: {time_limit:.1f} seconds")
+            return False
+
+        if num_models_available_for_ensemble <= 1:
+            logger.info(
+                "Not fitting ensemble as "
+                + (
+                    "no models were successfully trained."
+                    if not num_models_available_for_ensemble
+                    else "only 1 model was trained."
+                )
+            )
+            return False
+
+        return True
+
+    def _get_ensemble_model_name(self, name: str, layer_idx: int) -> str:
+        """Revise name for an ensemble model, ensuring we don't have name collisions"""
+        base_name = name
+        layer_suffix = f"_L{layer_idx + 1}" if self.num_layers > 1 else ""
+        name = f"{base_name}" + layer_suffix
+        increment = 1
+        while name in self.banned_model_names:
+            increment += 1
+            name = f"{base_name}_{increment}" + layer_suffix
+        return name
+
+    def _calculate_predict_time(self, model: AbstractTimeSeriesEnsembleModel) -> float:
+        """Calculate ensemble predict time as sum of base model predict times."""
+        assert model.predict_time_marginal is not None
+        predict_time = model.predict_time_marginal
+        for model_name in nx.ancestors(self.model_graph, model.name):
+            ancestor = self._load_model(model_name)
+            if isinstance(ancestor, AbstractTimeSeriesEnsembleModel):
+                assert ancestor.predict_time_marginal is not None
+                predict_time += ancestor.predict_time_marginal
+            else:
+                predict_time += ancestor.predict_time
+
+        return predict_time
+
+
+def validate_ensemble_hyperparameters(hyperparameters: list[dict[str, dict | list[dict]]]) -> None:
+    if not isinstance(hyperparameters, list):
+        raise ValueError(f"ensemble_hyperparameters must be list, got {type(hyperparameters)}")
+
+    for layer_idx, layer_hp in enumerate(hyperparameters):
+        if not isinstance(layer_hp, dict):
+            raise ValueError(f"Layer {layer_idx} hyperparameters must be dict, got {type(layer_hp)}")
+        for ensemble_name, ensemble_hp in layer_hp.items():
+            get_ensemble_class(ensemble_name)  # Will raise if unknown
+            hp_is_dict = isinstance(ensemble_hp, dict)
+            hp_is_valid_list = isinstance(ensemble_hp, list) and all(isinstance(d, dict) for d in ensemble_hp)
+            if not (hp_is_dict or hp_is_valid_list):
+                raise ValueError(f"Hyperparameters for {ensemble_name} must be dict or list, got {type(ensemble_hp)}")