openstef-meta 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef_meta/__init__.py +13 -0
- openstef_meta/models/__init__.py +5 -0
- openstef_meta/models/ensemble_forecasting_model.py +677 -0
- openstef_meta/models/forecast_combiners/__init__.py +25 -0
- openstef_meta/models/forecast_combiners/forecast_combiner.py +129 -0
- openstef_meta/models/forecast_combiners/learned_weights_combiner.py +397 -0
- openstef_meta/models/forecast_combiners/stacking_combiner.py +133 -0
- openstef_meta/presets/__init__.py +12 -0
- openstef_meta/presets/forecasting_workflow.py +645 -0
- openstef_meta/utils/__init__.py +9 -0
- openstef_meta/utils/datasets.py +48 -0
- openstef_meta-4.0.0.dist-info/METADATA +21 -0
- openstef_meta-4.0.0.dist-info/RECORD +14 -0
- openstef_meta-4.0.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2017-2025 Contributors to the OpenSTEF project <short.term.energy.forecasts@alliander.com>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
+
"""Meta models for OpenSTEF."""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
|
|
8
|
+
# Set up logging configuration
# Note: despite its name, ``root_logger`` is the logger named after this package
# (``__name__``), not the global root logger.
root_logger = logging.getLogger(name=__name__)
# Only attach a NullHandler when the logger has no handler yet, so handlers that
# were already configured are left untouched.
if not root_logger.handlers:
    root_logger.addHandler(logging.NullHandler())

# Nothing is re-exported from the package root.
__all__ = []
|
|
@@ -0,0 +1,677 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <short.term.energy.forecasts@alliander.com>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
+
|
|
5
|
+
"""Ensemble forecasting model combining multiple base forecasters.
|
|
6
|
+
|
|
7
|
+
Orchestrates parallel base forecasters whose predictions are aggregated by a
|
|
8
|
+
``ForecastCombiner``. Extends ``BaseForecastingModel`` as a sibling of
|
|
9
|
+
``ForecastingModel``.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from functools import partial
|
|
15
|
+
from typing import Self, cast, override
|
|
16
|
+
|
|
17
|
+
import pandas as pd
|
|
18
|
+
from pydantic import Field, PrivateAttr, model_validator
|
|
19
|
+
|
|
20
|
+
from openstef_core.datasets import (
|
|
21
|
+
ForecastDataset,
|
|
22
|
+
ForecastInputDataset,
|
|
23
|
+
TimeSeriesDataset,
|
|
24
|
+
)
|
|
25
|
+
from openstef_core.datasets.timeseries_dataset import validate_horizons_present
|
|
26
|
+
from openstef_core.datasets.validated_datasets import EnsembleForecastDataset
|
|
27
|
+
from openstef_core.exceptions import NotFittedError
|
|
28
|
+
from openstef_core.mixins import HyperParams, TransformPipeline
|
|
29
|
+
from openstef_core.types import LeadTime, Quantile
|
|
30
|
+
from openstef_meta.models.forecast_combiners.forecast_combiner import ForecastCombiner
|
|
31
|
+
from openstef_models.explainability.mixins import ExplainableForecaster
|
|
32
|
+
from openstef_models.models.forecasting.forecaster import Forecaster
|
|
33
|
+
from openstef_models.models.forecasting_model import BaseForecastingModel, ModelFitResult, restore_target
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class EnsembleModelFitResult(ModelFitResult):
    """Outcome of fitting an ``EnsembleForecastingModel``.

    The fields inherited from ``ModelFitResult`` (input_dataset, metrics_*, etc.)
    describe the combiner's fit, while ``forecaster_fit_results`` keeps one
    ``ModelFitResult`` per base forecaster.
    """

    forecaster_fit_results: dict[str, ModelFitResult] = Field(description="ModelFitResult for each base forecaster")

    @override
    def metrics_to_flat_dict(self) -> dict[str, float]:
        """Flatten the inherited metrics together with every base forecaster's metrics.

        Returns:
            The parent's flat metric dictionary, extended with each forecaster's
            flat metrics under keys of the form ``"<forecaster name>_<metric key>"``.
        """
        flat_metrics = super().metrics_to_flat_dict()
        for forecaster_name, fit_result in self.forecaster_fit_results.items():
            for metric_key, metric_value in fit_result.metrics_to_flat_dict().items():
                # Prefix with the forecaster name so keys of different forecasters stay distinct.
                flat_metrics[f"{forecaster_name}_{metric_key}"] = metric_value
        return flat_metrics

    @property
    @override
    def component_fit_results(self) -> dict[str, ModelFitResult]:
        """Per-forecaster fit results, keyed by forecaster name."""
        per_forecaster = self.forecaster_fit_results
        return per_forecaster
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class EnsembleForecastingModel(BaseForecastingModel):
|
|
61
|
+
"""Ensemble forecasting pipeline: common preprocessing -> N forecasters -> combiner.
|
|
62
|
+
|
|
63
|
+
Runs multiple base forecasters in parallel, aggregates their predictions via a
|
|
64
|
+
``ForecastCombiner``, and applies shared postprocessing. Extends
|
|
65
|
+
``BaseForecastingModel`` as a sibling of ``ForecastingModel`` — not a subclass.
|
|
66
|
+
|
|
67
|
+
The ``preprocessing`` field (inherited from base) holds the **common preprocessing**
|
|
68
|
+
shared across all base forecasters. ``model_specific_preprocessing`` adds
|
|
69
|
+
per-forecaster transforms on top.
|
|
70
|
+
|
|
71
|
+
Invariants:
|
|
72
|
+
- fit() must be called before predict()
|
|
73
|
+
- All forecaster horizons must be present in the input data
|
|
74
|
+
|
|
75
|
+
Important:
|
|
76
|
+
The ``cutoff_history`` parameter is crucial when using lag-based features.
|
|
77
|
+
Set it to exclude incomplete rows from training (e.g. ``timedelta(days=14)``
|
|
78
|
+
for a lag-14 transform).
|
|
79
|
+
|
|
80
|
+
Example:
|
|
81
|
+
>>> from openstef_models.models.forecasting.constant_quantile_forecaster import (
|
|
82
|
+
... ConstantQuantileForecaster,
|
|
83
|
+
... )
|
|
84
|
+
>>> from openstef_meta.models.forecast_combiners.learned_weights_combiner import WeightsCombiner
|
|
85
|
+
>>> from openstef_core.types import LeadTime
|
|
86
|
+
>>> from datetime import timedelta
|
|
87
|
+
>>>
|
|
88
|
+
>>> forecaster_1 = ConstantQuantileForecaster(
|
|
89
|
+
... horizons=[LeadTime.from_string("PT36H")]
|
|
90
|
+
... )
|
|
91
|
+
>>> forecaster_2 = ConstantQuantileForecaster(
|
|
92
|
+
... horizons=[LeadTime.from_string("PT36H")]
|
|
93
|
+
... )
|
|
94
|
+
>>> combiner = WeightsCombiner(
|
|
95
|
+
... horizons=[LeadTime.from_string("PT36H")],
|
|
96
|
+
... )
|
|
97
|
+
>>> model = EnsembleForecastingModel(
|
|
98
|
+
... forecasters={"constant_median": forecaster_1, "constant_median_2": forecaster_2},
|
|
99
|
+
... combiner=combiner,
|
|
100
|
+
... cutoff_history=timedelta(days=14),
|
|
101
|
+
... )
|
|
102
|
+
>>> model.fit(training_data) # doctest: +SKIP
|
|
103
|
+
>>> forecasts = model.predict(new_data) # doctest: +SKIP
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
forecasters: dict[str, Forecaster] = Field(
|
|
107
|
+
default=...,
|
|
108
|
+
description="Named base forecasters whose predictions are combined.",
|
|
109
|
+
exclude=True,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
combiner: ForecastCombiner = Field(
|
|
113
|
+
default=...,
|
|
114
|
+
description="Combiner that aggregates base forecaster predictions.",
|
|
115
|
+
exclude=True,
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
model_specific_preprocessing: dict[str, TransformPipeline[TimeSeriesDataset]] = Field(
|
|
119
|
+
default_factory=dict,
|
|
120
|
+
description="Per-forecaster preprocessing pipelines applied after common preprocessing.",
|
|
121
|
+
exclude=True,
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
combiner_preprocessing: TransformPipeline[TimeSeriesDataset] = Field(
|
|
125
|
+
default_factory=TransformPipeline[TimeSeriesDataset],
|
|
126
|
+
description="Feature engineering for the forecast combiner.",
|
|
127
|
+
exclude=True,
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
model_specific_postprocessing: TransformPipeline[ForecastDataset] = Field(
|
|
131
|
+
default_factory=TransformPipeline[ForecastDataset],
|
|
132
|
+
description="Per-forecaster postprocessing applied before the combiner sees predictions.",
|
|
133
|
+
exclude=True,
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
combiner_postprocessing: TransformPipeline[ForecastDataset] = Field(
|
|
137
|
+
default_factory=TransformPipeline[ForecastDataset],
|
|
138
|
+
description="Combiner-specific postprocessing applied after shared postprocessing.",
|
|
139
|
+
exclude=True,
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
_logger: logging.Logger = PrivateAttr(default=logging.getLogger(__name__))
|
|
143
|
+
|
|
144
|
+
@model_validator(mode="after")
def _validate_horizons_consistent(self) -> Self:
    """Check that every base forecaster uses the same horizons as the combiner.

    Horizons are compared after sorting, so their order does not matter.

    Returns:
        The validated model instance.

    Raises:
        ValueError: If no forecaster is configured, or if a forecaster's horizons
            differ from the combiner's horizons.
    """
    if not self.forecasters:
        msg = "At least one forecaster is required."
        raise ValueError(msg)

    combiner_horizons = sorted(self.combiner.horizons)
    # Look up the first forecaster (in dict order) whose horizons deviate.
    first_mismatch = next(
        (
            (name, forecaster)
            for name, forecaster in self.forecasters.items()
            if sorted(forecaster.horizons) != combiner_horizons
        ),
        None,
    )
    if first_mismatch is None:
        return self

    name, forecaster = first_mismatch
    msg = (
        f"Forecaster '{name}' horizons {forecaster.horizons} "
        f"do not match combiner horizons {self.combiner.horizons}"
    )
    raise ValueError(msg)
|
|
167
|
+
|
|
168
|
+
@property
def forecaster_configs(self) -> dict[str, Forecaster]:
    """Shallow copy of the base forecasters, keyed by name."""
    configs: dict[str, Forecaster] = {}
    configs.update(self.forecasters)
    return configs
|
|
172
|
+
|
|
173
|
+
@property
@override
def quantiles(self) -> list[Quantile]:
    """Quantiles of the ensemble, taken from the combiner."""
    return self.combiner.quantiles
|
|
177
|
+
|
|
178
|
+
@property
@override
def max_horizon(self) -> LeadTime:
    """Maximum horizon of the ensemble, taken from the combiner."""
    return self.combiner.max_horizon
|
|
182
|
+
|
|
183
|
+
@property
@override
def hyperparams(self) -> HyperParams:
    """Hyperparameters of the combiner (``combiner.hparams``).

    The base forecasters' hyperparameters are exposed separately through
    ``component_hyperparams``.
    """
    return self.combiner.hparams
|
|
187
|
+
|
|
188
|
+
@property
@override
def is_fitted(self) -> bool:
    """Whether every base forecaster and the combiner report being fitted."""
    forecasters_fitted = all(forecaster.is_fitted for forecaster in self.forecasters.values())
    # The combiner is only consulted when all base forecasters are fitted.
    return forecasters_fitted and self.combiner.is_fitted
|
|
192
|
+
|
|
193
|
+
@property
@override
def component_hyperparams(self) -> dict[str, HyperParams]:
    """Hyperparameters (``hparams``) of each base forecaster, keyed by forecaster name."""
    per_forecaster: dict[str, HyperParams] = {}
    for forecaster_name, forecaster in self.forecasters.items():
        per_forecaster[forecaster_name] = forecaster.hparams
    return per_forecaster
|
|
197
|
+
|
|
198
|
+
@override
def get_explainable_components(self) -> dict[str, ExplainableForecaster]:
    """Collect the components that can provide explanations.

    Returns:
        Base forecasters that are ``ExplainableForecaster`` instances, keyed by
        name, plus the combiner under the key ``"combiner"`` when its feature
        importances are not empty.
    """
    explainable: dict[str, ExplainableForecaster] = {}
    for forecaster_name, forecaster in self.forecasters.items():
        if isinstance(forecaster, ExplainableForecaster):
            explainable[forecaster_name] = forecaster

    # ForecastCombiner is always ExplainableForecaster, but skip if importances are empty
    if self.combiner.feature_importances.empty:
        return explainable

    explainable["combiner"] = self.combiner
    return explainable
|
|
209
|
+
|
|
210
|
+
@property
def forecaster_names(self) -> list[str]:
    """Names of the base forecasters, in the order of the ``forecasters`` mapping."""
    return list(self.forecasters)
|
|
214
|
+
|
|
215
|
+
@override
def fit(
    self,
    data: TimeSeriesDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
) -> EnsembleModelFitResult:
    """Fit the base forecasters first and the combiner afterwards.

    Args:
        data: Historical time series data with features and target values.
        data_val: Optional validation data. If provided, splitters are ignored for validation.
        data_test: Optional test data. If provided, splitters are ignored for test.

    Returns:
        Fit result holding the combiner's fit details as top-level fields and the
        per-forecaster results in ``forecaster_fit_results``.
    """
    # Step 1: train every base forecaster and gather its predictions per split.
    ensemble_train, ensemble_val, ensemble_test, per_forecaster_results = self._fit_forecasters(
        data=data,
        data_val=data_val,
        data_test=data_test,
    )

    # Step 2: train the combiner on those predictions.
    combiner_result = self._fit_combiner(
        data=data,
        train_ensemble_dataset=ensemble_train,
        data_val=data_val,
        data_test=data_test,
        val_ensemble_dataset=ensemble_val,
        test_ensemble_dataset=ensemble_test,
    )

    # The combiner result supplies the base-class fields of the ensemble result.
    combiner_fields = combiner_result.model_dump()
    return EnsembleModelFitResult(forecaster_fit_results=per_forecaster_results, **combiner_fields)
|
|
253
|
+
|
|
254
|
+
@staticmethod
def _combine_datasets(
    data: ForecastInputDataset, additional_features: ForecastInputDataset
) -> ForecastInputDataset:
    """Append additional feature columns to the base forecasters' predictions.

    Columns of ``additional_features`` whose names already occur in ``data`` are
    skipped, so the columns of ``data`` take precedence.

    Args:
        data: ForecastInputDataset containing base Forecaster predictions.
        additional_features: ForecastInputDataset containing additional features.

    Returns:
        ForecastInputDataset with the combined columns, keeping the sample interval
        and forecast start of ``data``.
    """
    extra_columns = [
        column for column in additional_features.data.columns if column not in data.data.columns
    ]
    extra_frame = additional_features.data.loc[:, extra_columns]
    # DataFrame.join aligns on the index, keeping the rows of ``data``.
    joined_frame = data.data.join(extra_frame)

    return ForecastInputDataset(
        data=joined_frame,
        sample_interval=data.sample_interval,
        forecast_start=data.forecast_start,
    )
|
|
278
|
+
|
|
279
|
+
def _transform_combiner_data(self, data: TimeSeriesDataset) -> ForecastInputDataset | None:
    """Build the combiner's additional features from ``data``.

    Args:
        data: Time series dataset to run through the combiner preprocessing.

    Returns:
        The transformed data as a ForecastInputDataset, or None when the combiner
        preprocessing pipeline has no transforms (the combiner then works without
        additional features).
    """
    pipeline = self.combiner_preprocessing
    has_transforms = len(pipeline.transforms) != 0
    if not has_transforms:
        return None
    transformed = pipeline.transform(data)
    return ForecastInputDataset.from_timeseries(transformed, target_column=self.target_column)
|
|
286
|
+
|
|
287
|
+
def _fit_prepare_combiner_data(
    self,
    data: TimeSeriesDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
) -> tuple[ForecastInputDataset | None, ForecastInputDataset | None, ForecastInputDataset | None]:
    """Fit the combiner preprocessing and produce the combiner features per split.

    The pipeline is fitted on ``data`` only; ``data_val`` and ``data_test`` are
    merely transformed before all three are handed to the data splitter.

    Args:
        data: Training data the combiner preprocessing is fitted on.
        data_val: Optional validation data.
        data_test: Optional test data.

    Returns:
        Tuple ``(train, val, test)`` of combiner feature datasets, or
        ``(None, None, None)`` when the combiner preprocessing has no transforms.
    """
    pipeline = self.combiner_preprocessing
    if len(pipeline.transforms) == 0:
        return None, None, None

    pipeline.fit(data=data)

    transformed_train = pipeline.transform(data)
    transformed_val = pipeline.transform(data_val) if data_val else None
    transformed_test = pipeline.transform(data_test) if data_test else None

    split_train, split_val, split_test = self.data_splitter.split_dataset(
        data=transformed_train,
        data_val=transformed_val,
        data_test=transformed_test,
        target_column=self.target_column,
    )

    def _to_forecast_input(split: TimeSeriesDataset) -> ForecastInputDataset:
        # Wrap a split as combiner input with the model's target column.
        return ForecastInputDataset.from_timeseries(split, target_column=self.target_column)

    features_train = _to_forecast_input(split_train)
    features_val = _to_forecast_input(split_val) if split_val else None
    features_test = _to_forecast_input(split_test) if split_test else None

    return features_train, features_val, features_test
|
|
321
|
+
|
|
322
|
+
def _fit_forecasters(
    self,
    data: TimeSeriesDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
) -> tuple[
    EnsembleForecastDataset,
    EnsembleForecastDataset | None,
    EnsembleForecastDataset | None,
    dict[str, ModelFitResult],
]:
    """Fit all preprocessing pipelines and base forecasters.

    Args:
        data: Training data.
        data_val: Optional validation data.
        data_test: Optional test data.

    Returns:
        Tuple of the train, validation and test ensemble datasets (validation and
        test are None unless every forecaster produced a prediction for that
        split) and the fit result of each forecaster keyed by name.
    """
    train_predictions: dict[str, ForecastDataset] = {}
    val_predictions: dict[str, ForecastDataset | None] = {}
    test_predictions: dict[str, ForecastDataset | None] = {}
    fit_results: dict[str, ModelFitResult] = {}

    # Fit the common pipeline, then each per-forecaster pipeline on the
    # common-preprocessed output (not on the raw data).
    self.preprocessing.fit(data=data)
    common_preprocessed = self.preprocessing.transform(data=data)
    for pipeline in self.model_specific_preprocessing.values():
        pipeline.fit(data=common_preprocessed)
    logger.debug("Completed fitting preprocessing pipelines.")

    for forecaster_name in self.forecasters:
        logger.debug("Fitting Forecaster '%s'.", forecaster_name)
        train_prediction, val_prediction, test_prediction, fit_result = self._fit_forecaster(
            data=data,
            data_val=data_val,
            data_test=data_test,
            forecaster_name=forecaster_name,
        )
        train_predictions[forecaster_name] = train_prediction
        val_predictions[forecaster_name] = val_prediction
        test_predictions[forecaster_name] = test_prediction
        fit_results[forecaster_name] = fit_result

    def _bundle(predictions: dict[str, ForecastDataset]) -> EnsembleForecastDataset:
        # Attach original (unsplit) target so the combiner can compute loss across all timesteps.
        # NOTE(review): the target always comes from ``data``, also for the validation
        # and test ensembles when ``data_val`` / ``data_test`` were passed explicitly —
        # confirm this is intended.
        return EnsembleForecastDataset.from_forecast_datasets(
            predictions, target_series=data.data[self.target_column]
        )

    def _bundle_optional(predictions: dict[str, ForecastDataset | None]) -> EnsembleForecastDataset | None:
        # An ensemble is only built when every forecaster delivered a prediction.
        if not all(isinstance(prediction, ForecastDataset) for prediction in predictions.values()):
            return None
        return _bundle({name: prediction for name, prediction in predictions.items() if prediction is not None})

    ensemble_train = _bundle(train_predictions)
    ensemble_val = _bundle_optional(val_predictions)
    ensemble_test = _bundle_optional(test_predictions)

    return ensemble_train, ensemble_val, ensemble_test, fit_results
|
|
382
|
+
|
|
383
|
+
def _fit_forecaster(
    self,
    data: TimeSeriesDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
    forecaster_name: str = "",
) -> tuple[
    ForecastDataset,
    ForecastDataset | None,
    ForecastDataset | None,
    ModelFitResult,
]:
    """Fit a single base forecaster and score it on each available split.

    Args:
        data: Historical time series data with features and target values.
        data_val: Optional validation data.
        data_test: Optional test data.
        forecaster_name: Name of the forecaster to train; must be a key of ``forecasters``.

    Returns:
        Tuple of the train prediction, the validation prediction (or None), the
        test prediction (or None) and the ModelFitResult with the split inputs
        and metrics. ``metrics_full`` is set to the train metrics.
    """
    forecaster = self.forecasters[forecaster_name]
    validate_horizons_present(data, forecaster.horizons)

    # Preprocess each provided dataset for this forecaster.
    train_input = self.prepare_forecaster_input(data=data, forecaster_name=forecaster_name)
    val_input = self.prepare_forecaster_input(data=data_val, forecaster_name=forecaster_name) if data_val else None
    test_input = (
        self.prepare_forecaster_input(data=data_test, forecaster_name=forecaster_name) if data_test else None
    )

    # Rows without a target value cannot be trained on, so they are dropped.
    drop_missing_target = partial(pd.DataFrame.dropna, subset=[self.target_column])  # pyright: ignore[reportUnknownMemberType]
    train_input = train_input.pipe_pandas(drop_missing_target)
    val_input = val_input.pipe_pandas(drop_missing_target) if val_input else None
    test_input = test_input.pipe_pandas(drop_missing_target) if test_input else None

    # Let the data splitter produce the final train/val/test inputs.
    train_input, val_input, test_input = self.data_splitter.split_dataset(
        data=train_input, data_val=val_input, data_test=test_input, target_column=self.target_column
    )

    logger.debug("Started fitting forecaster '%s'.", forecaster_name)
    forecaster.fit(data=train_input, data_val=val_input)
    logger.debug("Completed fitting forecaster '%s'.", forecaster_name)

    train_prediction = self._predict_forecaster(input_data=train_input, forecaster_name=forecaster_name)
    train_metrics = self._calculate_score(prediction=train_prediction)

    val_prediction = None
    val_metrics = None
    if val_input is not None:
        val_prediction = self._predict_forecaster(input_data=val_input, forecaster_name=forecaster_name)
        val_metrics = self._calculate_score(prediction=val_prediction)

    test_prediction = None
    test_metrics = None
    if test_input is not None:
        test_prediction = self._predict_forecaster(input_data=test_input, forecaster_name=forecaster_name)
        test_metrics = self._calculate_score(prediction=test_prediction)

    fit_result = ModelFitResult(
        input_dataset=train_input,
        input_data_train=train_input,
        input_data_val=val_input,
        input_data_test=test_input,
        metrics_train=train_metrics,
        metrics_val=val_metrics,
        metrics_test=test_metrics,
        metrics_full=train_metrics,
    )

    return train_prediction, val_prediction, test_prediction, fit_result
|
|
463
|
+
|
|
464
|
+
def _predict_forecaster(self, input_data: ForecastInputDataset, forecaster_name: str) -> ForecastDataset:
    """Predict with one base forecaster and postprocess the result.

    Args:
        input_data: Prepared input for the forecaster.
        forecaster_name: Key of the forecaster in ``forecasters``.

    Returns:
        The prediction with the target column restored from ``input_data``, after
        the model-specific and then the shared postprocessing transforms.
    """
    logger.debug("Predicting forecaster '%s'.", forecaster_name)
    forecaster = self.forecasters[forecaster_name]
    raw_prediction = forecaster.predict(data=input_data)
    with_target = restore_target(
        dataset=raw_prediction, original_dataset=input_data, target_column=self.target_column
    )
    forecaster_postprocessed = self.model_specific_postprocessing.transform(with_target)
    return self.postprocessing.transform(forecaster_postprocessed)
|
|
472
|
+
|
|
473
|
+
def _predict_forecasters(
    self,
    data: TimeSeriesDataset,
    forecast_start: datetime | None = None,
) -> EnsembleForecastDataset:
    """Predict with every base forecaster and bundle the results.

    Args:
        data: Input time series dataset.
        forecast_start: Optional start time for forecasts.

    Returns:
        EnsembleForecastDataset built from the per-forecaster predictions, with the
        target column of ``data`` passed as target series.
    """
    per_forecaster: dict[str, ForecastDataset] = {}
    for forecaster_name in self.forecasters:
        logger.debug("Generating predictions for forecaster '%s'.", forecaster_name)
        forecaster_input = self.prepare_forecaster_input(
            data=data, forecast_start=forecast_start, forecaster_name=forecaster_name
        )
        per_forecaster[forecaster_name] = self._predict_forecaster(
            input_data=forecaster_input,
            forecaster_name=forecaster_name,
        )

    target = data.data[self.target_column]
    return EnsembleForecastDataset.from_forecast_datasets(per_forecaster, target_series=target)
|
|
488
|
+
|
|
489
|
+
def prepare_forecaster_input(
    self,
    data: TimeSeriesDataset,
    forecaster_name: str = "",
    forecast_start: datetime | None = None,
) -> ForecastInputDataset:
    """Prepare input data for a specific base forecaster.

    When the forecaster has no model-specific preprocessing, the work is delegated
    entirely to the shared base ``prepare_input``. Otherwise the common
    preprocessing and the model-specific preprocessing are applied in that order,
    the target column is restored from ``data``, and the result is filtered to
    start at the history cutoff.

    Args:
        data: Raw time series dataset.
        forecaster_name: Which forecaster to prepare data for.
        forecast_start: Optional forecast start time override.

    Returns:
        Processed forecast input dataset ready for the named forecaster.
    """
    logger.debug("Preparing input data for forecaster '%s'.", forecaster_name)

    # No model-specific preprocessing — delegate entirely to shared base method
    if forecaster_name not in self.model_specific_preprocessing:
        return self.prepare_input(data=data, forecast_start=forecast_start)

    # Apply model-specific preprocessing on top of the common pipeline
    logger.debug("Applying model-specific preprocessing for forecaster '%s'.", forecaster_name)
    preprocessed = self.preprocessing.transform(data=data)
    preprocessed = self.model_specific_preprocessing[forecaster_name].transform(data=preprocessed)
    preprocessed = restore_target(dataset=preprocessed, original_dataset=data, target_column=self.target_column)

    # Apply cutoff and create ForecastInputDataset
    input_data_start = cast("pd.Series[pd.Timestamp]", preprocessed.index).min().to_pydatetime()
    input_data_cutoff = input_data_start + self.cutoff_history
    if forecast_start is not None and forecast_start < input_data_cutoff:
        # Log BEFORE overwriting the cutoff, so the message reports the computed
        # cutoff rather than printing the forecast start twice.
        self._logger.warning(
            "Forecast start %s is before input data start + cutoff history %s. Using forecast start as cutoff.",
            forecast_start,
            input_data_cutoff,
        )
        input_data_cutoff = forecast_start
    preprocessed = preprocessed.filter_by_range(start=input_data_cutoff)

    return ForecastInputDataset.from_timeseries(
        dataset=preprocessed,
        target_column=self.target_column,
        forecast_start=forecast_start,
    )
|
|
535
|
+
|
|
536
|
+
def _predict_transform_combiner(
    self, ensemble_dataset: EnsembleForecastDataset, original_data: TimeSeriesDataset
) -> ForecastDataset:
    """Build the combiner's additional features and produce the combined forecast.

    Args:
        ensemble_dataset: Predictions of the base forecasters.
        original_data: Dataset the combiner preprocessing is applied to.

    Returns:
        The combiner's postprocessed prediction.
    """
    # ``_predict_combiner`` logs "Predicting combiner." itself; use a distinct
    # message here so that line is not emitted twice per prediction.
    logger.debug("Preparing combiner features.")
    features = self._transform_combiner_data(data=original_data)

    return self._predict_combiner(ensemble_dataset, features)
|
|
543
|
+
|
|
544
|
+
def _predict_combiner(
    self,
    ensemble_dataset: EnsembleForecastDataset,
    features: ForecastInputDataset | None,
) -> ForecastDataset:
    """Run the combiner on base predictions and postprocess its output.

    Args:
        ensemble_dataset: Predictions of the base forecasters.
        features: Optional additional features for the combiner.

    Returns:
        The combiner prediction with the target restored from ``ensemble_dataset``,
        after the combiner-specific and then the shared postprocessing transforms.
    """
    logger.debug("Predicting combiner.")
    raw_prediction = self.combiner.predict(ensemble_dataset, additional_features=features)
    with_target = restore_target(
        dataset=raw_prediction, original_dataset=ensemble_dataset, target_column=self.target_column
    )
    combiner_postprocessed = self.combiner_postprocessing.transform(with_target)
    return self.postprocessing.transform(combiner_postprocessed)
|
|
556
|
+
|
|
557
|
+
def _fit_combiner(
    self,
    data: TimeSeriesDataset,
    train_ensemble_dataset: EnsembleForecastDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
    val_ensemble_dataset: EnsembleForecastDataset | None = None,
    test_ensemble_dataset: EnsembleForecastDataset | None = None,
) -> ModelFitResult:
    """Fit the combiner and its postprocessing, then score every available split.

    Args:
        data: Training data used to fit the combiner preprocessing.
        train_ensemble_dataset: Base forecaster predictions on the training split.
        data_val: Optional validation data for the combiner features.
        data_test: Optional test data for the combiner features.
        val_ensemble_dataset: Optional base forecaster predictions on the validation split.
        test_ensemble_dataset: Optional base forecaster predictions on the test split.

    Returns:
        ModelFitResult of the combiner. ``metrics_full`` is set to the train metrics.
    """
    # Prepare additional features for the combiner (e.g. sample weights) — split separately from ensemble data
    train_features, val_features, test_features = self._fit_prepare_combiner_data(
        data=data, data_val=data_val, data_test=data_test
    )

    logger.debug("Fitting combiner.")
    self.combiner.fit(
        data=train_ensemble_dataset, data_val=val_ensemble_dataset, additional_features=train_features
    )

    # Fit combiner postprocessing on training predictions
    raw_train_prediction = self.combiner.predict(train_ensemble_dataset, additional_features=train_features)
    raw_train_prediction = restore_target(
        dataset=raw_train_prediction, original_dataset=train_ensemble_dataset, target_column=self.target_column
    )
    self.combiner_postprocessing.fit_transform(raw_train_prediction)

    train_prediction = self._predict_combiner(train_ensemble_dataset, features=train_features)
    train_metrics = self._calculate_score(prediction=train_prediction)

    val_metrics = None
    if val_ensemble_dataset is not None:
        val_prediction = self._predict_combiner(val_ensemble_dataset, features=val_features)
        val_metrics = self._calculate_score(prediction=val_prediction)

    test_metrics = None
    if test_ensemble_dataset is not None:
        test_prediction = self._predict_combiner(test_ensemble_dataset, features=test_features)
        test_metrics = self._calculate_score(prediction=test_prediction)

    # ModelFitResult expects ForecastInputDataset; use first quantile as a representative slice
    train_slice = train_ensemble_dataset.get_base_predictions_for_quantile(quantile=self.quantiles[0])
    val_slice = (
        val_ensemble_dataset.get_base_predictions_for_quantile(quantile=self.quantiles[0])
        if val_ensemble_dataset
        else None
    )
    test_slice = (
        test_ensemble_dataset.get_base_predictions_for_quantile(quantile=self.quantiles[0])
        if test_ensemble_dataset
        else None
    )

    return ModelFitResult(
        input_dataset=train_ensemble_dataset,
        input_data_train=train_slice,
        input_data_val=val_slice,
        input_data_test=test_slice,
        metrics_train=train_metrics,
        metrics_val=val_metrics,
        metrics_test=test_metrics,
        metrics_full=train_metrics,
    )
|
|
615
|
+
|
|
616
|
+
def _predict_contributions_combiner(
    self, ensemble_dataset: EnsembleForecastDataset, original_data: TimeSeriesDataset
) -> TimeSeriesDataset:
    """Ask the combiner for contributions, passing the combiner features along.

    Args:
        ensemble_dataset: Predictions of the base forecasters.
        original_data: Dataset the combiner preprocessing is applied to.

    Returns:
        The result of the combiner's ``predict_contributions``.
    """
    combiner_features = self._transform_combiner_data(data=original_data)
    contributions = self.combiner.predict_contributions(ensemble_dataset, additional_features=combiner_features)
    return contributions
|
|
621
|
+
|
|
622
|
+
@override
def predict(self, data: TimeSeriesDataset, forecast_start: datetime | None = None) -> ForecastDataset:
    """Produce the combined ensemble forecast for ``data``.

    Args:
        data: Input time series dataset for prediction.
        forecast_start: Optional start time for forecasts.

    Returns:
        ForecastDataset containing the generated forecasts.

    Raises:
        NotFittedError: If the model has not been fitted yet.
    """
    if not self.is_fitted:
        raise NotFittedError(type(self).__name__)
    logger.debug("Generating predictions.")

    # Base forecasters first; the combiner then merges their predictions.
    base_predictions = self._predict_forecasters(data=data, forecast_start=forecast_start)
    combined_forecast = self._predict_transform_combiner(
        ensemble_dataset=base_predictions,
        original_data=data,
    )
    return combined_forecast
|
|
647
|
+
|
|
648
|
+
@override
def predict_contributions(
    self,
    data: TimeSeriesDataset,
    forecast_start: datetime | None = None,
) -> TimeSeriesDataset:
    """Compute the contribution of each base model to the ensemble prediction.

    Args:
        data: Input time series dataset.
        forecast_start: Optional start time for forecasts.

    Returns:
        TimeSeriesDataset where each column is a base model's contribution.

    Raises:
        NotFittedError: If the model has not been fitted yet.
    """
    if not self.is_fitted:
        raise NotFittedError(type(self).__name__)

    base_predictions = self._predict_forecasters(data=data, forecast_start=forecast_start)
    contributions = self._predict_contributions_combiner(
        ensemble_dataset=base_predictions,
        original_data=data,
    )
    return contributions
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
__all__ = ["EnsembleForecastingModel", "EnsembleModelFitResult", "ModelFitResult"]
|