openstef-meta 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ # SPDX-FileCopyrightText: 2017-2025 Contributors to the OpenSTEF project <short.term.energy.forecasts@alliander.com>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ """Meta models for OpenSTEF."""
5
+
6
+ import logging
7
+
8
# Library-style logging: attach a no-op handler to the package logger (only when
# nothing is attached yet) and leave real handler configuration to the application.
root_logger = logging.getLogger(__name__)
if len(root_logger.handlers) == 0:
    root_logger.addHandler(logging.NullHandler())

# The package root exports no public names of its own.
__all__ = []
@@ -0,0 +1,5 @@
1
+ # SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <short.term.energy.forecasts@alliander.com>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ """Meta Forecasting models."""
@@ -0,0 +1,677 @@
1
+ # SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <short.term.energy.forecasts@alliander.com>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ """Ensemble forecasting model combining multiple base forecasters.
6
+
7
+ Orchestrates parallel base forecasters whose predictions are aggregated by a
8
+ ``ForecastCombiner``. Extends ``BaseForecastingModel`` as a sibling of
9
+ ``ForecastingModel``.
10
+ """
11
+
12
+ import logging
13
+ from datetime import datetime
14
+ from functools import partial
15
+ from typing import Self, cast, override
16
+
17
+ import pandas as pd
18
+ from pydantic import Field, PrivateAttr, model_validator
19
+
20
+ from openstef_core.datasets import (
21
+ ForecastDataset,
22
+ ForecastInputDataset,
23
+ TimeSeriesDataset,
24
+ )
25
+ from openstef_core.datasets.timeseries_dataset import validate_horizons_present
26
+ from openstef_core.datasets.validated_datasets import EnsembleForecastDataset
27
+ from openstef_core.exceptions import NotFittedError
28
+ from openstef_core.mixins import HyperParams, TransformPipeline
29
+ from openstef_core.types import LeadTime, Quantile
30
+ from openstef_meta.models.forecast_combiners.forecast_combiner import ForecastCombiner
31
+ from openstef_models.explainability.mixins import ExplainableForecaster
32
+ from openstef_models.models.forecasting.forecaster import Forecaster
33
+ from openstef_models.models.forecasting_model import BaseForecastingModel, ModelFitResult, restore_target
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
class EnsembleModelFitResult(ModelFitResult):
    """Outcome of fitting an ``EnsembleForecastingModel``.

    The fields inherited from ``ModelFitResult`` (input_dataset, metrics_*, ...)
    carry the combiner's fit results; ``forecaster_fit_results`` adds one
    ``ModelFitResult`` per base forecaster, keyed by forecaster name.
    """

    forecaster_fit_results: dict[str, ModelFitResult] = Field(description="ModelFitResult for each base forecaster")

    @override
    def metrics_to_flat_dict(self) -> dict[str, float]:
        # Start from the combiner-level metrics, then append every base forecaster's
        # metrics under a "<forecaster name>_" prefix.
        flat = super().metrics_to_flat_dict()
        for forecaster_name, fit_result in self.forecaster_fit_results.items():
            for metric_key, metric_value in fit_result.metrics_to_flat_dict().items():
                flat[f"{forecaster_name}_{metric_key}"] = metric_value
        return flat

    @property
    @override
    def component_fit_results(self) -> dict[str, ModelFitResult]:
        # The components of an ensemble are its base forecasters.
        return self.forecaster_fit_results
58
+
59
+
60
class EnsembleForecastingModel(BaseForecastingModel):
    """Ensemble forecasting pipeline: common preprocessing -> N forecasters -> combiner.

    Runs multiple base forecasters in parallel, aggregates their predictions via a
    ``ForecastCombiner``, and applies shared postprocessing. Extends
    ``BaseForecastingModel`` as a sibling of ``ForecastingModel`` — not a subclass.

    The ``preprocessing`` field (inherited from base) holds the **common preprocessing**
    shared across all base forecasters. ``model_specific_preprocessing`` adds
    per-forecaster transforms on top.

    Invariants:
        - fit() must be called before predict()
        - All forecaster horizons must be present in the input data

    Important:
        The ``cutoff_history`` parameter is crucial when using lag-based features.
        Set it to exclude incomplete rows from training (e.g. ``timedelta(days=14)``
        for a lag-14 transform).

    Example:
        >>> from openstef_models.models.forecasting.constant_quantile_forecaster import (
        ...     ConstantQuantileForecaster,
        ... )
        >>> from openstef_meta.models.forecast_combiners.learned_weights_combiner import WeightsCombiner
        >>> from openstef_core.types import LeadTime
        >>> from datetime import timedelta
        >>>
        >>> forecaster_1 = ConstantQuantileForecaster(
        ...     horizons=[LeadTime.from_string("PT36H")]
        ... )
        >>> forecaster_2 = ConstantQuantileForecaster(
        ...     horizons=[LeadTime.from_string("PT36H")]
        ... )
        >>> combiner = WeightsCombiner(
        ...     horizons=[LeadTime.from_string("PT36H")],
        ... )
        >>> model = EnsembleForecastingModel(
        ...     forecasters={"constant_median": forecaster_1, "constant_median_2": forecaster_2},
        ...     combiner=combiner,
        ...     cutoff_history=timedelta(days=14),
        ... )
        >>> model.fit(training_data)  # doctest: +SKIP
        >>> forecasts = model.predict(new_data)  # doctest: +SKIP
    """

    # Base forecasters keyed by name. Required (``default=...``); the key is the name used
    # throughout this class for lookups, log messages and per-forecaster fit results.
    # ``exclude=True`` keeps the field out of pydantic serialization (same for all fields below).
    forecasters: dict[str, Forecaster] = Field(
        default=...,
        description="Named base forecasters whose predictions are combined.",
        exclude=True,
    )

    # Required combiner. The model's quantiles, max_horizon and hyperparams properties
    # are all read from this object.
    combiner: ForecastCombiner = Field(
        default=...,
        description="Combiner that aggregates base forecaster predictions.",
        exclude=True,
    )

    # Optional extra pipelines keyed by forecaster name. A forecaster without an entry
    # here only receives the common preprocessing.
    model_specific_preprocessing: dict[str, TransformPipeline[TimeSeriesDataset]] = Field(
        default_factory=dict,
        description="Per-forecaster preprocessing pipelines applied after common preprocessing.",
        exclude=True,
    )

    # Produces the combiner's additional features. When the pipeline has no transforms,
    # the combiner is called with ``additional_features=None``.
    combiner_preprocessing: TransformPipeline[TimeSeriesDataset] = Field(
        default_factory=TransformPipeline[TimeSeriesDataset],
        description="Feature engineering for the forecast combiner.",
        exclude=True,
    )

    # A single pipeline (not keyed by name): the same transforms are applied to every
    # base forecaster's predictions.
    model_specific_postprocessing: TransformPipeline[ForecastDataset] = Field(
        default_factory=TransformPipeline[ForecastDataset],
        description="Per-forecaster postprocessing applied before the combiner sees predictions.",
        exclude=True,
    )

    # NOTE(review): the description says "after shared postprocessing", but
    # ``_predict_combiner`` runs this pipeline *before* ``self.postprocessing`` — confirm
    # which order is intended.
    combiner_postprocessing: TransformPipeline[ForecastDataset] = Field(
        default_factory=TransformPipeline[ForecastDataset],
        description="Combiner-specific postprocessing applied after shared postprocessing.",
        exclude=True,
    )

    # Instance-level handle on the module logger (same logger name as the module-level ``logger``).
    _logger: logging.Logger = PrivateAttr(default=logging.getLogger(__name__))
143
+
144
@model_validator(mode="after")
def _validate_horizons_consistent(self) -> Self:
    """Check that every base forecaster uses the same horizons as the combiner.

    Horizons are compared after sorting, so ordering differences are tolerated.

    Returns:
        Validated model instance.

    Raises:
        ValueError: If forecasters dict is empty or any forecaster's horizons differ from the combiner's.
    """
    if len(self.forecasters) == 0:
        msg = "At least one forecaster is required."
        raise ValueError(msg)

    reference_horizons = sorted(self.combiner.horizons)
    for name, forecaster in self.forecasters.items():
        if sorted(forecaster.horizons) == reference_horizons:
            continue
        # Report the first mismatch with both (unsorted) horizon lists for easier debugging.
        msg = (
            f"Forecaster '{name}' horizons {forecaster.horizons} "
            f"do not match combiner horizons {self.combiner.horizons}"
        )
        raise ValueError(msg)
    return self
167
+
168
@property
def forecaster_configs(self) -> dict[str, Forecaster]:
    """Base forecasters keyed by name, returned as a new (shallow-copied) dict."""
    # Copy so callers cannot add or remove entries on the model's own mapping.
    return {**self.forecasters}
172
+
173
+ @property
174
+ @override
175
+ def quantiles(self) -> list[Quantile]:
176
+ return self.combiner.quantiles
177
+
178
+ @property
179
+ @override
180
+ def max_horizon(self) -> LeadTime:
181
+ return self.combiner.max_horizon
182
+
183
+ @property
184
+ @override
185
+ def hyperparams(self) -> HyperParams:
186
+ return self.combiner.hparams
187
+
188
+ @property
189
+ @override
190
+ def is_fitted(self) -> bool:
191
+ return all(f.is_fitted for f in self.forecasters.values()) and self.combiner.is_fitted
192
+
193
+ @property
194
+ @override
195
+ def component_hyperparams(self) -> dict[str, HyperParams]:
196
+ return {name: f.hparams for name, f in self.forecasters.items()}
197
+
198
@override
def get_explainable_components(self) -> dict[str, ExplainableForecaster]:
    """Collect the components that can provide explanations.

    Returns:
        Base forecasters that are ``ExplainableForecaster`` instances, keyed by name,
        plus the combiner under the key ``"combiner"`` when its feature importances
        are non-empty.
    """
    components: dict[str, ExplainableForecaster] = {}
    for name, forecaster in self.forecasters.items():
        if isinstance(forecaster, ExplainableForecaster):
            components[name] = forecaster

    # ForecastCombiner is always ExplainableForecaster, but skip if importances are empty
    if self.combiner.feature_importances.empty:
        return components

    components["combiner"] = self.combiner
    return components
209
+
210
@property
def forecaster_names(self) -> list[str]:
    """Names of the base forecasters, in the order they are stored."""
    return [name for name in self.forecasters]
214
+
215
@override
def fit(
    self,
    data: TimeSeriesDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
) -> EnsembleModelFitResult:
    """Fit the base forecasters first, then the combiner on their predictions.

    Args:
        data: Historical time series data with features and target values.
        data_val: Optional validation data. If provided, splitters are ignored for validation.
        data_test: Optional test data. If provided, splitters are ignored for test.

    Returns:
        Fit result whose base fields come from the combiner fit and whose
        ``forecaster_fit_results`` holds one result per base forecaster.
    """
    # Stage 1: train every base forecaster and gather its predictions per split.
    base_outputs = self._fit_forecasters(data=data, data_val=data_val, data_test=data_test)
    ensemble_train, ensemble_val, ensemble_test, per_forecaster_results = base_outputs

    # Stage 2: train the combiner on those predictions.
    combiner_result = self._fit_combiner(
        data=data,
        train_ensemble_dataset=ensemble_train,
        data_val=data_val,
        data_test=data_test,
        val_ensemble_dataset=ensemble_val,
        test_ensemble_dataset=ensemble_test,
    )

    # The combiner's result supplies the base ModelFitResult fields of the ensemble result.
    combiner_fields = combiner_result.model_dump()
    return EnsembleModelFitResult(forecaster_fit_results=per_forecaster_results, **combiner_fields)
253
+
254
@staticmethod
def _combine_datasets(
    data: ForecastInputDataset, additional_features: ForecastInputDataset
) -> ForecastInputDataset:
    """Join additional feature columns onto the base forecasters' predictions.

    Columns of ``additional_features`` that already exist in ``data`` are dropped
    before joining, so the columns of ``data`` always win.

    Args:
        data: ForecastInputDataset containing base Forecaster predictions.
        additional_features: ForecastInputDataset containing additional features.

    Returns:
        ForecastInputDataset with combined features; sample interval and forecast
        start are taken from ``data``.
    """
    existing_columns = data.data.columns
    new_columns = [column for column in additional_features.data.columns if column not in existing_columns]
    extra_frame = additional_features.data.loc[:, new_columns]

    # DataFrame.join aligns on the index and keeps all rows of ``data`` (left join).
    merged = data.data.join(extra_frame)

    return ForecastInputDataset(
        data=merged,
        sample_interval=data.sample_interval,
        forecast_start=data.forecast_start,
    )
278
+
279
def _transform_combiner_data(self, data: TimeSeriesDataset) -> ForecastInputDataset | None:
    """Build the combiner's additional features from ``data``.

    Returns:
        The transformed data as a ForecastInputDataset, or None when no combiner
        preprocessing is configured — signalling that the combiner should work
        without additional features.
    """
    pipeline = self.combiner_preprocessing
    has_transforms = len(pipeline.transforms) > 0
    if not has_transforms:
        return None

    transformed = pipeline.transform(data)
    return ForecastInputDataset.from_timeseries(transformed, target_column=self.target_column)
286
+
287
def _fit_prepare_combiner_data(
    self,
    data: TimeSeriesDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
) -> tuple[ForecastInputDataset | None, ForecastInputDataset | None, ForecastInputDataset | None]:
    """Fit the combiner preprocessing on the training data and transform every split.

    Args:
        data: Training data the combiner preprocessing is fitted on.
        data_val: Optional validation data.
        data_test: Optional test data.

    Returns:
        ``(train, val, test)`` combiner features, or ``(None, None, None)`` when no
        combiner preprocessing is configured.
    """
    pipeline = self.combiner_preprocessing
    if len(pipeline.transforms) == 0:
        return None, None, None

    # Fit on training data only; the other splits are transformed with the fitted pipeline.
    pipeline.fit(data=data)

    transformed_train = pipeline.transform(data)
    transformed_val = None
    if data_val:
        transformed_val = pipeline.transform(data_val)
    transformed_test = None
    if data_test:
        transformed_test = pipeline.transform(data_test)

    split_train, split_val, split_test = self.data_splitter.split_dataset(
        data=transformed_train,
        data_val=transformed_val,
        data_test=transformed_test,
        target_column=self.target_column,
    )

    def as_forecast_input(dataset: TimeSeriesDataset) -> ForecastInputDataset:
        return ForecastInputDataset.from_timeseries(dataset, target_column=self.target_column)

    features_train = as_forecast_input(split_train)
    features_val = as_forecast_input(split_val) if split_val else None
    features_test = as_forecast_input(split_test) if split_test else None
    return features_train, features_val, features_test
321
+
322
def _fit_forecasters(
    self,
    data: TimeSeriesDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
) -> tuple[
    EnsembleForecastDataset,
    EnsembleForecastDataset | None,
    EnsembleForecastDataset | None,
    dict[str, ModelFitResult],
]:
    """Fit preprocessing and every base forecaster, then bundle their predictions.

    Fits the common preprocessing on ``data``, fits each model-specific pipeline on
    the common-preprocessed output, trains every forecaster and wraps the per-split
    predictions in ``EnsembleForecastDataset`` objects.

    Args:
        data: Training data.
        data_val: Optional explicit validation data.
        data_test: Optional explicit test data.

    Returns:
        ``(train_ensemble, val_ensemble, test_ensemble, fit_results)``. The validation
        and test ensembles are None unless every forecaster produced predictions for
        that split.
    """
    predictions_train: dict[str, ForecastDataset] = {}
    predictions_val: dict[str, ForecastDataset | None] = {}
    predictions_test: dict[str, ForecastDataset | None] = {}
    results: dict[str, ModelFitResult] = {}

    # Fit the feature engineering transforms
    self.preprocessing.fit(data=data)
    data_transformed = self.preprocessing.transform(data=data)
    # Fit per-forecaster transforms on the common-preprocessed output (not raw data)
    for pipeline in self.model_specific_preprocessing.values():
        pipeline.fit(data=data_transformed)
    logger.debug("Completed fitting preprocessing pipelines.")

    # Fit the forecasters
    for name in self.forecasters:
        logger.debug("Fitting Forecaster '%s'.", name)
        predictions_train[name], predictions_val[name], predictions_test[name], results[name] = (
            self._fit_forecaster(
                data=data,
                data_val=data_val,
                data_test=data_test,
                forecaster_name=name,
            )
        )

    # Attach original (unsplit) target so the combiner can compute loss across all timesteps
    train_ensemble = EnsembleForecastDataset.from_forecast_datasets(
        predictions_train, target_series=data.data[self.target_column]
    )

    def bundle_split(
        predictions: dict[str, ForecastDataset | None],
        explicit_split: TimeSeriesDataset | None,
    ) -> EnsembleForecastDataset | None:
        # A split is only usable when every forecaster produced predictions for it.
        if not all(isinstance(prediction, ForecastDataset) for prediction in predictions.values()):
            return None
        # When the caller supplied this split explicitly, its predictions are indexed on that
        # dataset, so the target must come from it as well. Without an explicit split the
        # predictions were carved out of ``data`` by the splitter, so ``data`` holds the target.
        target_source = data if explicit_split is None else explicit_split
        return EnsembleForecastDataset.from_forecast_datasets(
            {name: prediction for name, prediction in predictions.items() if prediction is not None},
            target_series=target_source.data[self.target_column],
        )

    val_ensemble = bundle_split(predictions_val, data_val)
    test_ensemble = bundle_split(predictions_test, data_test)

    return train_ensemble, val_ensemble, test_ensemble, results
382
+
383
def _fit_forecaster(
    self,
    data: TimeSeriesDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
    forecaster_name: str = "",
) -> tuple[
    ForecastDataset,
    ForecastDataset | None,
    ForecastDataset | None,
    ModelFitResult,
]:
    """Train one named base forecaster and score it on every available split.

    Args:
        data: Historical time series data with features and target values.
        data_val: Optional validation data.
        data_test: Optional test data.
        forecaster_name: Name of the forecaster to train.

    Returns:
        ``(prediction_train, prediction_val, prediction_test, fit_result)``; the
        validation/test predictions are None when that split is unavailable.
    """
    forecaster = self.forecasters[forecaster_name]
    validate_horizons_present(data, forecaster.horizons)

    def prepare(dataset: TimeSeriesDataset | None) -> ForecastInputDataset | None:
        # Preprocess an optional split for this forecaster.
        if not dataset:
            return None
        return self.prepare_forecaster_input(data=dataset, forecaster_name=forecaster_name)

    # Transform and split input data
    train_input = self.prepare_forecaster_input(data=data, forecaster_name=forecaster_name)
    val_input = prepare(data_val)
    test_input = prepare(data_test)

    # Rows without a target value cannot be trained or scored on, so drop them in every split.
    drop_missing_target = partial(pd.DataFrame.dropna, subset=[self.target_column])  # pyright: ignore[reportUnknownMemberType]
    train_input = train_input.pipe_pandas(drop_missing_target)
    if val_input:
        val_input = val_input.pipe_pandas(drop_missing_target)
    else:
        val_input = None
    if test_input:
        test_input = test_input.pipe_pandas(drop_missing_target)
    else:
        test_input = None

    # Let the splitter derive whichever splits were not supplied explicitly.
    train_input, val_input, test_input = self.data_splitter.split_dataset(
        data=train_input, data_val=val_input, data_test=test_input, target_column=self.target_column
    )

    # Fit the model
    logger.debug("Started fitting forecaster '%s'.", forecaster_name)
    forecaster.fit(data=train_input, data_val=val_input)
    logger.debug("Completed fitting forecaster '%s'.", forecaster_name)

    def predict_and_score(split_input: ForecastInputDataset | None):
        # Returns (prediction, metrics) for a split, or (None, None) when the split is absent.
        if split_input is None:
            return None, None
        split_prediction = self._predict_forecaster(input_data=split_input, forecaster_name=forecaster_name)
        return split_prediction, self._calculate_score(prediction=split_prediction)

    prediction_train = self._predict_forecaster(input_data=train_input, forecaster_name=forecaster_name)
    metrics_train = self._calculate_score(prediction=prediction_train)
    prediction_val, metrics_val = predict_and_score(val_input)
    prediction_test, metrics_test = predict_and_score(test_input)

    fit_result = ModelFitResult(
        input_dataset=train_input,
        input_data_train=train_input,
        input_data_val=val_input,
        input_data_test=test_input,
        metrics_train=metrics_train,
        metrics_val=metrics_val,
        metrics_test=metrics_test,
        # The "full" metrics are reported as the training metrics.
        metrics_full=metrics_train,
    )

    return prediction_train, prediction_val, prediction_test, fit_result
463
+
464
def _predict_forecaster(self, input_data: ForecastInputDataset, forecaster_name: str) -> ForecastDataset:
    """Predict with one base forecaster and postprocess the result.

    The raw prediction gets the target column restored from ``input_data``, then
    passes through the per-forecaster postprocessing followed by the shared
    postprocessing.
    """
    logger.debug("Predicting forecaster '%s'.", forecaster_name)
    forecaster = self.forecasters[forecaster_name]
    raw_prediction = forecaster.predict(data=input_data)

    with_target = restore_target(
        dataset=raw_prediction, original_dataset=input_data, target_column=self.target_column
    )
    model_postprocessed = self.model_specific_postprocessing.transform(with_target)
    return self.postprocessing.transform(model_postprocessed)
472
+
473
def _predict_forecasters(
    self,
    data: TimeSeriesDataset,
    forecast_start: datetime | None = None,
) -> EnsembleForecastDataset:
    """Predict with every base forecaster and bundle the results.

    Args:
        data: Input time series dataset.
        forecast_start: Optional forecast start time passed to input preparation.

    Returns:
        EnsembleForecastDataset built from all forecasters' predictions, with the
        target column of ``data`` attached as target series.
    """
    per_forecaster: dict[str, ForecastDataset] = {}
    for forecaster_name in self.forecasters:
        logger.debug("Generating predictions for forecaster '%s'.", forecaster_name)
        forecaster_input = self.prepare_forecaster_input(
            data=data, forecast_start=forecast_start, forecaster_name=forecaster_name
        )
        per_forecaster[forecaster_name] = self._predict_forecaster(
            input_data=forecaster_input, forecaster_name=forecaster_name
        )

    target = data.data[self.target_column]
    return EnsembleForecastDataset.from_forecast_datasets(per_forecaster, target_series=target)
488
+
489
def prepare_forecaster_input(
    self,
    data: TimeSeriesDataset,
    forecaster_name: str = "",
    forecast_start: datetime | None = None,
) -> ForecastInputDataset:
    """Prepare input data for a specific base forecaster.

    When the forecaster has no model-specific preprocessing, this delegates to the
    shared base ``prepare_input``. Otherwise it applies common preprocessing, then
    the model-specific preprocessing, restores the target column from ``data`` and
    trims the start of the history by ``cutoff_history``.

    Args:
        data: Raw time series dataset.
        forecaster_name: Which forecaster to prepare data for.
        forecast_start: Optional forecast start time override.

    Returns:
        Processed forecast input dataset ready for the named forecaster.
    """
    logger.debug("Preparing input data for forecaster '%s'.", forecaster_name)

    # No model-specific preprocessing — delegate entirely to shared base method
    if forecaster_name not in self.model_specific_preprocessing:
        return self.prepare_input(data=data, forecast_start=forecast_start)

    # Apply model-specific preprocessing on top of the common pipeline
    logger.debug("Applying model-specific preprocessing for forecaster '%s'.", forecaster_name)
    preprocessed = self.preprocessing.transform(data=data)
    preprocessed = self.model_specific_preprocessing[forecaster_name].transform(data=preprocessed)
    preprocessed = restore_target(dataset=preprocessed, original_dataset=data, target_column=self.target_column)

    # Apply cutoff and create ForecastInputDataset
    input_data_start = cast("pd.Series[pd.Timestamp]", preprocessed.index).min().to_pydatetime()
    input_data_cutoff = input_data_start + self.cutoff_history
    if forecast_start is not None and forecast_start < input_data_cutoff:
        # Log BEFORE overwriting the cutoff so the message shows the computed cutoff
        # (input start + cutoff history) rather than the forecast start twice.
        self._logger.warning(
            "Forecast start %s is before input data start + cutoff history %s. Using forecast start as cutoff.",
            forecast_start,
            input_data_cutoff,
        )
        input_data_cutoff = forecast_start
    preprocessed = preprocessed.filter_by_range(start=input_data_cutoff)

    return ForecastInputDataset.from_timeseries(
        dataset=preprocessed,
        target_column=self.target_column,
        forecast_start=forecast_start,
    )
535
+
536
def _predict_transform_combiner(
    self, ensemble_dataset: EnsembleForecastDataset, original_data: TimeSeriesDataset
) -> ForecastDataset:
    """Derive the combiner's additional features from ``original_data`` and predict.

    The features are None when no combiner preprocessing is configured.
    """
    logger.debug("Predicting combiner.")
    additional_features = self._transform_combiner_data(data=original_data)
    return self._predict_combiner(ensemble_dataset, additional_features)
543
+
544
def _predict_combiner(
    self,
    ensemble_dataset: EnsembleForecastDataset,
    features: ForecastInputDataset | None,
) -> ForecastDataset:
    """Run the combiner on base predictions and postprocess its output.

    The raw combiner prediction gets the target column restored from
    ``ensemble_dataset``, then passes through the combiner postprocessing followed
    by the shared postprocessing.
    """
    logger.debug("Predicting combiner.")
    raw_prediction = self.combiner.predict(ensemble_dataset, additional_features=features)
    with_target = restore_target(
        dataset=raw_prediction, original_dataset=ensemble_dataset, target_column=self.target_column
    )
    combiner_postprocessed = self.combiner_postprocessing.transform(with_target)
    return self.postprocessing.transform(combiner_postprocessed)
556
+
557
def _fit_combiner(
    self,
    data: TimeSeriesDataset,
    train_ensemble_dataset: EnsembleForecastDataset,
    data_val: TimeSeriesDataset | None = None,
    data_test: TimeSeriesDataset | None = None,
    val_ensemble_dataset: EnsembleForecastDataset | None = None,
    test_ensemble_dataset: EnsembleForecastDataset | None = None,
) -> ModelFitResult:
    """Fit the combiner and its postprocessing, then score it on each split.

    Args:
        data: Training data used to derive the combiner's additional features.
        train_ensemble_dataset: Base forecasters' training predictions.
        data_val: Optional validation data for additional features.
        data_test: Optional test data for additional features.
        val_ensemble_dataset: Optional base predictions for validation.
        test_ensemble_dataset: Optional base predictions for test.

    Returns:
        ModelFitResult describing the combiner fit.
    """
    # Prepare additional features for the combiner (e.g. sample weights) — split separately from ensemble data
    features_train, features_val, features_test = self._fit_prepare_combiner_data(
        data=data, data_val=data_val, data_test=data_test
    )

    logger.debug("Fitting combiner.")
    self.combiner.fit(
        data=train_ensemble_dataset, data_val=val_ensemble_dataset, additional_features=features_train
    )

    # Fit combiner postprocessing on the raw training predictions; only the fitted
    # state matters here, the transformed output is discarded.
    raw_train = self.combiner.predict(train_ensemble_dataset, additional_features=features_train)
    raw_train = restore_target(
        dataset=raw_train, original_dataset=train_ensemble_dataset, target_column=self.target_column
    )
    self.combiner_postprocessing.fit_transform(raw_train)

    def score_split(ensemble: EnsembleForecastDataset | None, split_features: ForecastInputDataset | None):
        # Metrics for one split, or None when the split is absent.
        if ensemble is None:
            return None
        split_prediction = self._predict_combiner(ensemble, features=split_features)
        return self._calculate_score(prediction=split_prediction)

    prediction_train = self._predict_combiner(train_ensemble_dataset, features=features_train)
    metrics_train = self._calculate_score(prediction=prediction_train)
    metrics_val = score_split(val_ensemble_dataset, features_val)
    metrics_test = score_split(test_ensemble_dataset, features_test)

    # ModelFitResult expects ForecastInputDataset; use first quantile as a representative slice
    representative_quantile = self.quantiles[0]
    input_train = train_ensemble_dataset.get_base_predictions_for_quantile(quantile=representative_quantile)
    input_val = None
    if val_ensemble_dataset:
        input_val = val_ensemble_dataset.get_base_predictions_for_quantile(quantile=representative_quantile)
    input_test = None
    if test_ensemble_dataset:
        input_test = test_ensemble_dataset.get_base_predictions_for_quantile(quantile=representative_quantile)

    return ModelFitResult(
        input_dataset=train_ensemble_dataset,
        input_data_train=input_train,
        input_data_val=input_val,
        input_data_test=input_test,
        metrics_train=metrics_train,
        metrics_val=metrics_val,
        metrics_test=metrics_test,
        # The "full" metrics are reported as the training metrics.
        metrics_full=metrics_train,
    )
615
+
616
def _predict_contributions_combiner(
    self, ensemble_dataset: EnsembleForecastDataset, original_data: TimeSeriesDataset
) -> TimeSeriesDataset:
    """Ask the combiner for contributions, using features derived from ``original_data``.

    The additional features are None when no combiner preprocessing is configured.
    """
    additional_features = self._transform_combiner_data(data=original_data)
    contributions = self.combiner.predict_contributions(ensemble_dataset, additional_features=additional_features)
    return contributions
621
+
622
@override
def predict(self, data: TimeSeriesDataset, forecast_start: datetime | None = None) -> ForecastDataset:
    """Produce the combined ensemble forecast for ``data``.

    Args:
        data: Input time series dataset for prediction.
        forecast_start: Optional start time for forecasts.

    Returns:
        ForecastDataset containing the generated forecasts.

    Raises:
        NotFittedError: If the model has not been fitted yet.
    """
    fitted = self.is_fitted
    if not fitted:
        raise NotFittedError(self.__class__.__name__)
    logger.debug("Generating predictions.")

    # Step 1: collect the predictions of every base forecaster.
    base_predictions = self._predict_forecasters(data=data, forecast_start=forecast_start)

    # Step 2: combine them; the combiner's additional features are derived from ``data``.
    combined = self._predict_transform_combiner(ensemble_dataset=base_predictions, original_data=data)
    return combined
647
+
648
@override
def predict_contributions(
    self,
    data: TimeSeriesDataset,
    forecast_start: datetime | None = None,
) -> TimeSeriesDataset:
    """Compute the contributions behind the ensemble prediction.

    Args:
        data: Input time series dataset.
        forecast_start: Optional start time for forecasts.

    Returns:
        TimeSeriesDataset where each column is a base model's contribution.

    Raises:
        NotFittedError: If the model has not been fitted yet.
    """
    fitted = self.is_fitted
    if not fitted:
        raise NotFittedError(self.__class__.__name__)

    # Base forecaster predictions are the inputs the combiner attributes contributions to.
    base_predictions = self._predict_forecasters(data=data, forecast_start=forecast_start)
    contributions = self._predict_contributions_combiner(ensemble_dataset=base_predictions, original_data=data)
    return contributions
675
+
676
+
677
# Public API of this module. ``ModelFitResult`` is not defined here; it is imported
# from ``openstef_models.models.forecasting_model`` and re-exported.
__all__ = ["EnsembleForecastingModel", "EnsembleModelFitResult", "ModelFitResult"]