openstef 3.4.29__py3-none-any.whl → 3.4.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  2. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
  3. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +3 -0
  4. openstef/data/dutch_holidays.csv +1759 -0
  5. openstef/data/dutch_holidays.csv.license +3 -0
  6. openstef/data_classes/prediction_job.py +3 -1
  7. openstef/enums.py +105 -2
  8. openstef/feature_engineering/apply_features.py +26 -1
  9. openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
  10. openstef/feature_engineering/cyclic_features.py +102 -0
  11. openstef/feature_engineering/holiday_features.py +35 -26
  12. openstef/feature_engineering/missing_values_transformer.py +57 -15
  13. openstef/model/model_creator.py +24 -20
  14. openstef/model/objective.py +7 -7
  15. openstef/model/objective_creator.py +11 -11
  16. openstef/model/regressors/flatliner.py +4 -9
  17. openstef/model/regressors/linear_quantile.py +58 -9
  18. openstef/model/regressors/xgb.py +23 -0
  19. openstef/model_selection/model_selection.py +1 -1
  20. openstef/pipeline/create_component_forecast.py +13 -6
  21. openstef/pipeline/train_model.py +8 -5
  22. openstef/tasks/calculate_kpi.py +3 -3
  23. openstef/tasks/create_basecase_forecast.py +2 -2
  24. openstef/tasks/create_components_forecast.py +4 -4
  25. openstef/tasks/create_forecast.py +4 -4
  26. openstef/tasks/create_solar_forecast.py +4 -4
  27. openstef/tasks/optimize_hyperparameters.py +2 -2
  28. openstef/tasks/split_forecast.py +2 -2
  29. openstef/tasks/train_model.py +2 -2
  30. openstef/validation/validation.py +1 -1
  31. {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/METADATA +38 -26
  32. {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/RECORD +36 -30
  33. {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
  34. openstef/data/dutch_holidays_2020-2022.csv +0 -831
  35. /openstef/data/{dutch_holidays_2020-2022.csv.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
  36. {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
  37. {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ from typing import Union
6
6
 
7
7
  import structlog
8
8
 
9
- from openstef.enums import MLModelType
9
+ from openstef.enums import ModelType
10
10
  from openstef.model.regressors.arima import ARIMAOpenstfRegressor
11
11
  from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
12
12
  from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
@@ -29,7 +29,7 @@ structlog.configure(
29
29
  logger = structlog.get_logger(__name__)
30
30
 
31
31
  valid_model_kwargs = {
32
- MLModelType.XGB: [
32
+ ModelType.XGB: [
33
33
  "n_estimators",
34
34
  "objective",
35
35
  "max_depth",
@@ -60,7 +60,7 @@ valid_model_kwargs = {
60
60
  "validate_parameters",
61
61
  "early_stopping_rounds",
62
62
  ],
63
- MLModelType.LGB: [
63
+ ModelType.LGB: [
64
64
  "boosting_type",
65
65
  "objective",
66
66
  "num_leaves",
@@ -82,7 +82,7 @@ valid_model_kwargs = {
82
82
  "importance_type",
83
83
  "early_stopping_rounds",
84
84
  ],
85
- MLModelType.XGB_QUANTILE: [
85
+ ModelType.XGB_QUANTILE: [
86
86
  "quantiles",
87
87
  "gamma",
88
88
  "colsample_bytree",
@@ -91,7 +91,7 @@ valid_model_kwargs = {
91
91
  "max_depth",
92
92
  "early_stopping_rounds",
93
93
  ],
94
- MLModelType.XGB_MULTIOUTPUT_QUANTILE: [
94
+ ModelType.XGB_MULTIOUTPUT_QUANTILE: [
95
95
  "quantiles",
96
96
  "gamma",
97
97
  "colsample_bytree",
@@ -101,23 +101,27 @@ valid_model_kwargs = {
101
101
  "early_stopping_rounds",
102
102
  "arctan_smoothing",
103
103
  ],
104
- MLModelType.LINEAR: [
104
+ ModelType.LINEAR: [
105
105
  "missing_values",
106
106
  "imputation_strategy",
107
107
  "fill_value",
108
108
  ],
109
- MLModelType.FLATLINER: [
109
+ ModelType.FLATLINER: [
110
110
  "quantiles",
111
111
  ],
112
- MLModelType.LINEAR_QUANTILE: [
112
+ ModelType.LINEAR_QUANTILE: [
113
113
  "alpha",
114
114
  "quantiles",
115
115
  "solver",
116
116
  "missing_values",
117
117
  "imputation_strategy",
118
118
  "fill_value",
119
+ "weight_scale_percentile",
120
+ "weight_exponent",
121
+ "weight_floor",
122
+ "no_fill_future_values_features",
119
123
  ],
120
- MLModelType.ARIMA: [
124
+ ModelType.ARIMA: [
121
125
  "backtest_max_horizon",
122
126
  "order",
123
127
  "seasonal_order",
@@ -131,18 +135,18 @@ class ModelCreator:
131
135
 
132
136
  # Set object mapping
133
137
  MODEL_CONSTRUCTORS = {
134
- MLModelType.XGB: XGBOpenstfRegressor,
135
- MLModelType.LGB: LGBMOpenstfRegressor,
136
- MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
137
- MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
138
- MLModelType.LINEAR: LinearOpenstfRegressor,
139
- MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
140
- MLModelType.ARIMA: ARIMAOpenstfRegressor,
141
- MLModelType.FLATLINER: FlatlinerRegressor,
138
+ ModelType.XGB: XGBOpenstfRegressor,
139
+ ModelType.LGB: LGBMOpenstfRegressor,
140
+ ModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
141
+ ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
142
+ ModelType.LINEAR: LinearOpenstfRegressor,
143
+ ModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
144
+ ModelType.ARIMA: ARIMAOpenstfRegressor,
145
+ ModelType.FLATLINER: FlatlinerRegressor,
142
146
  }
143
147
 
144
148
  @staticmethod
145
- def create_model(model_type: Union[MLModelType, str], **kwargs) -> OpenstfRegressor:
149
+ def create_model(model_type: Union[ModelType, str], **kwargs) -> OpenstfRegressor:
146
150
  """Create a machine learning model based on model type.
147
151
 
148
152
  Args:
@@ -163,7 +167,7 @@ class ModelCreator:
163
167
  model_class = load_custom_model(model_type)
164
168
  valid_kwargs = model_class.valid_kwargs()
165
169
  else:
166
- model_type = MLModelType(model_type)
170
+ model_type = ModelType(model_type)
167
171
  model_class = ModelCreator.MODEL_CONSTRUCTORS[model_type]
168
172
  valid_kwargs = valid_model_kwargs[model_type]
169
173
  # Check if model is imported
@@ -174,7 +178,7 @@ class ModelCreator:
174
178
  "Please refer to the ReadMe for instructions"
175
179
  )
176
180
  except ValueError as e:
177
- valid_types = [t.value for t in MLModelType]
181
+ valid_types = [t.value for t in ModelType]
178
182
  raise NotImplementedError(
179
183
  f"No constructor for '{model_type}', "
180
184
  f"valid model_types are: {valid_types} "
@@ -8,7 +8,7 @@ from typing import Any, Callable, Optional
8
8
  import optuna
9
9
  import pandas as pd
10
10
 
11
- from openstef.enums import MLModelType
11
+ from openstef.enums import ModelType
12
12
  from openstef.metrics import metrics
13
13
  from openstef.metrics.reporter import Report, Reporter
14
14
  from openstef.model.regressors.regressor import OpenstfRegressor
@@ -245,7 +245,7 @@ class RegressorObjective:
245
245
  class XGBRegressorObjective(RegressorObjective):
246
246
  def __init__(self, *args, **kwargs):
247
247
  super().__init__(*args, **kwargs)
248
- self.model_type = MLModelType.XGB
248
+ self.model_type = ModelType.XGB
249
249
 
250
250
  # extend the parameters with the model specific ones per implementation
251
251
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
@@ -282,7 +282,7 @@ class XGBRegressorObjective(RegressorObjective):
282
282
  class LGBRegressorObjective(RegressorObjective):
283
283
  def __init__(self, *args, **kwargs):
284
284
  super().__init__(*args, **kwargs)
285
- self.model_type = MLModelType.LGB
285
+ self.model_type = ModelType.LGB
286
286
 
287
287
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
288
288
  """Get parameters for LGB Regressor Objective with objective specific parameters.
@@ -323,7 +323,7 @@ class LGBRegressorObjective(RegressorObjective):
323
323
  class XGBQuantileRegressorObjective(RegressorObjective):
324
324
  def __init__(self, *args, **kwargs):
325
325
  super().__init__(*args, **kwargs)
326
- self.model_type = MLModelType.XGB_QUANTILE
326
+ self.model_type = ModelType.XGB_QUANTILE
327
327
 
328
328
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
329
329
  """Get parameters for XGBQuantile Regressor Objective with objective specific parameters.
@@ -352,7 +352,7 @@ class XGBQuantileRegressorObjective(RegressorObjective):
352
352
  class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
353
353
  def __init__(self, *args, **kwargs):
354
354
  super().__init__(*args, **kwargs)
355
- self.model_type = MLModelType.XGB_QUANTILE
355
+ self.model_type = ModelType.XGB_QUANTILE
356
356
 
357
357
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
358
358
  """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.
@@ -382,7 +382,7 @@ class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
382
382
  class LinearRegressorObjective(RegressorObjective):
383
383
  def __init__(self, *args, **kwargs):
384
384
  super().__init__(*args, **kwargs)
385
- self.model_type = MLModelType.LINEAR
385
+ self.model_type = ModelType.LINEAR
386
386
 
387
387
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
388
388
  """Get parameters for Linear Regressor Objective with objective specific parameters.
@@ -405,7 +405,7 @@ class LinearRegressorObjective(RegressorObjective):
405
405
  class ARIMARegressorObjective(RegressorObjective):
406
406
  def __init__(self, *args, **kwargs):
407
407
  super().__init__(*args, **kwargs)
408
- self.model_type = MLModelType.ARIMA
408
+ self.model_type = ModelType.ARIMA
409
409
 
410
410
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
411
411
  """Get parameters for ARIMA Regressor Objective with objective specific parameters.
@@ -4,7 +4,7 @@
4
4
 
5
5
  from typing import Union
6
6
 
7
- from openstef.enums import MLModelType
7
+ from openstef.enums import ModelType
8
8
  from openstef.model.objective import (
9
9
  ARIMARegressorObjective,
10
10
  LGBRegressorObjective,
@@ -22,17 +22,17 @@ from openstef.model.regressors.custom_regressor import (
22
22
 
23
23
  class ObjectiveCreator:
24
24
  OBJECTIVES = {
25
- MLModelType.XGB: XGBRegressorObjective,
26
- MLModelType.LGB: LGBRegressorObjective,
27
- MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
28
- MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
29
- MLModelType.LINEAR: LinearRegressorObjective,
30
- MLModelType.LINEAR_QUANTILE: LinearRegressorObjective,
31
- MLModelType.ARIMA: ARIMARegressorObjective,
25
+ ModelType.XGB: XGBRegressorObjective,
26
+ ModelType.LGB: LGBRegressorObjective,
27
+ ModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
28
+ ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
29
+ ModelType.LINEAR: LinearRegressorObjective,
30
+ ModelType.LINEAR_QUANTILE: LinearRegressorObjective,
31
+ ModelType.ARIMA: ARIMARegressorObjective,
32
32
  }
33
33
 
34
34
  @staticmethod
35
- def create_objective(model_type: Union[MLModelType, str]) -> RegressorObjective:
35
+ def create_objective(model_type: Union[ModelType, str]) -> RegressorObjective:
36
36
  """Create an objective function based on model type.
37
37
 
38
38
  Args:
@@ -51,10 +51,10 @@ class ObjectiveCreator:
51
51
  if is_custom_type(model_type):
52
52
  objective = create_custom_objective(model_type)
53
53
  else:
54
- model_type = MLModelType(model_type)
54
+ model_type = ModelType(model_type)
55
55
  objective = ObjectiveCreator.OBJECTIVES[model_type]
56
56
  except ValueError as e:
57
- valid_types = [t.value for t in MLModelType]
57
+ valid_types = [t.value for t in ModelType]
58
58
  raise NotImplementedError(
59
59
  f"No objective for '{model_type}', "
60
60
  f"valid model_types are: {valid_types}"
@@ -2,18 +2,13 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  import re
5
- from typing import Dict, Union, Set, Optional, List
5
+ from typing import List
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
9
9
  from sklearn.base import RegressorMixin
10
- from sklearn.linear_model import QuantileRegressor
11
- from sklearn.preprocessing import MinMaxScaler
12
10
  from sklearn.utils.validation import check_is_fitted
13
11
 
14
- from openstef.feature_engineering.missing_values_transformer import (
15
- MissingValuesTransformer,
16
- )
17
12
  from openstef.model.regressors.regressor import OpenstfRegressor
18
13
 
19
14
 
@@ -23,9 +18,9 @@ class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
23
18
  def __init__(self, quantiles=None):
24
19
  """Initialize FlatlinerRegressor.
25
20
 
26
- The model always predicts 0.0, regardless of the input features. The model is
27
- meant to be used for flatliner locations that still expect a prediction while
28
- preserving the prediction interface.
21
+ The model always predicts 0.0, regardless of the input features. The model is meant to be used for flatliner
22
+ locations that still expect a prediction while preserving the prediction interface.
23
+
29
24
  """
30
25
  super().__init__()
31
26
  self.quantiles = quantiles
@@ -2,13 +2,13 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  import re
5
- from typing import Dict, Union, Set, Optional
5
+ from typing import Dict, Union, Set, Optional, List
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
9
9
  from sklearn.base import RegressorMixin
10
10
  from sklearn.linear_model import QuantileRegressor
11
- from sklearn.preprocessing import MinMaxScaler
11
+ from sklearn.preprocessing import StandardScaler
12
12
  from sklearn.utils.validation import check_is_fitted
13
13
 
14
14
  from openstef.feature_engineering.missing_values_transformer import (
@@ -25,8 +25,8 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
25
25
  solver: str
26
26
 
27
27
  imputer_: MissingValuesTransformer
28
- x_scaler_: MinMaxScaler
29
- y_scaler_: MinMaxScaler
28
+ x_scaler_: StandardScaler
29
+ y_scaler_: StandardScaler
30
30
  models_: Dict[float, QuantileRegressor]
31
31
 
32
32
  is_fitted_: bool = False
@@ -47,6 +47,10 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
47
47
  missing_values: Union[int, float, str, None] = np.nan,
48
48
  imputation_strategy: Optional[str] = "mean",
49
49
  fill_value: Union[str, int, float] = None,
50
+ weight_scale_percentile: int = 95,
51
+ weight_exponent: float = 1,
52
+ weight_floor: float = 0.1,
53
+ no_fill_future_values_features: List[str] = None,
50
54
  ):
51
55
  """Initialize LinearQuantileOpenstfRegressor.
52
56
 
@@ -69,6 +73,12 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
69
73
  missing_values: Value to be considered as missing value
70
74
  imputation_strategy: Imputation strategy
71
75
  fill_value: Fill value
76
+ weight_scale_percentile: Percentile used in scaling of the samples
77
+ weight_exponent: Exponent used in sample weighting
78
+ weight_floor: Minimum weight for samples
79
+ no_fill_future_values_features: The features for which it does not make sense
80
+ to fill future values. Rows that contain trailing null values for these
81
+ features will be removed from the data.
72
82
 
73
83
  """
74
84
  super().__init__()
@@ -82,13 +92,17 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
82
92
  self.quantiles = quantiles
83
93
  self.alpha = alpha
84
94
  self.solver = solver
95
+ self.weight_scale_percentile = weight_scale_percentile
96
+ self.weight_exponent = weight_exponent
97
+ self.weight_floor = weight_floor
85
98
  self.imputer_ = MissingValuesTransformer(
86
99
  missing_values=missing_values,
87
100
  imputation_strategy=imputation_strategy,
88
101
  fill_value=fill_value,
102
+ no_fill_future_values_features=no_fill_future_values_features,
89
103
  )
90
- self.x_scaler_ = MinMaxScaler(feature_range=(-1, 1))
91
- self.y_scaler_ = MinMaxScaler(feature_range=(-1, 1))
104
+ self.x_scaler_ = StandardScaler()
105
+ self.y_scaler_ = StandardScaler()
92
106
  self.models_ = {
93
107
  quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
94
108
  for quantile in quantiles
@@ -165,7 +179,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
165
179
  x = self._remove_ignored_features(x)
166
180
 
167
181
  # Fix nan columns
168
- x = self.imputer_.fit_transform(x)
182
+ x, y = self.imputer_.fit_transform(x, y)
169
183
  if x.isna().any().any():
170
184
  raise ValueError(
171
185
  "There are nan values in the input data. Set "
@@ -177,7 +191,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
177
191
  y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]
178
192
 
179
193
  # Add more focus on extreme / peak values
180
- sample_weight = np.abs(y_scaled)
194
+ sample_weight = self._calculate_sample_weights(y.values.squeeze())
181
195
 
182
196
  # Fit quantile regressors
183
197
  for quantile in self.quantiles:
@@ -191,6 +205,33 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
191
205
 
192
206
  return self
193
207
 
208
+ def _calculate_sample_weights(self, y: np.array):
209
+ """Calculate sample weights based on the y values of arbitrary scale.
210
+
211
+ The resulting weights are in the range [0,1] and are used to put more emphasis
212
+ on certain samples. The sample weighting function does:
213
+
214
+ * Rescale the absolute values to a [0, 1] range using percentile scaling. With the default
215
+ weight_scale_percentile of 95, 95% of the data will be within this range. Rest is outside.
216
+ * Calculate the weight by taking the exponent of scaled data.
217
+ * exponent=0: Results in uniform weights for all samples.
218
+ * exponent=1: Results in linearly increasing weights for samples that are
219
+ closer to the extremes.
220
+ * exponent>1: Results in exponentially increasing weights for samples that are
221
+ closer to the extremes.
222
+ * Clip the data to [0, 1] range with weight_floor as the minimum weight.
223
+ * Weight floor is used to make sure that all the samples are considered.
224
+
225
+ """
226
+ return np.clip(
227
+ _weight_exp(
228
+ _scale_percentile(y, percentile=self.weight_scale_percentile),
229
+ exponent=self.weight_exponent,
230
+ ),
231
+ a_min=self.weight_floor,
232
+ a_max=1,
233
+ )
234
+
194
235
  def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
195
236
  """Makes a prediction for a desired quantile.
196
237
 
@@ -231,7 +272,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
231
272
  return np.array(
232
273
  [
233
274
  reg_feature_importances_dict.get(c, 0)
234
- for c in self.imputer_.in_feature_names
275
+ for c in self.imputer_.non_null_feature_names
235
276
  ]
236
277
  )
237
278
 
@@ -245,3 +286,11 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
245
286
 
246
287
  def __sklearn_is_fitted__(self) -> bool:
247
288
  return self.is_fitted_
289
+
290
+
291
+ def _scale_percentile(x: np.ndarray, percentile: int = 95):
292
+ return np.abs(x / np.percentile(np.abs(x), percentile))
293
+
294
+
295
+ def _weight_exp(x: np.ndarray, exponent: float = 1):
296
+ return np.abs(x) ** exponent
@@ -1,6 +1,10 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ from typing import Optional
5
+
6
+ import numpy as np
7
+ from sklearn.base import RegressorMixin
4
8
 
5
9
  from xgboost import XGBRegressor
6
10
 
@@ -27,3 +31,22 @@ class XGBOpenstfRegressor(XGBRegressor, OpenstfRegressor):
27
31
  "gain_importance_name": "total_gain",
28
32
  "weight_importance_name": "weight",
29
33
  }
34
+
35
+ def fit(
36
+ self,
37
+ x: np.array,
38
+ y: np.array,
39
+ *,
40
+ early_stopping_rounds: Optional[int] = None,
41
+ callbacks: Optional[list] = None,
42
+ eval_metric: Optional[str] = None,
43
+ **kwargs
44
+ ):
45
+ if early_stopping_rounds is not None:
46
+ self.set_params(early_stopping_rounds=early_stopping_rounds)
47
+ if callbacks is not None:
48
+ self.set_params(callbacks=callbacks)
49
+ if eval_metric is not None:
50
+ self.set_params(eval_metric=eval_metric)
51
+
52
+ super().fit(x, y, **kwargs)
@@ -106,7 +106,7 @@ def split_data_train_validation_test(
106
106
  validation_fraction: float = 0.15,
107
107
  back_test: bool = False,
108
108
  stratification_min_max: bool = True,
109
- ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
109
+ ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
110
110
  """Split input data into train, test and validation set.
111
111
 
112
112
  Function for splitting data with features in a train, test and
@@ -108,7 +108,7 @@ def create_components_forecast_pipeline(
108
108
 
109
109
  # Make component forecasts
110
110
  try:
111
- input_data = create_input(pj, input_data, weather_data)
111
+ dazls_input_data = create_input(pj, input_data, weather_data)
112
112
 
113
113
  # Save and load the model as .sav file (or as .z file)
114
114
  # For the code contact: korte.termijn.prognoses@alliander.com
@@ -119,13 +119,13 @@ def create_components_forecast_pipeline(
119
119
 
120
120
  # Use the predict function of Dazls model
121
121
  # As input data we use the input_data function which takes into consideration what we want as an input for the forecast and what Dazls can accept as an input
122
- forecasts = dazls_model.predict(x=input_data)
122
+ forecasts = dazls_model.predict(x=dazls_input_data)
123
123
 
124
124
  # Set the columns for the output forecast dataframe
125
125
  forecasts = pd.DataFrame(
126
126
  forecasts,
127
127
  columns=["forecast_wind_on_shore", "forecast_solar"],
128
- index=input_data.index,
128
+ index=dazls_input_data.index,
129
129
  )
130
130
 
131
131
  # Make post-processed forecasts for solar and wind power
@@ -140,18 +140,25 @@ def create_components_forecast_pipeline(
140
140
 
141
141
  # Make forecast for the component: "forecast_other"
142
142
  forecasts["forecast_other"] = (
143
- input_data["total_load"]
143
+ dazls_input_data["total_load"]
144
144
  - forecasts["forecast_solar"]
145
145
  - forecasts["forecast_wind_on_shore"]
146
146
  )
147
+
148
+ # Make sure the forecasts have the same form as the input data. Pad with 0 if necessary
149
+ forecasts = forecasts.reindex(index=input_data.index, fill_value=0)
147
150
  except Exception as e:
148
- # In case something goes wrong we fall back on aan empty dataframe
151
+ # In case something goes wrong we fall back on a zero-filled dataframe
149
152
  logger.warning(
150
153
  f"Could not make component forecasts: {e}, falling back on series of"
151
154
  " zeros!",
152
155
  exc_info=e,
153
156
  )
154
- forecasts = pd.DataFrame()
157
+ forecasts = pd.DataFrame(
158
+ data=0,
159
+ index=input_data.index,
160
+ columns=["forecast_wind_on_shore", "forecast_solar", "forecast_other"],
161
+ )
155
162
 
156
163
  # Prepare for output
157
164
  # Add more prediction properties to the forecast ("pid","customer","description","type","algtype)
@@ -3,7 +3,7 @@
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  import logging
5
5
  import os
6
- from typing import Optional, Union
6
+ from typing import Optional, Union, Tuple
7
7
 
8
8
  import pandas as pd
9
9
  import structlog
@@ -155,7 +155,7 @@ def train_model_pipeline_core(
155
155
  input_data: pd.DataFrame,
156
156
  old_model: OpenstfRegressor = None,
157
157
  horizons: list[float] = DEFAULT_TRAIN_HORIZONS_HOURS,
158
- ) -> Union[
158
+ ) -> Tuple[
159
159
  OpenstfRegressor,
160
160
  Report,
161
161
  ModelSpecificationDataClass,
@@ -246,7 +246,9 @@ def train_pipeline_common(
246
246
  test_fraction: float = 0.0,
247
247
  backtest: bool = False,
248
248
  test_data_predefined: pd.DataFrame = pd.DataFrame(),
249
- ) -> tuple[OpenstfRegressor, Report, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
249
+ ) -> tuple[
250
+ OpenstfRegressor, Report, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame
251
+ ]:
250
252
  """Common pipeline shared with operational training and backtest training.
251
253
 
252
254
  Args:
@@ -314,7 +316,8 @@ def train_pipeline_common(
314
316
 
315
317
  def train_pipeline_step_load_model(
316
318
  pj: PredictionJobDataClass, serializer: MLflowSerializer
317
- ) -> tuple[OpenstfRegressor, ModelSpecificationDataClass, Union[int, float]]:
319
+ ) -> Tuple[OpenstfRegressor, ModelSpecificationDataClass, Union[int, float]]:
320
+ old_model: Optional[OpenstfRegressor]
318
321
  try:
319
322
  old_model, model_specs = serializer.load_model(experiment_name=str(pj.id))
320
323
  old_model_age = old_model.age # Age attribute is openstef specific
@@ -509,7 +512,7 @@ def train_pipeline_step_split_data(
509
512
  test_fraction: float,
510
513
  backtest: bool = False,
511
514
  test_data_predefined: pd.DataFrame = pd.DataFrame(),
512
- ) -> Union[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
515
+ ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
513
516
  """The default way to perform train, val, test split.
514
517
 
515
518
  Args:
@@ -29,7 +29,7 @@ import pandas as pd
29
29
  import structlog
30
30
 
31
31
  from openstef.data_classes.prediction_job import PredictionJobDataClass
32
- from openstef.enums import MLModelType
32
+ from openstef.enums import ModelType
33
33
  from openstef.exceptions import NoPredictedLoadError, NoRealisedLoadError
34
34
  from openstef.metrics import metrics
35
35
  from openstef.settings import Settings
@@ -42,7 +42,7 @@ THRESHOLD_RETRAINING = 0.25
42
42
  THRESHOLD_OPTIMIZING = 0.50
43
43
 
44
44
 
45
- def main(model_type: MLModelType = None, config=None, database=None) -> None:
45
+ def main(model_type: ModelType = None, config=None, database=None) -> None:
46
46
  taskname = Path(__file__).name.replace(".py", "")
47
47
 
48
48
  if database is None or config is None:
@@ -52,7 +52,7 @@ def main(model_type: MLModelType = None, config=None, database=None) -> None:
52
52
  )
53
53
 
54
54
  if model_type is None:
55
- model_type = [ml.value for ml in MLModelType]
55
+ model_type = [ml.value for ml in ModelType]
56
56
 
57
57
  with TaskContext(taskname, config, database) as context:
58
58
  # Set start and end time
@@ -97,7 +97,7 @@ def create_basecase_forecast_task(
97
97
  context.database.write_forecast(basecase_forecast, t_ahead_series=True)
98
98
 
99
99
 
100
- def main(config: object = None, database: object = None):
100
+ def main(config: object = None, database: object = None, **kwargs):
101
101
  taskname = Path(__file__).name.replace(".py", "")
102
102
 
103
103
  if database is None or config is None:
@@ -110,7 +110,7 @@ def main(config: object = None, database: object = None):
110
110
  model_type = ["xgb", "xgb_quantile", "lgb"]
111
111
 
112
112
  PredictionJobLoop(context, model_type=model_type).map(
113
- create_basecase_forecast_task, context
113
+ create_basecase_forecast_task, context, **kwargs
114
114
  )
115
115
 
116
116
 
@@ -29,7 +29,7 @@ import pandas as pd
29
29
  import structlog
30
30
 
31
31
  from openstef.data_classes.prediction_job import PredictionJobDataClass
32
- from openstef.enums import MLModelType
32
+ from openstef.enums import ModelType
33
33
  from openstef.exceptions import ComponentForecastTooShortHorizonError
34
34
  from openstef.pipeline.create_component_forecast import (
35
35
  create_components_forecast_pipeline,
@@ -140,7 +140,7 @@ def create_components_forecast_task(
140
140
  )
141
141
 
142
142
 
143
- def main(config: object = None, database: object = None):
143
+ def main(config: object = None, database: object = None, **kwargs):
144
144
  taskname = Path(__file__).name.replace(".py", "")
145
145
 
146
146
  if database is None or config is None:
@@ -150,12 +150,12 @@ def main(config: object = None, database: object = None):
150
150
  )
151
151
 
152
152
  with TaskContext(taskname, config, database) as context:
153
- model_type = [ml.value for ml in MLModelType]
153
+ model_type = [ml.value for ml in ModelType]
154
154
 
155
155
  PredictionJobLoop(
156
156
  context,
157
157
  model_type=model_type,
158
- ).map(create_components_forecast_task, context)
158
+ ).map(create_components_forecast_task, context, **kwargs)
159
159
 
160
160
 
161
161
  if __name__ == "__main__":
@@ -24,7 +24,7 @@ from datetime import datetime, timedelta
24
24
  from pathlib import Path
25
25
 
26
26
  from openstef.data_classes.prediction_job import PredictionJobDataClass
27
- from openstef.enums import MLModelType, PipelineType
27
+ from openstef.enums import ModelType, PipelineType
28
28
  from openstef.exceptions import InputDataOngoingZeroFlatlinerError
29
29
  from openstef.pipeline.create_forecast import create_forecast_pipeline
30
30
  from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
@@ -118,7 +118,7 @@ def create_forecast_task(
118
118
  context.database.write_forecast(forecast, t_ahead_series=True)
119
119
 
120
120
 
121
- def main(model_type=None, config=None, database=None):
121
+ def main(model_type=None, config=None, database=None, **kwargs):
122
122
  taskname = Path(__file__).name.replace(".py", "")
123
123
 
124
124
  if database is None or config is None:
@@ -129,10 +129,10 @@ def main(model_type=None, config=None, database=None):
129
129
 
130
130
  with TaskContext(taskname, config, database) as context:
131
131
  if model_type is None:
132
- model_type = [ml.value for ml in MLModelType]
132
+ model_type = [ml.value for ml in ModelType]
133
133
 
134
134
  PredictionJobLoop(context, model_type=model_type).map(
135
- create_forecast_task, context
135
+ create_forecast_task, context, **kwargs
136
136
  )
137
137
 
138
138