openstef 3.4.29__py3-none-any.whl → 3.4.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +3 -0
- openstef/data/dutch_holidays.csv +1759 -0
- openstef/data/dutch_holidays.csv.license +3 -0
- openstef/data_classes/prediction_job.py +3 -1
- openstef/enums.py +105 -2
- openstef/feature_engineering/apply_features.py +26 -1
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
- openstef/feature_engineering/cyclic_features.py +102 -0
- openstef/feature_engineering/holiday_features.py +35 -26
- openstef/feature_engineering/missing_values_transformer.py +57 -15
- openstef/model/model_creator.py +24 -20
- openstef/model/objective.py +7 -7
- openstef/model/objective_creator.py +11 -11
- openstef/model/regressors/flatliner.py +4 -9
- openstef/model/regressors/linear_quantile.py +58 -9
- openstef/model/regressors/xgb.py +23 -0
- openstef/model_selection/model_selection.py +1 -1
- openstef/pipeline/create_component_forecast.py +13 -6
- openstef/pipeline/train_model.py +8 -5
- openstef/tasks/calculate_kpi.py +3 -3
- openstef/tasks/create_basecase_forecast.py +2 -2
- openstef/tasks/create_components_forecast.py +4 -4
- openstef/tasks/create_forecast.py +4 -4
- openstef/tasks/create_solar_forecast.py +4 -4
- openstef/tasks/optimize_hyperparameters.py +2 -2
- openstef/tasks/split_forecast.py +2 -2
- openstef/tasks/train_model.py +2 -2
- openstef/validation/validation.py +1 -1
- {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/METADATA +38 -26
- {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/RECORD +36 -30
- {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
- openstef/data/dutch_holidays_2020-2022.csv +0 -831
- /openstef/data/{dutch_holidays_2020-2022.csv.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
- {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
- {openstef-3.4.29.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
openstef/model/model_creator.py
CHANGED
@@ -6,7 +6,7 @@ from typing import Union
 
 import structlog
 
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.model.regressors.arima import ARIMAOpenstfRegressor
 from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
 from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
@@ -29,7 +29,7 @@ structlog.configure(
 logger = structlog.get_logger(__name__)
 
 valid_model_kwargs = {
-    MLModelType.XGB: [
+    ModelType.XGB: [
         "n_estimators",
         "objective",
         "max_depth",
@@ -60,7 +60,7 @@ valid_model_kwargs = {
         "validate_parameters",
         "early_stopping_rounds",
     ],
-    MLModelType.LGB: [
+    ModelType.LGB: [
         "boosting_type",
         "objective",
         "num_leaves",
@@ -82,7 +82,7 @@ valid_model_kwargs = {
         "importance_type",
         "early_stopping_rounds",
     ],
-    MLModelType.XGB_QUANTILE: [
+    ModelType.XGB_QUANTILE: [
         "quantiles",
         "gamma",
         "colsample_bytree",
@@ -91,7 +91,7 @@ valid_model_kwargs = {
         "max_depth",
         "early_stopping_rounds",
     ],
-    MLModelType.XGB_MULTIOUTPUT_QUANTILE: [
+    ModelType.XGB_MULTIOUTPUT_QUANTILE: [
         "quantiles",
         "gamma",
         "colsample_bytree",
@@ -101,23 +101,27 @@ valid_model_kwargs = {
         "early_stopping_rounds",
         "arctan_smoothing",
     ],
-    MLModelType.LINEAR: [
+    ModelType.LINEAR: [
         "missing_values",
         "imputation_strategy",
         "fill_value",
     ],
-    MLModelType.FLATLINER: [
+    ModelType.FLATLINER: [
         "quantiles",
     ],
-    MLModelType.LINEAR_QUANTILE: [
+    ModelType.LINEAR_QUANTILE: [
         "alpha",
         "quantiles",
         "solver",
         "missing_values",
         "imputation_strategy",
         "fill_value",
+        "weight_scale_percentile",
+        "weight_exponent",
+        "weight_floor",
+        "no_fill_future_values_features",
     ],
-    MLModelType.ARIMA: [
+    ModelType.ARIMA: [
         "backtest_max_horizon",
         "order",
         "seasonal_order",
@@ -131,18 +135,18 @@ class ModelCreator:
 
     # Set object mapping
     MODEL_CONSTRUCTORS = {
-        MLModelType.XGB: XGBOpenstfRegressor,
-        MLModelType.LGB: LGBMOpenstfRegressor,
-        MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
-        MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
-        MLModelType.LINEAR: LinearOpenstfRegressor,
-        MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
-        MLModelType.ARIMA: ARIMAOpenstfRegressor,
-        MLModelType.FLATLINER: FlatlinerRegressor,
+        ModelType.XGB: XGBOpenstfRegressor,
+        ModelType.LGB: LGBMOpenstfRegressor,
+        ModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
+        ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
+        ModelType.LINEAR: LinearOpenstfRegressor,
+        ModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
+        ModelType.ARIMA: ARIMAOpenstfRegressor,
+        ModelType.FLATLINER: FlatlinerRegressor,
     }
 
     @staticmethod
-    def create_model(model_type: Union[MLModelType, str], **kwargs) -> OpenstfRegressor:
+    def create_model(model_type: Union[ModelType, str], **kwargs) -> OpenstfRegressor:
         """Create a machine learning model based on model type.
 
         Args:
@@ -163,7 +167,7 @@ class ModelCreator:
             model_class = load_custom_model(model_type)
             valid_kwargs = model_class.valid_kwargs()
         else:
-            model_type = MLModelType(model_type)
+            model_type = ModelType(model_type)
             model_class = ModelCreator.MODEL_CONSTRUCTORS[model_type]
             valid_kwargs = valid_model_kwargs[model_type]
         # Check if model as imported
@@ -174,7 +178,7 @@ class ModelCreator:
                 "Please refer to the ReadMe for instructions"
             )
         except ValueError as e:
-            valid_types = [t.value for t in MLModelType]
+            valid_types = [t.value for t in ModelType]
             raise NotImplementedError(
                 f"No constructor for '{model_type}', "
                 f"valid model_types are: {valid_types} "
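The thread running through most of these files is the rename of the enum MLModelType to ModelType. A minimal usage sketch of the constructor mapping above, assuming "linear_quantile" is the enum's string value; only kwargs listed in valid_model_kwargs for the model type are expected to reach the constructor:

from openstef.enums import ModelType
from openstef.model.model_creator import ModelCreator

# The enum member and its string value should resolve to the same class,
# since create_model coerces strings through ModelType(model_type).
model = ModelCreator.create_model(ModelType.LINEAR_QUANTILE, quantiles=[0.1, 0.5, 0.9])
same = ModelCreator.create_model("linear_quantile", quantiles=[0.1, 0.5, 0.9])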
openstef/model/objective.py
CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Callable, Optional
 import optuna
 import pandas as pd
 
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.metrics import metrics
 from openstef.metrics.reporter import Report, Reporter
 from openstef.model.regressors.regressor import OpenstfRegressor
@@ -245,7 +245,7 @@ class RegressorObjective:
 class XGBRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.XGB
+        self.model_type = ModelType.XGB
 
     # extend the parameters with the model specific ones per implementation
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
@@ -282,7 +282,7 @@ class XGBRegressorObjective(RegressorObjective):
 class LGBRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.LGB
+        self.model_type = ModelType.LGB
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for LGB Regressor Objective with objective specific parameters.
@@ -323,7 +323,7 @@ class LGBRegressorObjective(RegressorObjective):
 class XGBQuantileRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.XGB_QUANTILE
+        self.model_type = ModelType.XGB_QUANTILE
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for XGBQuantile Regressor Objective with objective specific parameters.
@@ -352,7 +352,7 @@ class XGBQuantileRegressorObjective(RegressorObjective):
 class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.XGB_QUANTILE
+        self.model_type = ModelType.XGB_QUANTILE
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.
@@ -382,7 +382,7 @@ class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
 class LinearRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.LINEAR
+        self.model_type = ModelType.LINEAR
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for Linear Regressor Objective with objective specific parameters.
@@ -405,7 +405,7 @@ class LinearRegressorObjective(RegressorObjective):
 class ARIMARegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.ARIMA
+        self.model_type = ModelType.ARIMA
 
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
         """Get parameters for ARIMA Regressor Objective with objective specific parameters.

openstef/model/objective_creator.py
CHANGED
@@ -4,7 +4,7 @@
 
 from typing import Union
 
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.model.objective import (
     ARIMARegressorObjective,
     LGBRegressorObjective,
@@ -22,17 +22,17 @@ from openstef.model.regressors.custom_regressor import (
 
 class ObjectiveCreator:
     OBJECTIVES = {
-        MLModelType.XGB: XGBRegressorObjective,
-        MLModelType.LGB: LGBRegressorObjective,
-        MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
-        MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
-        MLModelType.LINEAR: LinearRegressorObjective,
-        MLModelType.LINEAR_QUANTILE: LinearRegressorObjective,
-        MLModelType.ARIMA: ARIMARegressorObjective,
+        ModelType.XGB: XGBRegressorObjective,
+        ModelType.LGB: LGBRegressorObjective,
+        ModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
+        ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
+        ModelType.LINEAR: LinearRegressorObjective,
+        ModelType.LINEAR_QUANTILE: LinearRegressorObjective,
+        ModelType.ARIMA: ARIMARegressorObjective,
     }
 
     @staticmethod
-    def create_objective(model_type: Union[MLModelType, str]) -> RegressorObjective:
+    def create_objective(model_type: Union[ModelType, str]) -> RegressorObjective:
         """Create an objective function based on model type.
 
         Args:
@@ -51,10 +51,10 @@ class ObjectiveCreator:
             if is_custom_type(model_type):
                 objective = create_custom_objective(model_type)
             else:
-                model_type = MLModelType(model_type)
+                model_type = ModelType(model_type)
                 objective = ObjectiveCreator.OBJECTIVES[model_type]
         except ValueError as e:
-            valid_types = [t.value for t in MLModelType]
+            valid_types = [t.value for t in ModelType]
             raise NotImplementedError(
                 f"No objective for '{model_type}', "
                 f"valid model_types are: {valid_types}"
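The objective side mirrors the same rename. A small sketch, assuming the OBJECTIVES mapping above is the full registry; note that LINEAR_QUANTILE deliberately reuses LinearRegressorObjective rather than getting its own objective class:

from openstef.enums import ModelType
from openstef.model.objective_creator import ObjectiveCreator

# create_objective returns the registered objective class; plain strings
# are coerced through ModelType(model_type) first.
objective_class = ObjectiveCreator.create_objective(ModelType.LINEAR_QUANTILE)
assert objective_class is ObjectiveCreator.OBJECTIVES[ModelType.LINEAR]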
openstef/model/regressors/flatliner.py
CHANGED
@@ -2,18 +2,13 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 import re
-from typing import
+from typing import List
 
 import numpy as np
 import pandas as pd
 from sklearn.base import RegressorMixin
-from sklearn.linear_model import QuantileRegressor
-from sklearn.preprocessing import MinMaxScaler
 from sklearn.utils.validation import check_is_fitted
 
-from openstef.feature_engineering.missing_values_transformer import (
-    MissingValuesTransformer,
-)
 from openstef.model.regressors.regressor import OpenstfRegressor
 
 
@@ -23,9 +18,9 @@ class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
     def __init__(self, quantiles=None):
         """Initialize FlatlinerRegressor.
 
-        The model always predicts 0.0, regardless of the input features. The model is
-
-
+        The model always predicts 0.0, regardless of the input features. The model is meant to be used for flatliner
+        locations that still expect a prediction while preserving the prediction interface.
+
         """
         super().__init__()
         self.quantiles = quantiles

openstef/model/regressors/linear_quantile.py
CHANGED
@@ -2,13 +2,13 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 import re
-from typing import Dict, Union, Set, Optional
+from typing import Dict, Union, Set, Optional, List
 
 import numpy as np
 import pandas as pd
 from sklearn.base import RegressorMixin
 from sklearn.linear_model import QuantileRegressor
-from sklearn.preprocessing import MinMaxScaler
+from sklearn.preprocessing import StandardScaler
 from sklearn.utils.validation import check_is_fitted
 
 from openstef.feature_engineering.missing_values_transformer import (
@@ -25,8 +25,8 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
     solver: str
 
     imputer_: MissingValuesTransformer
-    x_scaler_: MinMaxScaler
-    y_scaler_: MinMaxScaler
+    x_scaler_: StandardScaler
+    y_scaler_: StandardScaler
     models_: Dict[float, QuantileRegressor]
 
     is_fitted_: bool = False
@@ -47,6 +47,10 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
         missing_values: Union[int, float, str, None] = np.nan,
         imputation_strategy: Optional[str] = "mean",
         fill_value: Union[str, int, float] = None,
+        weight_scale_percentile: int = 95,
+        weight_exponent: float = 1,
+        weight_floor: float = 0.1,
+        no_fill_future_values_features: List[str] = None,
     ):
         """Initialize LinearQuantileOpenstfRegressor.
 
@@ -69,6 +73,12 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
             missing_values: Value to be considered as missing value
             imputation_strategy: Imputation strategy
             fill_value: Fill value
+            weight_scale_percentile: Percentile used in scaling of the samples
+            weight_exponent: Exponent used in sample weighing
+            weight_floor: Minimum weight for samples
+            no_fill_future_values_features: The features for which it does not make sense
+                to fill future values. Rows that contain trailing null values for these
+                features will be removed from the data.
 
         """
         super().__init__()
@@ -82,13 +92,17 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
         self.quantiles = quantiles
         self.alpha = alpha
         self.solver = solver
+        self.weight_scale_percentile = weight_scale_percentile
+        self.weight_exponent = weight_exponent
+        self.weight_floor = weight_floor
         self.imputer_ = MissingValuesTransformer(
             missing_values=missing_values,
            imputation_strategy=imputation_strategy,
             fill_value=fill_value,
+            no_fill_future_values_features=no_fill_future_values_features,
         )
-        self.x_scaler_ =
-        self.y_scaler_ =
+        self.x_scaler_ = StandardScaler()
+        self.y_scaler_ = StandardScaler()
         self.models_ = {
             quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
             for quantile in quantiles
@@ -165,7 +179,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
         x = self._remove_ignored_features(x)
 
         # Fix nan columns
-        x = self.imputer_.fit_transform(x)
+        x, y = self.imputer_.fit_transform(x, y)
         if x.isna().any().any():
             raise ValueError(
                 "There are nan values in the input data. Set "
@@ -177,7 +191,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
         y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]
 
         # Add more focus on extreme / peak values
-        sample_weight =
+        sample_weight = self._calculate_sample_weights(y.values.squeeze())
 
         # Fit quantile regressors
         for quantile in self.quantiles:
@@ -191,6 +205,33 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
 
         return self
 
+    def _calculate_sample_weights(self, y: np.array):
+        """Calculate sample weights based on the y values of arbitrary scale.
+
+        The resulting weights are in the range [0,1] and are used to put more emphasis
+        on certain samples. The sample weighting function does:
+
+        * Rescale data to a [-1, 1] range using quantile scaling. 90% of the data will
+          be within this range. Rest is outside.
+        * Calculate the weight by taking the exponent of scaled data.
+          * exponent=0: Results in uniform weights for all samples.
+          * exponent=1: Results in linearly increasing weights for samples that are
+            closer to the extremes.
+          * exponent>1: Results in exponentially increasing weights for samples that are
+            closer to the extremes.
+        * Clip the data to [0, 1] range with weight_floor as the minimum weight.
+          * Weight floor is used to make sure that all the samples are considered.
+
+        """
+        return np.clip(
+            _weight_exp(
+                _scale_percentile(y, percentile=self.weight_scale_percentile),
+                exponent=self.weight_exponent,
+            ),
+            a_min=self.weight_floor,
+            a_max=1,
+        )
+
     def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
         """Makes a prediction for a desired quantile.
 
@@ -231,7 +272,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
         return np.array(
             [
                 reg_feature_importances_dict.get(c, 0)
-                for c in self.imputer_.
+                for c in self.imputer_.non_null_feature_names
             ]
         )
@@ -245,3 +286,11 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
 
     def __sklearn_is_fitted__(self) -> bool:
         return self.is_fitted_
+
+
+def _scale_percentile(x: np.ndarray, percentile: int = 95):
+    return np.abs(x / np.percentile(np.abs(x), percentile))
+
+
+def _weight_exp(x: np.ndarray, exponent: float = 1):
+    return np.abs(x) ** exponent
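The new sample weighting is self-contained enough to demonstrate outside the class. This sketch folds _scale_percentile, _weight_exp, and the clipping in _calculate_sample_weights into one standalone function (the input series is made up):

import numpy as np

def calculate_sample_weights(y, scale_percentile=95, exponent=1.0, floor=0.1):
    # Quantile scaling: |y| at the chosen percentile maps to 1, so most of
    # the series lands inside [-1, 1] and only the peaks fall outside.
    scaled = np.abs(y / np.percentile(np.abs(y), scale_percentile))
    # exponent=0 gives uniform weights, exponent=1 linear emphasis on peaks,
    # exponent>1 stronger-than-linear emphasis; clipping to [floor, 1] keeps
    # every sample in play with at least the floor weight.
    return np.clip(scaled ** exponent, floor, 1)

y = np.array([0.2, -0.5, 1.0, -4.0, 12.0])
print(calculate_sample_weights(y))  # peaks weigh ~1, small samples hit the 0.1 floor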
openstef/model/regressors/xgb.py
CHANGED
@@ -1,6 +1,10 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
 #
 # SPDX-License-Identifier: MPL-2.0
+from typing import Optional
+
+import numpy as np
+from sklearn.base import RegressorMixin
 
 from xgboost import XGBRegressor
 
@@ -27,3 +31,22 @@ class XGBOpenstfRegressor(XGBRegressor, OpenstfRegressor):
         "gain_importance_name": "total_gain",
         "weight_importance_name": "weight",
     }
+
+    def fit(
+        self,
+        x: np.array,
+        y: np.array,
+        *,
+        early_stopping_rounds: Optional[int] = None,
+        callbacks: Optional[list] = None,
+        eval_metric: Optional[str] = None,
+        **kwargs
+    ):
+        if early_stopping_rounds is not None:
+            self.set_params(early_stopping_rounds=early_stopping_rounds)
+        if callbacks is not None:
+            self.set_params(callbacks=callbacks)
+        if eval_metric is not None:
+            self.set_params(eval_metric=eval_metric)
+
+        super().fit(x, y, **kwargs)
openstef/model_selection/model_selection.py
CHANGED
@@ -106,7 +106,7 @@ def split_data_train_validation_test(
     validation_fraction: float = 0.15,
     back_test: bool = False,
     stratification_min_max: bool = True,
-) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
     """Split input data into train, test and validation set.
 
     Function for splitting data with features in a train, test and

openstef/pipeline/create_component_forecast.py
CHANGED
@@ -108,7 +108,7 @@ def create_components_forecast_pipeline(
 
     # Make component forecasts
     try:
-        input_data = create_input(pj, input_data, weather_data)
+        dazls_input_data = create_input(pj, input_data, weather_data)
 
         # Save and load the model as .sav file (or as .z file)
         # For the code contact: korte.termijn.prognoses@alliander.com
@@ -119,13 +119,13 @@ def create_components_forecast_pipeline(
 
         # Use the predict function of Dazls model
        # As input data we use the input_data function which takes into consideration what we want as an input for the forecast and what Dazls can accept as an input
-        forecasts = dazls_model.predict(x=input_data)
+        forecasts = dazls_model.predict(x=dazls_input_data)
 
         # Set the columns for the output forecast dataframe
         forecasts = pd.DataFrame(
             forecasts,
             columns=["forecast_wind_on_shore", "forecast_solar"],
-            index=input_data.index,
+            index=dazls_input_data.index,
         )
 
         # Make post-processed forecasts for solar and wind power
@@ -140,18 +140,25 @@ def create_components_forecast_pipeline(
 
         # Make forecast for the component: "forecast_other"
         forecasts["forecast_other"] = (
-            input_data["total_load"]
+            dazls_input_data["total_load"]
             - forecasts["forecast_solar"]
            - forecasts["forecast_wind_on_shore"]
         )
+
+        # Make sure the forecasts have the same form as the input data. Pad with 0 if necessary
+        forecasts = forecasts.reindex(index=input_data.index, fill_value=0)
     except Exception as e:
-        # In case something goes wrong we fall back on
+        # In case something goes wrong we fall back on an a zero-filled dataframe
         logger.warning(
             f"Could not make component forecasts: {e}, falling back on series of"
             " zeros!",
             exc_info=e,
         )
-        forecasts = pd.DataFrame(
+        forecasts = pd.DataFrame(
+            data=0,
+            index=input_data.index,
+            columns=["forecast_wind_on_shore", "forecast_solar", "forecast_other"],
+        )
 
     # Prepare for output
     # Add more prediction properties to the forecast ("pid","customer","description","type","algtype)
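The padding step added to the component pipeline is plain pandas. A minimal sketch of what reindex with fill_value=0 does when the DAZLS output covers fewer timestamps than the input (index and values are made up):

import pandas as pd

full_index = pd.date_range("2024-01-01", periods=4, freq="15min")
forecasts = pd.DataFrame(
    {"forecast_wind_on_shore": [1.0, 2.0], "forecast_solar": [0.5, 0.6]},
    index=full_index[:2],
)
# Align to the full input index; uncovered timestamps become zero rows
# instead of being missing from the output.
forecasts = forecasts.reindex(index=full_index, fill_value=0)
print(forecasts)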
openstef/pipeline/train_model.py
CHANGED
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: MPL-2.0
 import logging
 import os
-from typing import Optional, Union
+from typing import Optional, Union, Tuple
 
 import pandas as pd
 import structlog
@@ -155,7 +155,7 @@ def train_model_pipeline_core(
     input_data: pd.DataFrame,
     old_model: OpenstfRegressor = None,
     horizons: list[float] = DEFAULT_TRAIN_HORIZONS_HOURS,
-) ->
+) -> Tuple[
     OpenstfRegressor,
     Report,
     ModelSpecificationDataClass,
@@ -246,7 +246,9 @@ def train_pipeline_common(
     test_fraction: float = 0.0,
     backtest: bool = False,
     test_data_predefined: pd.DataFrame = pd.DataFrame(),
-) -> tuple[
+) -> tuple[
+    OpenstfRegressor, Report, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame
+]:
     """Common pipeline shared with operational training and backtest training.
 
     Args:
@@ -314,7 +316,8 @@ def train_pipeline_common(
 
 def train_pipeline_step_load_model(
     pj: PredictionJobDataClass, serializer: MLflowSerializer
-) ->
+) -> Tuple[OpenstfRegressor, ModelSpecificationDataClass, Union[int, float]]:
+    old_model: Optional[OpenstfRegressor]
     try:
         old_model, model_specs = serializer.load_model(experiment_name=str(pj.id))
         old_model_age = old_model.age  # Age attribute is openstef specific
@@ -509,7 +512,7 @@ def train_pipeline_step_split_data(
     test_fraction: float,
     backtest: bool = False,
     test_data_predefined: pd.DataFrame = pd.DataFrame(),
-) ->
+) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
     """The default way to perform train, val, test split.
 
     Args:
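The annotation changes above make the shapes of these pipeline steps explicit. A hedged sketch of consuming train_pipeline_step_load_model under its new three-tuple annotation (pj and serializer are assumed to come from the surrounding pipeline; constructing a real MLflowSerializer needs a tracking backend):

from openstef.pipeline.train_model import train_pipeline_step_load_model

# pj: PredictionJobDataClass, serializer: MLflowSerializer (both assumed to
# exist). Per the Union[int, float] annotation, the age is a plain number;
# how it is set when no previous model exists is not shown in this diff.
old_model, model_specs, old_model_age = train_pipeline_step_load_model(pj, serializer)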
openstef/tasks/calculate_kpi.py
CHANGED
@@ -29,7 +29,7 @@ import pandas as pd
 import structlog
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.exceptions import NoPredictedLoadError, NoRealisedLoadError
 from openstef.metrics import metrics
 from openstef.settings import Settings
@@ -42,7 +42,7 @@ THRESHOLD_RETRAINING = 0.25
 THRESHOLD_OPTIMIZING = 0.50
 
 
-def main(model_type: MLModelType = None, config=None, database=None) -> None:
+def main(model_type: ModelType = None, config=None, database=None) -> None:
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -52,7 +52,7 @@ def main(model_type: MLModelType = None, config=None, database=None) -> None:
         )
 
     if model_type is None:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
     with TaskContext(taskname, config, database) as context:
         # Set start and end time

openstef/tasks/create_basecase_forecast.py
CHANGED
@@ -97,7 +97,7 @@ def create_basecase_forecast_task(
     context.database.write_forecast(basecase_forecast, t_ahead_series=True)
 
 
-def main(config: object = None, database: object = None):
+def main(config: object = None, database: object = None, **kwargs):
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -110,7 +110,7 @@ def main(config: object = None, database: object = None):
     model_type = ["xgb", "xgb_quantile", "lgb"]
 
     PredictionJobLoop(context, model_type=model_type).map(
-        create_basecase_forecast_task, context
+        create_basecase_forecast_task, context, **kwargs
     )
 
 

openstef/tasks/create_components_forecast.py
CHANGED
@@ -29,7 +29,7 @@ import pandas as pd
 import structlog
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.exceptions import ComponentForecastTooShortHorizonError
 from openstef.pipeline.create_component_forecast import (
     create_components_forecast_pipeline,
@@ -140,7 +140,7 @@ def create_components_forecast_task(
     )
 
 
-def main(config: object = None, database: object = None):
+def main(config: object = None, database: object = None, **kwargs):
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -150,12 +150,12 @@ def main(config: object = None, database: object = None):
     )
 
     with TaskContext(taskname, config, database) as context:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(
             context,
             model_type=model_type,
-        ).map(create_components_forecast_task, context)
+        ).map(create_components_forecast_task, context, **kwargs)
 
 
 if __name__ == "__main__":

openstef/tasks/create_forecast.py
CHANGED
@@ -24,7 +24,7 @@ from datetime import datetime, timedelta
 from pathlib import Path
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType, PipelineType
+from openstef.enums import ModelType, PipelineType
 from openstef.exceptions import InputDataOngoingZeroFlatlinerError
 from openstef.pipeline.create_forecast import create_forecast_pipeline
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
@@ -118,7 +118,7 @@ def create_forecast_task(
     context.database.write_forecast(forecast, t_ahead_series=True)
 
 
-def main(model_type=None, config=None, database=None):
+def main(model_type=None, config=None, database=None, **kwargs):
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -129,10 +129,10 @@ def main(model_type=None, config=None, database=None):
 
     with TaskContext(taskname, config, database) as context:
         if model_type is None:
-            model_type = [ml.value for ml in MLModelType]
+            model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(context, model_type=model_type).map(
-            create_forecast_task, context
+            create_forecast_task, context, **kwargs
         )
 
 
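The common thread in the task files is that main() now accepts **kwargs and forwards them through PredictionJobLoop.map() to the task function. A hedged invocation sketch (config, database, and the extra keyword are placeholders; the keyword only works if it matches a parameter the task function, here create_forecast_task, actually accepts):

from openstef.tasks import create_forecast

# Hypothetical: any extra keyword given to main() now reaches the task via
# PredictionJobLoop.map(create_forecast_task, context, **kwargs).
create_forecast.main(config=config, database=database, some_task_kwarg=123)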