openstef 3.4.10__py3-none-any.whl → 3.4.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/app_settings.py +19 -0
- openstef/data_classes/data_prep.py +1 -1
- openstef/data_classes/prediction_job.py +12 -8
- openstef/enums.py +3 -7
- openstef/exceptions.py +1 -1
- openstef/feature_engineering/apply_features.py +0 -6
- openstef/feature_engineering/data_preparation.py +12 -5
- openstef/feature_engineering/feature_applicator.py +1 -5
- openstef/feature_engineering/general.py +14 -0
- openstef/feature_engineering/missing_values_transformer.py +99 -0
- openstef/feature_engineering/weather_features.py +7 -0
- openstef/metrics/figure.py +3 -0
- openstef/metrics/metrics.py +58 -1
- openstef/metrics/reporter.py +7 -0
- openstef/model/confidence_interval_applicator.py +28 -3
- openstef/model/model_creator.py +36 -27
- openstef/model/objective.py +11 -28
- openstef/model/objective_creator.py +4 -3
- openstef/model/regressors/arima.py +1 -1
- openstef/model/regressors/dazls.py +35 -96
- openstef/model/regressors/flatliner.py +100 -0
- openstef/model/regressors/linear_quantile.py +247 -0
- openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
- openstef/model/regressors/xgb_quantile.py +3 -0
- openstef/model/serializer.py +10 -0
- openstef/model_selection/model_selection.py +3 -0
- openstef/monitoring/performance_meter.py +1 -2
- openstef/monitoring/teams.py +11 -0
- openstef/pipeline/create_basecase_forecast.py +11 -1
- openstef/pipeline/create_component_forecast.py +11 -22
- openstef/pipeline/create_forecast.py +20 -1
- openstef/pipeline/optimize_hyperparameters.py +18 -16
- openstef/pipeline/train_create_forecast_backtest.py +11 -1
- openstef/pipeline/train_model.py +23 -7
- openstef/pipeline/utils.py +3 -0
- openstef/postprocessing/postprocessing.py +29 -0
- openstef/settings.py +15 -0
- openstef/tasks/calculate_kpi.py +20 -17
- openstef/tasks/create_basecase_forecast.py +13 -5
- openstef/tasks/create_components_forecast.py +20 -4
- openstef/tasks/create_forecast.py +5 -2
- openstef/tasks/split_forecast.py +7 -0
- openstef/tasks/train_model.py +7 -5
- openstef/tasks/utils/taskcontext.py +7 -0
- openstef/validation/validation.py +27 -2
- {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/METADATA +34 -38
- openstef-3.4.29.dist-info/RECORD +91 -0
- {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/WHEEL +1 -1
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
- openstef/feature_engineering/historic_features.py +0 -40
- openstef/model/regressors/proloaf.py +0 -281
- openstef/tasks/run_tracy.py +0 -145
- openstef-3.4.10.dist-info/RECORD +0 -104
- {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/LICENSE +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,261 @@
|
|
1
|
+
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
from functools import partial
|
5
|
+
from typing import Dict, Optional, Sequence, Tuple, Union
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import pandas as pd
|
9
|
+
import sklearn.base
|
10
|
+
import xgboost as xgb
|
11
|
+
from sklearn.compose import TransformedTargetRegressor
|
12
|
+
from sklearn.preprocessing import StandardScaler
|
13
|
+
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
|
14
|
+
from xgboost import Booster
|
15
|
+
|
16
|
+
import openstef.metrics.metrics as metrics
|
17
|
+
from openstef.model.regressors.regressor import OpenstfRegressor
|
18
|
+
|
19
|
+
DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
|
20
|
+
|
21
|
+
|
22
|
+
class XGBMultiOutputQuantileOpenstfRegressor(OpenstfRegressor):
|
23
|
+
r"""Model that provides multioutput quantile regression with XGBoost by default using the arctan loss function.
|
24
|
+
|
25
|
+
Arctan loss:
|
26
|
+
Reference: https://github.com/LaurensSluyterman/XGBoost_quantile_regression/tree/master
|
27
|
+
The key idea is to use a smooth approximation of the pinball loss, the arctan
|
28
|
+
pinball loss, that has a relatively large second derivative.
|
29
|
+
|
30
|
+
The approximation is given by:
|
31
|
+
$$L^{(\text{arctan})}_{\tau, s}(u) = (\tau - 0.5 + \frac{\arctan (u/s)}{\pi})u + \frac{s}{\pi}$$. # noqa E501
|
32
|
+
|
33
|
+
Some important settings:
|
34
|
+
|
35
|
+
* The parameter $s$ in the loss function determines the amount of smoothing. A
|
36
|
+
smaller value gives a closer approximation but also a much smaller second
|
37
|
+
derivative. A larger value gives more conservative quantiles when
|
38
|
+
$\tau$ is larger than 0.5, the quantile becomes larger and vice versa.
|
39
|
+
Values between 0.05 and 0.1 appear to work well. It may be a good idea to
|
40
|
+
optimize this parameter.
|
41
|
+
* Set min-child-weight to zero. The second derivatives can be a lot smaller
|
42
|
+
than 1 and this parameter may prevent any splits.
|
43
|
+
* Use a relatively small max-delta-step. We used a default of 0.5.
|
44
|
+
This prevents excessive steps that could happen due to the relatively
|
45
|
+
small second derivative.
|
46
|
+
* For the same reason, use a slightly lower learning rate of 0.05.
|
47
|
+
|
48
|
+
"""
|
49
|
+
|
50
|
+
estimator_: TransformedTargetRegressor
|
51
|
+
quantile_indices_: Dict[float, int]
|
52
|
+
|
53
|
+
@staticmethod
|
54
|
+
def _get_importance_names():
|
55
|
+
return {
|
56
|
+
"gain_importance_name": "total_gain",
|
57
|
+
"weight_importance_name": "weight",
|
58
|
+
}
|
59
|
+
|
60
|
+
def __init__(
|
61
|
+
self,
|
62
|
+
quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
|
63
|
+
gamma: float = 0.0,
|
64
|
+
colsample_bytree: float = 1.0,
|
65
|
+
subsample: float = 1.0,
|
66
|
+
min_child_weight: int = 0,
|
67
|
+
max_depth: int = 6,
|
68
|
+
learning_rate: float = 0.22,
|
69
|
+
alpha: float = 0.0,
|
70
|
+
max_delta_step: int = 0.5,
|
71
|
+
arctan_smoothing: float = 0.055,
|
72
|
+
early_stopping_rounds: Optional[int] = None,
|
73
|
+
):
|
74
|
+
"""Initialize XGBMultiQuantileRegressor.
|
75
|
+
|
76
|
+
Model that provides quantile regression with XGBoost.
|
77
|
+
A single multi-output XGBoost model is trained with one output per desired quantile,
|
78
|
+
these can later be used to predict quantiles.
|
79
|
+
|
80
|
+
Args:
|
81
|
+
quantiles: Tuple with desired quantiles, quantile 0.5 is required.
|
82
|
+
For example: (0.1, 0.5, 0.9)
|
83
|
+
gamma: Gamma.
|
84
|
+
colsample_bytree: Colsample by tree.
|
85
|
+
subsample: Subsample.
|
86
|
+
min_child_weight: Minimum child weight.
|
87
|
+
max_depth: Maximum depth.
|
88
|
+
learning_rate: Learning rate.
|
89
|
+
alpha: Alpha.
|
90
|
+
max_delta_step: Maximum delta step.
|
91
|
+
arctan_smoothing: smoothing parameter of the arctan loss function.
|
92
|
+
early_stopping_rounds: Number of rounds to stop training if no improvement
|
93
|
+
is made.
|
94
|
+
|
95
|
+
Raises:
|
96
|
+
ValueError in case quantile 0.5 is not in the requested quantiles.
|
97
|
+
|
98
|
+
"""
|
99
|
+
super().__init__()
|
100
|
+
if 0.5 not in quantiles:
|
101
|
+
raise ValueError(
|
102
|
+
"Cannot train quantile model as 0.5 is not in requested quantiles!"
|
103
|
+
)
|
104
|
+
|
105
|
+
self.quantiles = quantiles
|
106
|
+
|
107
|
+
# Set attributes for hyper parameters
|
108
|
+
self.subsample = subsample
|
109
|
+
self.min_child_weight = min_child_weight
|
110
|
+
self.max_depth = max_depth
|
111
|
+
self.gamma = gamma
|
112
|
+
self.alpha = alpha
|
113
|
+
self.max_delta_step = max_delta_step
|
114
|
+
self.colsample_bytree = colsample_bytree
|
115
|
+
self.learning_rate = learning_rate
|
116
|
+
self.early_stopping_rounds = early_stopping_rounds
|
117
|
+
self.arctan_smoothing = arctan_smoothing
|
118
|
+
|
119
|
+
# Get fitting parameters - only those accepted by xgb.XGBRegressor
|
120
|
+
xgb_regressor_params = {
|
121
|
+
key: value
|
122
|
+
for key, value in self.get_params().items()
|
123
|
+
if key in xgb.XGBRegressor().get_params().keys()
|
124
|
+
}
|
125
|
+
|
126
|
+
# Define the model
|
127
|
+
objective = partial(
|
128
|
+
metrics.arctan_loss, taus=self.quantiles, s=arctan_smoothing
|
129
|
+
)
|
130
|
+
xgb_model: xgb.XGBRegressor = xgb.XGBRegressor(
|
131
|
+
objective=objective,
|
132
|
+
base_score=0,
|
133
|
+
multi_strategy="one_output_per_tree",
|
134
|
+
**xgb_regressor_params,
|
135
|
+
)
|
136
|
+
self.estimator_ = TransformedTargetRegressor(
|
137
|
+
regressor=xgb_model, transformer=StandardScaler()
|
138
|
+
)
|
139
|
+
|
140
|
+
# Set quantile indices to remap multioutput predictions
|
141
|
+
self.quantile_indices_ = {
|
142
|
+
quantile: i for i, quantile in enumerate(self.quantiles)
|
143
|
+
}
|
144
|
+
|
145
|
+
def fit(
|
146
|
+
self,
|
147
|
+
x: np.array,
|
148
|
+
y: np.array,
|
149
|
+
eval_set: Optional[Sequence[Tuple[np.array, np.array]]] = None,
|
150
|
+
verbose: Optional[Union[bool, int]] = 0,
|
151
|
+
**kwargs
|
152
|
+
) -> OpenstfRegressor:
|
153
|
+
"""Fits xgb quantile model.
|
154
|
+
|
155
|
+
Args:
|
156
|
+
x: Feature matrix.
|
157
|
+
y: Labels.
|
158
|
+
eval_set: Evaluation set to monitor training performance.
|
159
|
+
verbose: Verbosity level (disabled by default).
|
160
|
+
|
161
|
+
Returns:
|
162
|
+
Fitted XGBQuantile model.
|
163
|
+
|
164
|
+
"""
|
165
|
+
if isinstance(y, pd.Series):
|
166
|
+
y = y.to_numpy()
|
167
|
+
|
168
|
+
if not isinstance(x, pd.DataFrame):
|
169
|
+
x = pd.DataFrame(np.asarray(x))
|
170
|
+
|
171
|
+
# Check/validate input
|
172
|
+
check_X_y(x, y, force_all_finite="allow-nan")
|
173
|
+
|
174
|
+
# Prepare inputs
|
175
|
+
y_multioutput = replicate_for_multioutput(y, len(self.quantiles))
|
176
|
+
|
177
|
+
# Define watchlist if eval_set is defined
|
178
|
+
eval_set_multioutput = []
|
179
|
+
if eval_set:
|
180
|
+
for x_eval, y_eval in eval_set:
|
181
|
+
if isinstance(y_eval, pd.Series):
|
182
|
+
y_eval = y_eval.to_numpy()
|
183
|
+
|
184
|
+
y_eval_multioutput = replicate_for_multioutput(
|
185
|
+
y=y_eval, num_quantiles=len(self.quantiles)
|
186
|
+
)
|
187
|
+
eval_set_multioutput.append((x_eval, y_eval_multioutput))
|
188
|
+
|
189
|
+
eval_set_multioutput.append((x, y_multioutput))
|
190
|
+
|
191
|
+
self.estimator_.fit(
|
192
|
+
X=x.copy(deep=True),
|
193
|
+
y=y_multioutput,
|
194
|
+
eval_set=eval_set_multioutput,
|
195
|
+
verbose=verbose,
|
196
|
+
)
|
197
|
+
|
198
|
+
# Update state of the estimator
|
199
|
+
self.feature_importances_ = self.estimator_.regressor_.feature_importances_
|
200
|
+
self.is_fitted_ = True
|
201
|
+
|
202
|
+
return self
|
203
|
+
|
204
|
+
def predict(self, x: np.array, quantile: float = 0.5) -> np.array:
|
205
|
+
"""Makes a prediction for a desired quantile.
|
206
|
+
|
207
|
+
Args:
|
208
|
+
x: Feature matrix.
|
209
|
+
quantile: Quantile for which a prediction is desired,
|
210
|
+
note that only quantiles for which a model is trained are available,
|
211
|
+
and that this is a quantile-model specific keyword.
|
212
|
+
|
213
|
+
Returns:
|
214
|
+
Prediction
|
215
|
+
|
216
|
+
Raises:
|
217
|
+
ValueError in case no model is trained for the requested quantile.
|
218
|
+
|
219
|
+
"""
|
220
|
+
# Check if model is trained for this quantile
|
221
|
+
if quantile not in self.quantiles:
|
222
|
+
raise ValueError("No model trained for requested quantile!")
|
223
|
+
|
224
|
+
# Check/validate input
|
225
|
+
check_array(x, force_all_finite="allow-nan")
|
226
|
+
check_is_fitted(self)
|
227
|
+
|
228
|
+
# best_iteration is only available if early stopping was used during training
|
229
|
+
prediction: np.array
|
230
|
+
if hasattr(self.estimator_, "best_iteration"):
|
231
|
+
prediction = self.estimator_.predict(
|
232
|
+
X=x,
|
233
|
+
iteration_range=(0, self.estimator_.best_iteration + 1),
|
234
|
+
)
|
235
|
+
else:
|
236
|
+
prediction = self.estimator_.predict(X=x)
|
237
|
+
|
238
|
+
quantile_index = self.quantile_indices_[quantile]
|
239
|
+
return prediction[:, quantile_index]
|
240
|
+
|
241
|
+
@property
|
242
|
+
def feature_names(self):
|
243
|
+
return self.estimator_.feature_names_in_
|
244
|
+
|
245
|
+
@property
|
246
|
+
def can_predict_quantiles(self):
|
247
|
+
return True
|
248
|
+
|
249
|
+
|
250
|
+
def replicate_for_multioutput(y: np.array, num_quantiles: int) -> np.array:
|
251
|
+
"""Replicates a 1D array to a 2D array for multioutput regression.
|
252
|
+
|
253
|
+
Args:
|
254
|
+
y: 1D array.
|
255
|
+
num_quantiles: Number of columns in the output array.
|
256
|
+
|
257
|
+
Returns:
|
258
|
+
2D array with shape (len(y), num_quantiles)
|
259
|
+
|
260
|
+
"""
|
261
|
+
return np.repeat(y[:, None], num_quantiles, axis=1)
|
@@ -52,6 +52,9 @@ class XGBQuantileOpenstfRegressor(OpenstfRegressor):
|
|
52
52
|
alpha: Alpha
|
53
53
|
max_delta_step: Maximum delta step
|
54
54
|
|
55
|
+
Raises:
|
56
|
+
ValueError in case quantile 0.5 is not in the requested quantiles
|
57
|
+
|
55
58
|
"""
|
56
59
|
super().__init__()
|
57
60
|
# Check if quantile 0.5 is present, this is required
|
openstef/model/serializer.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
4
|
import json
|
5
|
+
import logging
|
5
6
|
import os
|
6
7
|
import shutil
|
7
8
|
from datetime import datetime
|
@@ -20,10 +21,16 @@ from xgboost import XGBModel # Temporary for backward compatibility
|
|
20
21
|
from openstef.data_classes.model_specifications import ModelSpecificationDataClass
|
21
22
|
from openstef.metrics.reporter import Report
|
22
23
|
from openstef.model.regressors.regressor import OpenstfRegressor
|
24
|
+
from openstef.settings import Settings
|
23
25
|
|
24
26
|
|
25
27
|
class MLflowSerializer:
|
26
28
|
def __init__(self, mlflow_tracking_uri: str):
|
29
|
+
structlog.configure(
|
30
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
31
|
+
logging.getLevelName(Settings.log_level)
|
32
|
+
)
|
33
|
+
)
|
27
34
|
self.logger = structlog.get_logger(self.__class__.__name__)
|
28
35
|
mlflow.set_tracking_uri(mlflow_tracking_uri)
|
29
36
|
self.logger.debug(f"MLflow tracking uri at init= {mlflow_tracking_uri}")
|
@@ -147,6 +154,9 @@ class MLflowSerializer:
|
|
147
154
|
Args:
|
148
155
|
experiment_name: Name of the experiment, often the id of the prediction job.
|
149
156
|
|
157
|
+
Raises:
|
158
|
+
LookupError: If model is not found in MLflow.
|
159
|
+
|
150
160
|
"""
|
151
161
|
try:
|
152
162
|
models_df = self._find_models(
|
@@ -140,6 +140,9 @@ def split_data_train_validation_test(
|
|
140
140
|
- Validation data.
|
141
141
|
- Test data.
|
142
142
|
|
143
|
+
Raises:
|
144
|
+
ValueError: When the test and validation fractions are too high.
|
145
|
+
|
143
146
|
"""
|
144
147
|
test_fraction = test_fraction if back_test else 0
|
145
148
|
train_fraction = 1 - (test_fraction + validation_fraction)
|
@@ -20,8 +20,7 @@ class PerformanceMeter:
|
|
20
20
|
|
21
21
|
Args:
|
22
22
|
level_label: The label of the new level. This could e.g. be 'task'
|
23
|
-
level_name: The name of the specified level.
|
24
|
-
'tracy_todo'
|
23
|
+
level_name: The name of the specified level.
|
25
24
|
**kwargs: Any other kwargs are appended to the logging.
|
26
25
|
|
27
26
|
Returns:
|
openstef/monitoring/teams.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
import logging
|
4
5
|
from typing import Union
|
5
6
|
|
6
7
|
import pandas as pd
|
@@ -8,6 +9,8 @@ import pymsteams
|
|
8
9
|
import structlog
|
9
10
|
from pymsteams import cardsection
|
10
11
|
|
12
|
+
from openstef.settings import Settings
|
13
|
+
|
11
14
|
|
12
15
|
def post_teams(
|
13
16
|
msg: Union[str, dict],
|
@@ -38,6 +41,14 @@ def post_teams(
|
|
38
41
|
Note:
|
39
42
|
This function is namespace-specific.
|
40
43
|
"""
|
44
|
+
if not Settings.post_teams_messages:
|
45
|
+
return
|
46
|
+
|
47
|
+
structlog.configure(
|
48
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
49
|
+
logging.getLevelName(Settings.log_level)
|
50
|
+
)
|
51
|
+
)
|
41
52
|
logger = structlog.get_logger(__name__)
|
42
53
|
# If no url is passed, give warning and don't send teams message
|
43
54
|
if url is None:
|
@@ -1,13 +1,14 @@
|
|
1
1
|
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
import logging
|
4
5
|
from pathlib import Path
|
5
6
|
|
6
7
|
import pandas as pd
|
7
8
|
import structlog
|
8
9
|
|
9
10
|
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
10
|
-
from openstef.exceptions import
|
11
|
+
from openstef.exceptions import InputDataOngoingZeroFlatlinerError, NoRealisedLoadError
|
11
12
|
from openstef.feature_engineering.feature_applicator import (
|
12
13
|
OperationalPredictFeatureApplicator,
|
13
14
|
)
|
@@ -18,6 +19,7 @@ from openstef.postprocessing.postprocessing import (
|
|
18
19
|
add_components_base_case_forecast,
|
19
20
|
add_prediction_job_properties_to_forecast,
|
20
21
|
)
|
22
|
+
from openstef.settings import Settings
|
21
23
|
from openstef.validation import validation
|
22
24
|
|
23
25
|
MODEL_LOCATION = Path(".")
|
@@ -38,7 +40,15 @@ def create_basecase_forecast_pipeline(
|
|
38
40
|
Returns:
|
39
41
|
Base case forecast
|
40
42
|
|
43
|
+
Raises:
|
44
|
+
NoRealisedLoadError: When no realised load for given datetime range.
|
45
|
+
|
41
46
|
"""
|
47
|
+
structlog.configure(
|
48
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
49
|
+
logging.getLevelName(Settings.log_level)
|
50
|
+
)
|
51
|
+
)
|
42
52
|
logger = structlog.get_logger(__name__)
|
43
53
|
|
44
54
|
logger.info("Preprocessing data for basecase forecast")
|
@@ -2,7 +2,10 @@
|
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
4
|
|
5
|
+
import logging
|
6
|
+
|
5
7
|
import joblib
|
8
|
+
import numpy as np
|
6
9
|
import pandas as pd
|
7
10
|
import structlog
|
8
11
|
|
@@ -11,12 +14,11 @@ from openstef import PROJECT_ROOT
|
|
11
14
|
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
12
15
|
from openstef.enums import ForecastType
|
13
16
|
from openstef.model.regressors.dazls import Dazls
|
14
|
-
|
15
|
-
import numpy as np
|
17
|
+
from openstef.settings import Settings
|
16
18
|
|
17
19
|
# Set the path for the Dazls stored model
|
18
20
|
DAZLS_STORED = str(
|
19
|
-
PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.
|
21
|
+
PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
|
20
22
|
)
|
21
23
|
|
22
24
|
|
@@ -96,6 +98,11 @@ def create_components_forecast_pipeline(
|
|
96
98
|
"algtype"
|
97
99
|
|
98
100
|
"""
|
101
|
+
structlog.configure(
|
102
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
103
|
+
logging.getLevelName(Settings.log_level)
|
104
|
+
)
|
105
|
+
)
|
99
106
|
logger = structlog.get_logger(__name__)
|
100
107
|
logger.info("Make components prediction", pid=pj["id"])
|
101
108
|
|
@@ -106,26 +113,8 @@ def create_components_forecast_pipeline(
|
|
106
113
|
# Save and load the model as .sav file (or as .z file)
|
107
114
|
# For the code contact: korte.termijn.prognoses@alliander.com
|
108
115
|
dazls_model = Dazls()
|
109
|
-
dazls_model.
|
110
|
-
dazls_model.domain_model_scaler = joblib.load(
|
111
|
-
DAZLS_STORED + "domain_model_scaler.z"
|
112
|
-
)
|
113
|
-
dazls_model.domain_model_input_columns = joblib.load(
|
114
|
-
DAZLS_STORED + "domain_model_features.z"
|
115
|
-
)
|
116
|
-
|
117
|
-
dazls_model.adaptation_model = joblib.load(DAZLS_STORED + "adaptation_model.z")
|
118
|
-
dazls_model.adaptation_model_scaler = joblib.load(
|
119
|
-
DAZLS_STORED + "adaptation_model_scaler.z"
|
120
|
-
)
|
121
|
-
dazls_model.adaptation_model_input_columns = joblib.load(
|
122
|
-
DAZLS_STORED + "adaptation_model_features.z"
|
123
|
-
)
|
124
|
-
|
125
|
-
dazls_model.target_columns = joblib.load(DAZLS_STORED + "target.z")
|
126
|
-
dazls_model.target_scaler = joblib.load(DAZLS_STORED + "target_scaler.z")
|
116
|
+
dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")
|
127
117
|
|
128
|
-
logger = structlog.get_logger(__name__)
|
129
118
|
logger.info("DAZLS model loaded", dazls_model=str(dazls_model))
|
130
119
|
|
131
120
|
# Use the predict function of Dazls model
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
import logging
|
5
|
+
|
4
6
|
import pandas as pd
|
5
7
|
import structlog
|
6
8
|
|
@@ -16,7 +18,9 @@ from openstef.model.serializer import MLflowSerializer
|
|
16
18
|
from openstef.pipeline.utils import generate_forecast_datetime_range
|
17
19
|
from openstef.postprocessing.postprocessing import (
|
18
20
|
add_prediction_job_properties_to_forecast,
|
21
|
+
sort_quantiles,
|
19
22
|
)
|
23
|
+
from openstef.settings import Settings
|
20
24
|
from openstef.validation import validation
|
21
25
|
|
22
26
|
|
@@ -40,6 +44,10 @@ def create_forecast_pipeline(
|
|
40
44
|
Returns:
|
41
45
|
DataFrame with the forecast
|
42
46
|
|
47
|
+
Raises:
|
48
|
+
InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
|
49
|
+
LookupError: When no model is found for the given prediction job in MLflow.
|
50
|
+
|
43
51
|
"""
|
44
52
|
prediction_model_pid = pj["id"]
|
45
53
|
# Use the alternative forecast model if it's specified in the pj
|
@@ -64,7 +72,7 @@ def create_forecast_pipeline_core(
|
|
64
72
|
Computes the forecasts and confidence intervals given a prediction job and input data.
|
65
73
|
This pipeline has no database or persistent storage dependencies.
|
66
74
|
|
67
|
-
Expected prediction job keys: "resolution_minutes", "
|
75
|
+
Expected prediction job keys: "resolution_minutes", "id", "type",
|
68
76
|
"name", "quantiles"
|
69
77
|
|
70
78
|
Args:
|
@@ -76,7 +84,15 @@ def create_forecast_pipeline_core(
|
|
76
84
|
Returns:
|
77
85
|
Forecast
|
78
86
|
|
87
|
+
Raises:
|
88
|
+
InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
|
89
|
+
|
79
90
|
"""
|
91
|
+
structlog.configure(
|
92
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
93
|
+
logging.getLevelName(Settings.log_level)
|
94
|
+
)
|
95
|
+
)
|
80
96
|
logger = structlog.get_logger(__name__)
|
81
97
|
|
82
98
|
fallback_strategy = "extreme_day" # this can later be expanded
|
@@ -142,6 +158,9 @@ def create_forecast_pipeline_core(
|
|
142
158
|
model, forecast_input_data
|
143
159
|
).add_confidence_interval(forecast, pj)
|
144
160
|
|
161
|
+
# Sort quantiles - prevents crossing and is statistically sound
|
162
|
+
forecast = sort_quantiles(forecast)
|
163
|
+
|
145
164
|
# Prepare for output
|
146
165
|
forecast = add_prediction_job_properties_to_forecast(
|
147
166
|
pj,
|
@@ -1,8 +1,9 @@
|
|
1
1
|
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
import logging
|
4
5
|
import os
|
5
|
-
from typing import Any
|
6
|
+
from typing import Any
|
6
7
|
|
7
8
|
import optuna
|
8
9
|
import pandas as pd
|
@@ -21,16 +22,22 @@ from openstef.model.objective import RegressorObjective
|
|
21
22
|
from openstef.model.objective_creator import ObjectiveCreator
|
22
23
|
from openstef.model.regressors.regressor import OpenstfRegressor
|
23
24
|
from openstef.model.serializer import MLflowSerializer
|
25
|
+
from openstef.model_selection.model_selection import split_data_train_validation_test
|
24
26
|
from openstef.pipeline.train_model import (
|
25
27
|
DEFAULT_TRAIN_HORIZONS_HOURS,
|
26
28
|
train_model_pipeline_core,
|
27
29
|
)
|
30
|
+
from openstef.settings import Settings
|
28
31
|
from openstef.validation import validation
|
29
|
-
from openstef.model_selection.model_selection import split_data_train_validation_test
|
30
32
|
|
31
33
|
optuna.logging.enable_propagation() # Propagate logs to the root logger.
|
32
34
|
optuna.logging.disable_default_handler() # Stop showing logs in sys.stderr.
|
33
35
|
|
36
|
+
structlog.configure(
|
37
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
38
|
+
logging.getLevelName(Settings.log_level)
|
39
|
+
)
|
40
|
+
)
|
34
41
|
logger = structlog.get_logger(__name__)
|
35
42
|
|
36
43
|
# See https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html#optuna.study.Study.optimize
|
@@ -59,6 +66,9 @@ def optimize_hyperparameters_pipeline(
|
|
59
66
|
|
60
67
|
Raises:
|
61
68
|
ValueError: If the input_date is insufficient.
|
69
|
+
InputDataInsufficientError: If the input dataframe is empty.
|
70
|
+
InputDataWrongColumnOrderError: If the load column is missing in the input dataframe.
|
71
|
+
OldModelHigherScoreError: When old model is better than new model.
|
62
72
|
|
63
73
|
Returns:
|
64
74
|
Optimized hyperparameters.
|
@@ -119,6 +129,10 @@ def optimize_hyperparameters_pipeline_core(
|
|
119
129
|
|
120
130
|
Raises:
|
121
131
|
ValueError: If the input_date is insufficient.
|
132
|
+
InputDataInsufficientError: If the input dataframe is empty.
|
133
|
+
InputDataWrongColumnOrderError: If the load column is missing in the input dataframe.
|
134
|
+
OldModelHigherScoreError: When old model is better than new model.
|
135
|
+
InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
|
122
136
|
|
123
137
|
Returns:
|
124
138
|
- Best model,
|
@@ -175,18 +189,6 @@ def optimize_hyperparameters_pipeline_core(
|
|
175
189
|
horizons=horizons, feature_names=feature_names, feature_modules=feature_modules
|
176
190
|
).add_features(validated_data, pj=pj)
|
177
191
|
|
178
|
-
# Adds additional proloaf features to the input data, historic_load (equal to the load, first column)
|
179
|
-
if pj["model"] == "proloaf" and "historic_load" not in list(
|
180
|
-
validated_data_with_features.columns
|
181
|
-
):
|
182
|
-
validated_data_with_features[
|
183
|
-
"historic_load"
|
184
|
-
] = validated_data_with_features.iloc[:, 0]
|
185
|
-
# Make sure horizons is last column
|
186
|
-
temp_cols = validated_data_with_features.columns.tolist()
|
187
|
-
new_cols = temp_cols[:-2] + [temp_cols[-1]] + [temp_cols[-2]]
|
188
|
-
validated_data_with_features = validated_data_with_features[new_cols]
|
189
|
-
|
190
192
|
# Create objective (NOTE: this is a callable class)
|
191
193
|
objective = ObjectiveCreator.create_objective(model_type=pj["model"])
|
192
194
|
|
@@ -245,7 +247,7 @@ def optuna_optimization(
|
|
245
247
|
- The objective object used by optuna
|
246
248
|
|
247
249
|
"""
|
248
|
-
model = ModelCreator.create_model(pj["model"])
|
250
|
+
model = ModelCreator.create_model(pj["model"], **(pj.model_kwargs or {}))
|
249
251
|
# Apply set to default hyperparameters if they are specified in the pj
|
250
252
|
if pj.default_modelspecs:
|
251
253
|
valid_hyper_parameters = {
|
@@ -268,7 +270,7 @@ def optuna_optimization(
|
|
268
270
|
if pj.train_split_func is None:
|
269
271
|
split_func = split_data_train_validation_test
|
270
272
|
split_args = {
|
271
|
-
"stratification_min_max":
|
273
|
+
"stratification_min_max": True,
|
272
274
|
"back_test": True,
|
273
275
|
}
|
274
276
|
else:
|
@@ -56,10 +56,16 @@ def train_model_and_forecast_back_test(
|
|
56
56
|
- Validation data sets (list[pd.DataFrame])
|
57
57
|
- Test data sets (list[pd.DataFrame])
|
58
58
|
|
59
|
+
Raises:
|
60
|
+
InputDataInsufficientError: when input data is insufficient.
|
61
|
+
InputDataWrongColumnOrderError: when input data has an invalid column order.
|
62
|
+
ValueError: when the horizon is a string and the corresponding column is not in the input data.
|
63
|
+
InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.
|
64
|
+
|
59
65
|
"""
|
60
66
|
if pj.backtest_split_func is None:
|
61
67
|
backtest_split_func = backtest_split_default
|
62
|
-
backtest_split_args = {"stratification_min_max":
|
68
|
+
backtest_split_args = {"stratification_min_max": True}
|
63
69
|
else:
|
64
70
|
backtest_split_func, backtest_split_args = pj.backtest_split_func.load(
|
65
71
|
required_arguments=["data", "n_folds"]
|
@@ -124,6 +130,10 @@ def train_model_and_forecast_test_core(
|
|
124
130
|
- The trained model
|
125
131
|
- The forecast on the test set.
|
126
132
|
|
133
|
+
Raises:
|
134
|
+
NotImplementedError: When using invalid model type in the prediction job.
|
135
|
+
InputDataWrongColumnOrderError: When 'load' column is not first and 'horizon' column is not last.
|
136
|
+
|
127
137
|
"""
|
128
138
|
model = train_model.train_pipeline_step_train_model(
|
129
139
|
pj, modelspecs, train_data, validation_data
|