openstef 3.4.19__py3-none-any.whl → 3.4.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/data_classes/prediction_job.py +3 -0
- openstef/enums.py +1 -0
- openstef/feature_engineering/missing_values_transformer.py +99 -0
- openstef/model/model_creator.py +10 -0
- openstef/model/regressors/linear_quantile.py +243 -0
- openstef/pipeline/optimize_hyperparameters.py +1 -1
- openstef/pipeline/train_create_forecast_backtest.py +2 -0
- openstef/pipeline/train_model.py +1 -0
- openstef/postprocessing/postprocessing.py +3 -3
- openstef/tasks/calculate_kpi.py +6 -4
- openstef/tasks/create_basecase_forecast.py +8 -3
- openstef/tasks/create_components_forecast.py +8 -3
- openstef/tasks/create_forecast.py +5 -2
- {openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/METADATA +2 -1
- {openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/RECORD +18 -16
- {openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/LICENSE +0 -0
- {openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/WHEEL +0 -0
- {openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/top_level.txt +0 -0
openstef/data_classes/prediction_job.py
CHANGED

@@ -25,10 +25,13 @@ class PredictionJobDataClass(BaseModel):
         - ``"xgb_quantile"``
         - ``"lgb"``
         - ``"linear"``
+        - ``"linear_quantile"``
 
     If unsure what to pick, choose ``"xgb"``.
 
     """
+    model_kwargs: Optional[dict]
+    """The model parameters that should be used."""
     forecast_type: str
     """The type of forecasts that should be made.
 
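The new `model_kwargs` field gives a prediction job a place to carry model-specific parameters. Below is a minimal, illustrative sketch of how it might be filled for the new model type; only the `model` and `model_kwargs` field names come from this diff, the keyword names come from the `valid_model_kwargs` entry added in `model_creator.py` further down, and all values are made up.

# Illustrative only: the other required PredictionJobDataClass fields are omitted.
prediction_job_settings = {
    "model": "linear_quantile",
    "model_kwargs": {
        "quantiles": (0.1, 0.5, 0.9),
        "alpha": 0.0,
        "imputation_strategy": "mean",
    },
}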
openstef/enums.py
CHANGED
openstef/feature_engineering/missing_values_transformer.py
ADDED

@@ -0,0 +1,99 @@
+# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
+#
+# SPDX-License-Identifier: MPL-2.0
+from typing import Union, List, Optional
+
+import numpy as np
+import pandas as pd
+from sklearn.impute import SimpleImputer
+from sklearn.preprocessing import FunctionTransformer
+from sklearn.utils.validation import check_array
+
+
+class MissingValuesTransformer:
+    """MissingValuesTransformer handles missing values in data by imputing them with a given strategy.
+
+    It also removes columns that are always null from the data.
+
+    """
+
+    in_feature_names: Optional[List[str]] = None
+    _n_in_features: Optional[int] = None
+
+    non_null_feature_names: List[str] = None
+
+    def __init__(
+        self,
+        missing_values: Union[int, float, str, None] = np.nan,
+        imputation_strategy: str = None,
+        fill_value: Union[str, int, float] = None,
+    ):
+        """Initialize missing values handler.
+
+        Args:
+            missing_values: The placeholder for the missing values. All occurrences of
+                `missing_values` will be imputed.
+            imputation_strategy: The imputation strategy to use
+                Can be one of "mean", "median", "most_frequent", "constant" or None.
+            fill_value: When strategy == "constant", fill_value is used to replace all
+                occurrences of missing_values.
+
+        """
+        self.missing_values = missing_values
+        self.imputation_strategy = imputation_strategy
+        self.fill_value = fill_value
+
+    def fit(self, x, y=None):
+        """Fit the imputer on the input data."""
+        _ = check_array(x, force_all_finite="allow-nan")
+        if not isinstance(x, pd.DataFrame):
+            x = pd.DataFrame(np.asarray(x))
+
+        self.in_feature_names = list(x.columns)
+        self._n_in_features = x.shape[1]
+
+        # Remove always null columns
+        is_column_null = x.isnull().all(axis="index")
+        self.non_null_feature_names = list(x.columns[~is_column_null])
+
+        # Build the proper imputation transformer
+        # - Identity function if strategy is None
+        # - SimpleImputer with the dedicated strategy
+        if self.imputation_strategy is None:
+            self.imputer_ = FunctionTransformer(func=self._identity)
+        else:
+            self.imputer_ = SimpleImputer(
+                missing_values=self.missing_values,
+                strategy=self.imputation_strategy,
+                fill_value=self.fill_value,
+            ).set_output(transform="pandas")
+
+        # Imputers do not support labels
+        self.imputer_.fit(X=x, y=None)
+
+    def transform(self, x) -> pd.DataFrame:
+        """Transform the input data by imputing missing values."""
+        _ = check_array(x, force_all_finite="allow-nan")
+        if not isinstance(x, pd.DataFrame):
+            x = pd.DataFrame(np.asarray(x))
+
+        x = x[self.non_null_feature_names]
+
+        return self.imputer_.transform(x)
+
+    def fit_transform(self, x, y=None):
+        """Fit the imputer on the input data and transform it.
+
+        Returns:
+            The data with missing values imputed.
+
+        """
+        self.fit(x, y)
+        return self.transform(x)
+
+    @classmethod
+    def _identity(cls, x):
+        return x
+
+    def __sklearn_is_fitted__(self) -> bool:
+        return self.in_feature_names is not None
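A minimal usage sketch of the new transformer (not part of the diff; the call pattern follows the fit_transform/transform methods shown above, and the column names and values are made up):

import numpy as np
import pandas as pd

from openstef.feature_engineering.missing_values_transformer import (
    MissingValuesTransformer,
)

# Gaps are imputed with the chosen strategy (here: column mean); columns that
# are entirely null would be dropped during fit.
df = pd.DataFrame(
    {
        "radiation": [100.0, np.nan, 300.0],
        "windspeed_100m": [5.0, 6.0, 7.0],
    }
)

transformer = MissingValuesTransformer(imputation_strategy="mean")
clean = transformer.fit_transform(df)
# clean["radiation"] is now [100.0, 200.0, 300.0]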
openstef/model/model_creator.py
CHANGED
@@ -11,6 +11,7 @@ from openstef.model.regressors.arima import ARIMAOpenstfRegressor
 from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
 from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
 from openstef.model.regressors.linear import LinearOpenstfRegressor
+from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
 from openstef.model.regressors.regressor import OpenstfRegressor
 from openstef.model.regressors.xgb import XGBOpenstfRegressor
 from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor

@@ -91,6 +92,14 @@ valid_model_kwargs = {
         "imputation_strategy",
         "fill_value",
     ],
+    MLModelType.LINEAR_QUANTILE: [
+        "alpha",
+        "quantiles",
+        "solver",
+        "missing_values",
+        "imputation_strategy",
+        "fill_value",
+    ],
     MLModelType.ARIMA: [
         "backtest_max_horizon",
         "order",

@@ -109,6 +118,7 @@ class ModelCreator:
         MLModelType.LGB: LGBMOpenstfRegressor,
         MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
         MLModelType.LINEAR: LinearOpenstfRegressor,
+        MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
         MLModelType.ARIMA: ARIMAOpenstfRegressor,
     }
 
openstef/model/regressors/linear_quantile.py
ADDED

@@ -0,0 +1,243 @@
+# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
+#
+# SPDX-License-Identifier: MPL-2.0
+import re
+from typing import Dict, Union, Set
+
+import numpy as np
+import pandas as pd
+from sklearn.base import RegressorMixin
+from sklearn.linear_model import QuantileRegressor
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.utils.validation import check_is_fitted
+
+from openstef.feature_engineering.missing_values_transformer import (
+    MissingValuesTransformer,
+)
+from openstef.model.regressors.regressor import OpenstfRegressor
+
+DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
+
+
+class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
+    quantiles: tuple[float, ...]
+    alpha: float
+    solver: str
+
+    imputer_: MissingValuesTransformer
+    x_scaler_: MinMaxScaler
+    y_scaler_: MinMaxScaler
+    models_: Dict[float, QuantileRegressor]
+
+    is_fitted_: bool = False
+
+    FEATURE_IGNORE_LIST: Set[str] = {
+        "IsWeekendDay",
+        "IsWeekDay",
+        "IsSunday",
+        "Month",
+        "Quarter",
+    }
+
+    def __init__(
+        self,
+        quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
+        alpha: float = 0.0,
+        solver: str = "highs",
+        missing_values: Union[int, float, str, None] = np.nan,
+        imputation_strategy: str = None,
+        fill_value: Union[str, int, float] = None,
+    ):
+        """Initialize LinearQuantileOpenstfRegressor.
+
+        Model that provides quantile regression with SKLearn QuantileRegressor.
+        For each desired quantile an QuantileRegressor model is trained,
+        these can later be used to predict quantiles.
+
+        This model is sensitive to feature quality and therefore has logic to remove
+        some custom features produced by OpenSTEF. The features that are removed are:
+        - Holiday features (is_christmas, is_*)
+        - Lagged features (T-1d, T-*)
+        - Point in time features (IsWeekendDay, IsWeekDay, IsSunday, Month, Quarter)
+
+        Args:
+            quantiles: Tuple with desired quantiles, quantile 0.5 is required.
+                For example: (0.1, 0.5, 0.9)
+            alpha: Regularization constant for L1 regularization
+            solver: Solver to use for optimization
+            missing_values: Value to be considered as missing value
+            imputation_strategy: Imputation strategy
+            fill_value: Fill value
+
+        """
+        super().__init__()
+
+        # Check if quantile 0.5 is present. This is required.
+        if 0.5 not in quantiles:
+            raise ValueError(
+                "Cannot train quantile model as 0.5 is not in requested quantiles!"
+            )
+
+        self.quantiles = quantiles
+        self.alpha = alpha
+        self.solver = solver
+        self.imputer_ = MissingValuesTransformer(
+            missing_values=missing_values,
+            imputation_strategy=imputation_strategy,
+            fill_value=fill_value,
+        )
+        self.x_scaler_ = MinMaxScaler(feature_range=(-1, 1))
+        self.y_scaler_ = MinMaxScaler(feature_range=(-1, 1))
+        self.models_ = {
+            quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
+            for quantile in quantiles
+        }
+
+    @property
+    def feature_names(self) -> list:
+        """The names of the features used to train the model."""
+        check_is_fitted(self)
+        return self.imputer_.non_null_feature_names
+
+    @staticmethod
+    def _get_importance_names():
+        return {
+            "gain_importance_name": "total_gain",
+            "weight_importance_name": "weight",
+        }
+
+    @property
+    def can_predict_quantiles(self) -> bool:
+        """Attribute that indicates if the model predict particular quantiles."""
+        return True
+
+    def _is_feature_ignored(self, feature_name: str) -> bool:
+        """Check if a feature is ignored by the model.
+
+        Args:
+            feature_name: Feature name
+
+        Returns:
+            True if the feature is ignored, False otherwise
+
+        """
+        return (
+            # Ignore named features
+            feature_name in self.FEATURE_IGNORE_LIST
+            or
+            # Ignore holiday features
+            re.match(r"is_", feature_name) is not None
+            or
+            # Ignore lag features
+            re.match(r"T-", feature_name) is not None
+        )
+
+    def _remove_ignored_features(self, x: pd.DataFrame) -> pd.DataFrame:
+        """Remove ignored features from the input data.
+
+        Args:
+            x: Input data
+
+        Returns:
+            Data without ignored features
+
+        """
+        return x.drop(columns=[c for c in x.columns if self._is_feature_ignored(c)])
+
+    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
+        """Fits linear quantile model.
+
+        Args:
+            x: Feature matrix
+            y: Labels
+
+        Returns:
+            Fitted LinearQuantile model
+
+        """
+        if not isinstance(y, pd.Series):
+            y = pd.Series(np.asarray(y), name="load")
+
+        x = self._remove_ignored_features(x)
+
+        # Fix nan columns
+        x = self.imputer_.fit_transform(x)
+        if x.isna().any().any():
+            raise ValueError(
+                "There are nan values in the input data. Set "
+                "imputation_strategy to solve them."
+            )
+
+        # Apply feature scaling
+        x_scaled = self.x_scaler_.fit_transform(x)
+        y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]
+
+        # Add more focus on extreme / peak values
+        sample_weight = np.abs(y_scaled)
+
+        # Fit quantile regressors
+        for quantile in self.quantiles:
+            self.models_[quantile].fit(
+                X=x_scaled, y=y_scaled, sample_weight=sample_weight
+            )
+
+        self.is_fitted_ = True
+
+        self.feature_importances_ = self._get_feature_importance_from_linear()
+
+        return self
+
+    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
+        """Makes a prediction for a desired quantile.
+
+        Args:
+            x: Feature matrix
+            quantile: Quantile for which a prediciton is desired,
+                note that only quantile are available for which a model is trained,
+                and that this is a quantile-model specific keyword
+
+        Returns:
+            Prediction
+
+        Raises:
+            ValueError in case no model is trained for the requested quantile
+
+        """
+        check_is_fitted(self)
+
+        # Preprocess input data
+        x = self._remove_ignored_features(x)
+        x = self.imputer_.transform(x)
+        x_scaled = self.x_scaler_.transform(x)
+
+        # Make prediction
+        y_pred = self.models_[quantile].predict(X=x_scaled)
+
+        # Inverse scaling
+        y_pred = self.y_scaler_.inverse_transform(y_pred.reshape(-1, 1))[:, 0]
+
+        return y_pred
+
+    def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
+        check_is_fitted(self)
+        feature_importance_linear = np.abs(self.models_[quantile].coef_)
+        reg_feature_importances_dict = dict(
+            zip(self.imputer_.non_null_feature_names, feature_importance_linear)
+        )
+        return np.array(
+            [
+                reg_feature_importances_dict.get(c, 0)
+                for c in self.imputer_.in_feature_names
+            ]
+        )
+
+    @classmethod
+    def _get_param_names(cls):
+        return [
+            "quantiles",
+            "alpha",
+            "solver",
+        ]
+
+    def __sklearn_is_fitted__(self) -> bool:
+        return self.is_fitted_
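A self-contained sketch of training and predicting with the new regressor (not part of the diff; the feature names and data are made up and deliberately avoid the ignored patterns such as `is_*`, `T-*` and the point-in-time features listed above):

import numpy as np
import pandas as pd

from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor

rng = np.random.default_rng(0)
x = pd.DataFrame(
    {
        "radiation": rng.uniform(0, 500, 96),
        "windspeed_100m": rng.uniform(0, 20, 96),
    }
)
y = pd.Series(rng.uniform(-1.0, 1.0, 96), name="load")

# 0.5 must be among the requested quantiles; one QuantileRegressor is fitted per quantile.
model = LinearQuantileOpenstfRegressor(
    quantiles=(0.1, 0.5, 0.9), imputation_strategy="mean"
)
model.fit(x, y)

p90 = model.predict(x, quantile=0.9)
p50 = model.predict(x)  # defaults to the median model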
openstef/pipeline/optimize_hyperparameters.py
CHANGED

@@ -247,7 +247,7 @@ def optuna_optimization(
         - The objective object used by optuna
 
     """
-    model = ModelCreator.create_model(pj["model"])
+    model = ModelCreator.create_model(pj["model"], **(pj.model_kwargs or {}))
     # Apply set to default hyperparameters if they are specified in the pj
     if pj.default_modelspecs:
         valid_hyper_parameters = {
openstef/pipeline/train_create_forecast_backtest.py
CHANGED

@@ -61,6 +61,7 @@ def train_model_and_forecast_back_test(
         InputDataWrongColumnOrderError: when input data has a invalid column order.
         ValueError: when the horizon is a string and the corresponding column in not in the input data
         InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.
+
     """
     if pj.backtest_split_func is None:
         backtest_split_func = backtest_split_default

@@ -132,6 +133,7 @@ def train_model_and_forecast_test_core(
     Raises:
         NotImplementedError: When using invalid model type in the prediction job.
         InputDataWrongColumnOrderError: When 'load' column is not first and 'horizon' column is not last.
+
     """
     model = train_model.train_pipeline_step_train_model(
         pj, modelspecs, train_data, validation_data
openstef/pipeline/train_model.py
CHANGED
@@ -258,9 +258,9 @@ def sort_quantiles(
 ) -> pd.DataFrame:
     """Sort quantile values so quantiles do not cross.
 
-    This function assumes that all quantile columns start with 'quantile_P'
-
-
+    This function assumes that all quantile columns start with 'quantile_P' For more academic details on why this is
+    mathematically sounds, please refer to Quantile and Probability Curves Without Crossing (Chernozhukov, 2010)
+
     """
     p_columns = [col for col in forecast.columns if col.startswith(quantile_col_start)]
 
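The non-crossing requirement mentioned in that docstring can be illustrated with a small sketch. This is only an illustration of the rearrangement idea from Chernozhukov et al. (2010), not necessarily the exact openstef implementation; the example data are made up.

import numpy as np
import pandas as pd

# Assumes the 'quantile_P' columns are ordered from low to high quantile,
# as the naming convention implies.
forecast = pd.DataFrame(
    {
        "quantile_P10": [1.0, 5.0],
        "quantile_P50": [2.0, 4.0],
        "quantile_P90": [1.5, 6.0],  # both rows contain a crossing
    }
)
p_columns = [c for c in forecast.columns if c.startswith("quantile_P")]
# Row-wise sorting restores monotonicity: the first row becomes 1.0, 1.5, 2.0.
forecast[p_columns] = np.sort(forecast[p_columns].values, axis=1)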
openstef/tasks/calculate_kpi.py
CHANGED
@@ -72,6 +72,8 @@ def check_kpi_task(
     context: TaskContext,
     start_time: datetime,
     end_time: datetime,
+    threshold_optimizing=THRESHOLD_OPTIMIZING,
+    threshold_retraining=THRESHOLD_RETRAINING,
 ) -> None:
     # Apply default parameters if none are provided
     if start_time is None:

@@ -102,20 +104,20 @@ def check_kpi_task(
 
     # Add pid to the list of pids that should be retrained or optimized if
     # performance is insufficient
-    if kpis["47.0h"]["rMAE"] > THRESHOLD_RETRAINING:
+    if kpis["47.0h"]["rMAE"] > threshold_retraining:
         context.logger.warning(
             "Need to retrain model, retraining threshold rMAE 47h exceeded",
             t_ahead="47.0h",
             rMAE=kpis["47.0h"]["rMAE"],
-            retraining_threshold=THRESHOLD_RETRAINING,
+            retraining_threshold=threshold_retraining,
         )
 
-    if kpis["47.0h"]["rMAE"] > THRESHOLD_OPTIMIZING:
+    if kpis["47.0h"]["rMAE"] > threshold_optimizing:
         context.logger.warning(
             "Need to optimize hyperparameters, optimizing threshold rMAE 47h exceeded",
             t_ahead="47.0h",
             rMAE=kpis["47.0h"]["rMAE"],
-            optimizing_threshold=THRESHOLD_OPTIMIZING,
+            optimizing_threshold=threshold_optimizing,
         )
 
 
openstef/tasks/create_basecase_forecast.py
CHANGED

@@ -32,7 +32,10 @@ T_AHEAD_DAYS: int = 14
 
 
 def create_basecase_forecast_task(
-    pj: PredictionJobDataClass, context: TaskContext
+    pj: PredictionJobDataClass,
+    context: TaskContext,
+    t_behind_days=T_BEHIND_DAYS,
+    t_ahead_days=T_AHEAD_DAYS,
 ) -> None:
     """Top level task that creates a basecase forecast.
 

@@ -41,6 +44,8 @@ def create_basecase_forecast_task(
     Args:
         pj: Prediction job
         context: Contect object that holds a config manager and a database connection
+        t_behind_days: number of days included as history. This is used to generated lagged features for the to-be-forecasted period
+        t_ahead_days: number of days a basecase forecast is created for
 
     """
     # Check pipeline types

@@ -63,8 +68,8 @@ def create_basecase_forecast_task(
         return
 
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
-    datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS)
+    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
+    datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
 
     # Retrieve input data
     input_data = context.database.get_model_input(
openstef/tasks/create_components_forecast.py
CHANGED

@@ -43,7 +43,10 @@ T_AHEAD_DAYS = 3
 
 
 def create_components_forecast_task(
-    pj: PredictionJobDataClass, context: TaskContext
+    pj: PredictionJobDataClass,
+    context: TaskContext,
+    t_behind_days: int = T_BEHIND_DAYS,
+    t_ahead_days: int = T_AHEAD_DAYS,
 ) -> None:
     """Top level task that creates a components forecast.
 

@@ -52,6 +55,8 @@ def create_components_forecast_task(
     Args:
         pj: Prediction job
         context: Contect object that holds a config manager and a database connection
+        t_behind_days: number of days in the past that the component forecast is created for
+        t_ahead_days: number of days in the future that the component forecast is created for
 
     Raises:
         ComponentForecastTooShortHorizonError: If the forecast horizon is too short

@@ -71,8 +76,8 @@ def create_components_forecast_task(
         return
 
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
-    datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS)
+    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
+    datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
 
     logger.info(
         "Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end
openstef/tasks/create_forecast.py
CHANGED

@@ -34,7 +34,9 @@ from openstef.validation.validation import detect_ongoing_zero_flatliner
 T_BEHIND_DAYS: int = 14
 
 
-def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> None:
+def create_forecast_task(
+    pj: PredictionJobDataClass, context: TaskContext, t_behind_days: int = T_BEHIND_DAYS
+) -> None:
     """Top level task that creates a forecast.
 
     On this task level all database and context manager dependencies are resolved.

@@ -45,6 +47,7 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> None:
     Args:
         pj: Prediction job
         context: Contect object that holds a config manager and a database connection
+        t_behind_days: number of days included as history. This is used to generated lagged features for the to-be-forecasted period
 
     """
     # Check pipeline types

@@ -70,7 +73,7 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> None:
     mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri
 
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
+    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
     datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60)
 
     # Retrieve input data
{openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: openstef
-Version: 3.4.19
+Version: 3.4.21
 Summary: Open short term energy forecaster
 Home-page: https://github.com/OpenSTEF/openstef
 Author: Alliander N.V

@@ -128,3 +128,4 @@ Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/C
 
 # Contact
 Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
+
{openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/RECORD
CHANGED

@@ -1,7 +1,7 @@
 openstef/__init__.py,sha256=93UM6m0LLQhO69-mSqLuUy73jgs4W7Iuxfo3Lm8c98g,419
 openstef/__main__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/app_settings.py,sha256=EJTDtimctFQQ-3f7ZcOQaRYohpZk3JD6aZBWPFYM2_A,582
-openstef/enums.py,sha256=
+openstef/enums.py,sha256=LRKvmrCP_ntLE-r-AzKeSGt569WUfX0uX5db8Rk_Ag8,669
 openstef/exceptions.py,sha256=U4u2LTcdT6cmzpipT2Jh7kq9nCjT_-6gntn8yjuhGU0,1993
 openstef/settings.py,sha256=nSgkBqFxuqB3w7Rwo60i8j37c5ngDbt6vpjHS6QtJXQ,354
 openstef/data/dutch_holidays_2020-2022.csv,sha256=pS-CjE0igYXd-2dG-MlqyvR2fgYgXkbNmgCKyTjmwxs,23704

@@ -29,7 +29,7 @@ openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z.license,sha25
 openstef/data_classes/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/data_classes/data_prep.py,sha256=gRSL7UiHvZis8m8z7VoTCZc0Ccffhef5_hmSyApnqK0,3417
 openstef/data_classes/model_specifications.py,sha256=Uod1W3QzhRqVLb6zvXwxh9wRL3EHCzSvX0oDNd28cFk,1197
-openstef/data_classes/prediction_job.py,sha256=
+openstef/data_classes/prediction_job.py,sha256=t4PtGFWAX27AeaqDOtdhECuiosuCiuL0hBK7D1l3ghU,5165
 openstef/data_classes/split_function.py,sha256=ljQIQQu1t1Y_CVWGAy25jrM6wG9odIVVQVimrT1n-1s,3358
 openstef/feature_engineering/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/feature_engineering/apply_features.py,sha256=-3fyisOVj9ckIkRe2iYfWutbXSX8iqBkcvt8AYr-gmE,3906

@@ -39,6 +39,7 @@ openstef/feature_engineering/feature_applicator.py,sha256=DR7jayrEMlra4BFL1Ps5WV
 openstef/feature_engineering/general.py,sha256=tgU4_1stag9jJmaQAfWCMhfBscznVuQvW5hPK_z9_9g,4438
 openstef/feature_engineering/holiday_features.py,sha256=3Ff4Lkm26h8wJVoBplUewt4HfsvOUS9zj0x0MxewIm8,7842
 openstef/feature_engineering/lag_features.py,sha256=Dr6qS8UhdgEHPZZSe-w6ibtjl_lcbcQohhqdZN9fqEU,5652
+openstef/feature_engineering/missing_values_transformer.py,sha256=pKz_vRZRzfUNBw9Z-mF2AXRPeCzKbTha2gPb73bpkdw,3381
 openstef/feature_engineering/weather_features.py,sha256=Lr9DItyHvJ2CpWQ1r6A83tJKtR2k_Wwn32FdFTGblO0,15750
 openstef/metrics/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/metrics/figure.py,sha256=KDoezYem9wdS13kUx7M7FOy-4u88Sg3OX1DuhNT6kgQ,9751

@@ -48,7 +49,7 @@ openstef/model/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,16
 openstef/model/basecase.py,sha256=caI6Q-8y0ymlxGK9Js_H3Vh0q6ruNHlGD5RG0_kE5M0,2878
 openstef/model/confidence_interval_applicator.py,sha256=XAk3m4gPhneCoQJlOmyEcoeI0sdHNC-ch4T1CWWHPrQ,8934
 openstef/model/fallback.py,sha256=VV9ehgnoMZtWzqKk9H1t8wnERFh5CyC4TvDIuRP_ZDI,2861
-openstef/model/model_creator.py,sha256=
+openstef/model/model_creator.py,sha256=XQXtYI_q4Q2a2P-8I3lUuTMZqet3nqsNE7mzmjNsxIg,5310
 openstef/model/objective.py,sha256=eqNBYGfhEVGegOm0PbizowuFImKblRqHgxkp9lgaKQc,13500
 openstef/model/objective_creator.py,sha256=Rjd2YF1Ie9Z-au_v4fOuR63IcM69EEeoe_5Hj_Dz8-E,1970
 openstef/model/serializer.py,sha256=IUiiAWvoGVoWzmS-akI6LC7jHRY5Ln_vOCBZy1LnESY,17238

@@ -62,6 +63,7 @@ openstef/model/regressors/custom_regressor.py,sha256=Hsmxahc9nfSWD0aEZ6cm4pxW2no
 openstef/model/regressors/dazls.py,sha256=cCYFewJEv3Fn01wdZpaKNSiYmEwzuED7PQrrWzwyTEg,8084
 openstef/model/regressors/lgbm.py,sha256=zCdn1euEdSFxYJzH8XqQFFnb6R4JVUnmineKjX_Gy-g,800
 openstef/model/regressors/linear.py,sha256=uOvZMLGZH_9nXfmS5honCMfyVeyGXP1Cza9A_BdXlVw,3665
+openstef/model/regressors/linear_quantile.py,sha256=TmZi1p0H3cNFCQEdYhl36dvtxGOJ1d2EmY5BfUBRtiA,7650
 openstef/model/regressors/regressor.py,sha256=uJcx59AyCPE9f_yPcAQ59h2ZS7eNsDpIHJrladKvHIw,3461
 openstef/model/regressors/xgb.py,sha256=HggA1U10srzdysjV560BMMX66kfaxCKAnOZB3JyyT_Y,808
 openstef/model/regressors/xgb_quantile.py,sha256=PzKIxqN_CnEPFmzXACNuzLSmZSHbooTuiJ5ckJ9vh_E,7805

@@ -74,19 +76,19 @@ openstef/pipeline/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU
 openstef/pipeline/create_basecase_forecast.py,sha256=YkpiqohETTAETb4GiVlK_btw5dpixJy2LmFZdm10iaI,4623
 openstef/pipeline/create_component_forecast.py,sha256=A0dmILy_BuAAf2U_9i2FOj6KItIdZdGzi6hNDk-da4Q,6416
 openstef/pipeline/create_forecast.py,sha256=F09civdIumNQwJq2hraea5QTQx7DgvEliXKs4Y3f8Mc,5689
-openstef/pipeline/optimize_hyperparameters.py,sha256=
-openstef/pipeline/train_create_forecast_backtest.py,sha256
-openstef/pipeline/train_model.py,sha256=
+openstef/pipeline/optimize_hyperparameters.py,sha256=3SLkcLR7XC4IeN48C-XT_lxlfCqW_D0NoMpZcrB9UUM,11045
+openstef/pipeline/train_create_forecast_backtest.py,sha256=-kZqCWal5zYLL0k0Sapks1zTmU5unNAooVPaPos1_7E,6050
+openstef/pipeline/train_model.py,sha256=Bxtwb8xQvyEYIo-6D5yNvvXl3WfCZLY1ok8aqaKv6zg,19660
 openstef/pipeline/utils.py,sha256=23mB31p19FoGWelLJzxNmqlzGwEr3fCDBEA37V2kpYY,2167
 openstef/postprocessing/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
-openstef/postprocessing/postprocessing.py,sha256=
+openstef/postprocessing/postprocessing.py,sha256=iR1dhfIqBSRl1NpQiMAceDsY-fHs1qnzDc-b5jFdzvc,9321
 openstef/preprocessing/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/preprocessing/preprocessing.py,sha256=bM_cSSSb2vGTD79RGzUrI6KoELbzlCyJwc7jqQGNEsE,1454
 openstef/tasks/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
-openstef/tasks/calculate_kpi.py,sha256=
-openstef/tasks/create_basecase_forecast.py,sha256=
-openstef/tasks/create_components_forecast.py,sha256=
-openstef/tasks/create_forecast.py,sha256=
+openstef/tasks/calculate_kpi.py,sha256=78DuK30ohWIHuc6oneRXalcNMXQ5mzy2qDr9xsPdSQs,11882
+openstef/tasks/create_basecase_forecast.py,sha256=lxor1E3WQ_XAZDYWdNJKE1PY57scz39bKu2Id9U2GwE,4126
+openstef/tasks/create_components_forecast.py,sha256=j4m9AGjnMDx23FmsaZGPYn9rBMHsRd_h-m1RAfhF8to,6139
+openstef/tasks/create_forecast.py,sha256=NWd2fdbZ9CKDi190v7PF14IUdz6pyME2A-ssRNDdaYs,5750
 openstef/tasks/create_solar_forecast.py,sha256=bTr7NThTF6Yj405qAqRaJmlBUrL7HATqVVzsi9hMdMw,15049
 openstef/tasks/create_wind_forecast.py,sha256=RhshkmNSyFWx4Y6yQn02GzHjWTREbN5A5GAeWv0JpcE,2907
 openstef/tasks/optimize_hyperparameters.py,sha256=s-z8YQJF6Lf3DdYgKHEpAdlbFJ3a-0Gj0Ahsqj1DErc,4758

@@ -98,8 +100,8 @@ openstef/tasks/utils/predictionjobloop.py,sha256=Ysy3zF5lzPMz_asYDKeF5m0qgVT3tCt
 openstef/tasks/utils/taskcontext.py,sha256=L9K14ycwgVxbIVUjH2DIn_QWbnu-OfxcGtQ1K9T6sus,5630
 openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/validation/validation.py,sha256=628xaDbAm8B4AYtFOAn8_SXLjejNfULGCfX3hVf_mU0,11119
-openstef-3.4.
-openstef-3.4.
-openstef-3.4.
-openstef-3.4.
-openstef-3.4.
+openstef-3.4.21.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
+openstef-3.4.21.dist-info/METADATA,sha256=oQxA5n-ScXZWQLROZ2yZZzNK7wJQt2tRk0rWzSpycEE,7393
+openstef-3.4.21.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+openstef-3.4.21.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
+openstef-3.4.21.dist-info/RECORD,,
{openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/LICENSE
File without changes

{openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/WHEEL
File without changes

{openstef-3.4.19.dist-info → openstef-3.4.21.dist-info}/top_level.txt
File without changes