openstef-3.4.56-py3-none-any.whl → openstef-4.0.0a3-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- openstef-4.0.0a3.dist-info/METADATA +177 -0
- openstef-4.0.0a3.dist-info/RECORD +4 -0
- {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
- openstef/__init__.py +0 -14
- openstef/__main__.py +0 -3
- openstef/app_settings.py +0 -19
- openstef/data/NL_terrestrial_radiation.csv +0 -25585
- openstef/data/NL_terrestrial_radiation.csv.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
- openstef/data/dutch_holidays.csv +0 -1759
- openstef/data/dutch_holidays.csv.license +0 -3
- openstef/data/pv_single_coefs.csv +0 -601
- openstef/data/pv_single_coefs.csv.license +0 -3
- openstef/data_classes/__init__.py +0 -3
- openstef/data_classes/data_prep.py +0 -99
- openstef/data_classes/model_specifications.py +0 -30
- openstef/data_classes/prediction_job.py +0 -135
- openstef/data_classes/split_function.py +0 -97
- openstef/enums.py +0 -140
- openstef/exceptions.py +0 -74
- openstef/feature_engineering/__init__.py +0 -3
- openstef/feature_engineering/apply_features.py +0 -138
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
- openstef/feature_engineering/cyclic_features.py +0 -161
- openstef/feature_engineering/data_preparation.py +0 -152
- openstef/feature_engineering/feature_adder.py +0 -206
- openstef/feature_engineering/feature_applicator.py +0 -202
- openstef/feature_engineering/general.py +0 -141
- openstef/feature_engineering/holiday_features.py +0 -231
- openstef/feature_engineering/lag_features.py +0 -165
- openstef/feature_engineering/missing_values_transformer.py +0 -141
- openstef/feature_engineering/rolling_features.py +0 -58
- openstef/feature_engineering/weather_features.py +0 -492
- openstef/metrics/__init__.py +0 -3
- openstef/metrics/figure.py +0 -303
- openstef/metrics/metrics.py +0 -486
- openstef/metrics/reporter.py +0 -222
- openstef/model/__init__.py +0 -3
- openstef/model/basecase.py +0 -82
- openstef/model/confidence_interval_applicator.py +0 -242
- openstef/model/fallback.py +0 -77
- openstef/model/metamodels/__init__.py +0 -3
- openstef/model/metamodels/feature_clipper.py +0 -90
- openstef/model/metamodels/grouped_regressor.py +0 -222
- openstef/model/metamodels/missing_values_handler.py +0 -138
- openstef/model/model_creator.py +0 -214
- openstef/model/objective.py +0 -426
- openstef/model/objective_creator.py +0 -65
- openstef/model/regressors/__init__.py +0 -3
- openstef/model/regressors/arima.py +0 -197
- openstef/model/regressors/custom_regressor.py +0 -64
- openstef/model/regressors/dazls.py +0 -116
- openstef/model/regressors/flatliner.py +0 -95
- openstef/model/regressors/gblinear_quantile.py +0 -334
- openstef/model/regressors/lgbm.py +0 -29
- openstef/model/regressors/linear.py +0 -90
- openstef/model/regressors/linear_quantile.py +0 -305
- openstef/model/regressors/regressor.py +0 -114
- openstef/model/regressors/xgb.py +0 -52
- openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
- openstef/model/regressors/xgb_quantile.py +0 -228
- openstef/model/serializer.py +0 -431
- openstef/model/standard_deviation_generator.py +0 -81
- openstef/model_selection/__init__.py +0 -3
- openstef/model_selection/model_selection.py +0 -311
- openstef/monitoring/__init__.py +0 -3
- openstef/monitoring/performance_meter.py +0 -92
- openstef/monitoring/teams.py +0 -203
- openstef/pipeline/__init__.py +0 -3
- openstef/pipeline/create_basecase_forecast.py +0 -133
- openstef/pipeline/create_component_forecast.py +0 -168
- openstef/pipeline/create_forecast.py +0 -171
- openstef/pipeline/optimize_hyperparameters.py +0 -317
- openstef/pipeline/train_create_forecast_backtest.py +0 -163
- openstef/pipeline/train_model.py +0 -561
- openstef/pipeline/utils.py +0 -52
- openstef/postprocessing/__init__.py +0 -3
- openstef/postprocessing/postprocessing.py +0 -275
- openstef/preprocessing/__init__.py +0 -3
- openstef/preprocessing/preprocessing.py +0 -42
- openstef/settings.py +0 -15
- openstef/tasks/__init__.py +0 -3
- openstef/tasks/calculate_kpi.py +0 -324
- openstef/tasks/create_basecase_forecast.py +0 -118
- openstef/tasks/create_components_forecast.py +0 -162
- openstef/tasks/create_forecast.py +0 -145
- openstef/tasks/create_solar_forecast.py +0 -420
- openstef/tasks/create_wind_forecast.py +0 -80
- openstef/tasks/optimize_hyperparameters.py +0 -135
- openstef/tasks/split_forecast.py +0 -273
- openstef/tasks/train_model.py +0 -224
- openstef/tasks/utils/__init__.py +0 -3
- openstef/tasks/utils/dependencies.py +0 -107
- openstef/tasks/utils/predictionjobloop.py +0 -243
- openstef/tasks/utils/taskcontext.py +0 -160
- openstef/validation/__init__.py +0 -3
- openstef/validation/validation.py +0 -322
- openstef-3.4.56.dist-info/METADATA +0 -154
- openstef-3.4.56.dist-info/RECORD +0 -102
- openstef-3.4.56.dist-info/top_level.txt +0 -1
- /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
Removed file: openstef/tasks/create_solar_forecast.py (the solar forecast CRON task):

```diff
--- a/openstef/tasks/create_solar_forecast.py
+++ /dev/null
@@ -1,420 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-"""This module contains the CRON job that is periodically executed to make prognoses of solar features.
-
-These are useful for splitting the load in solar and wind contributions.
-
-Example:
-    This module is meant to be called directly from a CRON job. A description of
-    the CRON job can be found in the /k8s/CronJobs folder.
-    Alternatively this code can be run directly by running::
-        $ python create_solar_forecast
-
-"""
-from datetime import datetime, timedelta
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from scipy import optimize
-
-from openstef import PROJECT_ROOT
-from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
-from openstef.tasks.utils.taskcontext import TaskContext
-
-# TODO move to config
-PV_COEFS_FILEPATH = PROJECT_ROOT / "openstef" / "data" / "pv_single_coefs.csv"
-
-
-def make_solar_prediction_pj(pj, context, radius=30, peak_power=180961000.0):
-    """Make a solar prediction for a specific prediction job.
-
-    Args:
-        pj: (dict) prediction job
-        context: Task context
-        radius: Radius used to collect PV systems.
-        peak_power: Peak power.
-
-    """
-    context.logger.info("Get solar input data from database")
-    # pvdata is only stored in the prd database
-    solar_input = context.database.get_solar_input(
-        (pj["lat"], pj["lon"]),
-        pj["horizon_minutes"],
-        pj["resolution_minutes"],
-        radius=radius,
-        sid=pj["sid"],
-    )
-
-    if len(solar_input) == 0:
-        raise ValueError("Empty solar input")
-
-    context.logger.info("Make solar prediction using Fides")
-    power = fides(
-        solar_input[["aggregated", "radiation"]].rename(
-            columns=dict(radiation="insolation", aggregated="load")
-        )
-    )
-
-    # if the forecast is for a region, output should be scaled to peak power
-    if (radius != 0) and (not np.isnan(peak_power)):
-        power = peak_power / max(solar_input.aggregated) * power
-    context.logger.info("Store solar prediction in database")
-    power["pid"] = pj["id"]
-    power["type"] = "solar"
-    power["algtype"] = "Fides"
-    power["customer"] = pj["name"]
-    power["description"] = pj["description"]
-    context.database.write_forecast(power)
-
-
-def combine_forecasts(forecasts, combination_coefs):
-    """This function combines several independent forecasts into one, using predetermined coefficients.
-
-    Input:
-        - forecasts: pd.DataFrame(index = datetime, algorithm1, ..., algorithmn)
-        - combinationcoefs: pd.DataFrame(param1, ..., paramn, algorithm1, ..., algorithmn)
-
-    Output:
-        - pd.DataFrame(datetime, forecast)
-
-    """
-    models = [x for x in list(forecasts) if x not in ["created", "datetime"]]
-
-    # Add subset parameters to df
-    # Identify which parameters should be used to define subsets based on the
-    # combination coefs
-    subset_columns = [
-        "tAhead",
-        "hForecasted",
-        "weekday",
-        "hForecastedPer6h",
-        "tAheadPer2h",
-        "hCreated",
-    ]
-    subset_defs = [x for x in list(combination_coefs) if x in subset_columns]
-
-    df = forecasts.copy()
-    # Now add these subsetparams to df
-    if "tAhead" in subset_defs:
-        t_ahead = (df["datetime"] - df["created"]).dt.total_seconds() / 3600
-        df["tAhead"] = t_ahead
-
-    if "hForecasted" in subset_defs:
-        df["hForecasted"] = df.datetime.dt.hour
-
-    if "weekday" in subset_defs:
-        df["weekday"] = df.datetime.dt.weekday
-
-    if "hForecastedPer6h" in subset_defs:
-        df["hForecastedPer6h"] = pd.to_numeric(
-            np.floor(df.datetime.dt.hour / 6) * 6, downcast="integer"
-        )
-
-    if "tAheadPer2h" in subset_defs:
-        df["tAheadPer2h"] = pd.to_numeric(
-            np.floor((df.datetime - df.created).dt.total_seconds() / 60 / 60 / 2) * 2,
-            downcast="integer",
-        )
-
-    if "hCreated" in subset_defs:
-        df["hCreated"] = df.created.dt.hour
-
-    # Start building combinationcoef dataframe that later will be multiplied with the
-    # individual forecasts
-    # This is the best way for a backtest:
-    # uniquevalues = list([np.unique(df[param].values) for param in subsetDefs])
-    # permutations = list(itertools.product(*uniquevalues))
-
-    # This is the best way for a single forecast
-    permutations = [tuple(x) for x in df[subset_defs].values]
-
-    result_df = pd.DataFrame()
-
-    for subsetvalues in permutations:
-        subset = df.copy()
-        coefs = combination_coefs
-
-        # Create subset based on all subsetparams, for forecasts and coefs
-        for value, param in zip(subsetvalues, subset_defs):
-            subset = subset.loc[subset[param] == value]
-            # Define function which finds closest match of a value from an array of values.
-            # Use this later to find best coefficient from the given subsetting dividers
-            closest_match = min(coefs[param], key=lambda x, val=value: abs(x - val))
-            coefs = coefs.loc[coefs[param] == closest_match]
-            # Find closest matching value for combinationCoefParams corresponding to
-            # available subsetValues
-
-        # Of course, not all possible subsets have to be defined in the forecast.
-        # Skip empty subsets
-        if len(subset) == 0:
-            continue
-
-        # Multiply forecasts with their coefficients
-        result = np.multiply(subset[models], np.array(coefs[models]))
-        result["forecast"] = result.apply(np.nansum, axis=1)
-        # Add handling with NA values for a single forecast
-        result["coefsum"] = np.nansum(coefs[models].values)
-        nanselector = np.isnan(subset[models].iloc[0].values)
-        result["nonnacoefsum"] = np.nansum(coefs[models].values.flatten() * nanselector)
-        result["forecast"] = (
-            result["forecast"]
-            * result["coefsum"]
-            / (result["coefsum"] - result["nonnacoefsum"])
-        )
-        result["datetime"] = subset["datetime"]
-        result["created"] = subset["created"]
-        result = result[["datetime", "created", "forecast"]]
-        result_df = pd.concat([result_df, result])
-    # sort by datetime
-    result_df.sort_values(["datetime", "created"], inplace=True)
-
-    return result_df
-
-
-def fides(data: pd.DataFrame, all_forecasts: bool = False):
-    """Fides makes a forecast based on persistence and a direct fit with insolation.
-
-    Args:
-        data: pd.DataFrame(index = datetime, columns = ['output', 'insolation'])
-        all_forecasts: Should all forecasts be returned or only the combination
-
-    Example:
-        import numpy as np
-        index = pd.date_range(start = "2017-01-01 09:00:00", freq = '15T', periods = 300)
-        data = pd.DataFrame(index = index,
-                            data = dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7,1.7, 300)))
-        data['insolation'] = data.load * np.random.uniform(0.8, 1.2, len(index)) + 0.1
-        data.loc[int(len(index)/3*2):,"load"] = np.nan
-
-    """
-    insolation_forecast = apply_fit_insol(data, add_to_df=False)
-    persistence = apply_persistence(data, how="mean", smooth_entries=4, add_to_df=True)
-
-    df = insolation_forecast.merge(persistence, left_index=True, right_index=True)
-
-    coefs = pd.read_csv(PV_COEFS_FILEPATH)
-
-    # Apply combination coefs
-    df["created"] = df.loc[df.load.isnull()].index.min()
-    forecast = combine_forecasts(
-        df.loc[df.load.isnull(), ["forecaopenstefitInsol", "persistence", "created"]]
-        .reset_index()
-        .rename(columns=dict(index="datetime")),
-        coefs,
-    ).set_index("datetime")[["forecast"]]
-
-    if all_forecasts:
-        forecast = forecast.merge(
-            df[["persistence", "forecaopenstefitInsol"]],
-            left_index=True,
-            right_index=True,
-            how="left",
-        )
-
-    return forecast
-
-
-def main(config=None, database=None, **kwargs):
-    taskname = Path(__file__).name.replace(".py", "")
-
-    if database is None or config is None:
-        raise RuntimeError(
-            "Please specify a config object and/or database connection object. These"
-            " can be found in the openstef-dbc package."
-        )
-
-    with TaskContext(taskname, config, database) as context:
-        context.logger.info("Querying solar prediction jobs from database")
-        prediction_jobs = context.database.get_prediction_jobs_solar()
-        num_prediction_jobs = len(prediction_jobs)
-
-        # only make customer = Provincie once an hour
-        utc_now_minute = datetime.utcnow().minute
-        if utc_now_minute >= 15:
-            prediction_jobs = [
-                pj for pj in prediction_jobs if str(pj["name"]).startswith("Provincie")
-            ]
-            num_removed_jobs = num_prediction_jobs - len(prediction_jobs)
-            num_prediction_jobs = len(prediction_jobs)
-            context.logger.info(
-                "Remove 'Provincie' solar predictions",
-                num_removed_jobs=num_removed_jobs,
-                num_prediction_jobs=num_prediction_jobs,
-            )
-
-        PredictionJobLoop(context, prediction_jobs=prediction_jobs).map(
-            make_solar_prediction_pj, context, kwargs=kwargs
-        )
-
-
-def calc_norm(data, how="max", add_to_df=True):
-    """This script calculates the norm of a given dataset.
-
-    Input:
-        - data: pd.DataFrame(index = datetime, columns = [load])
-        - how: str, can be any function from numpy, recognized by np.'how'
-    Optional:
-        - add_to_df: Bool, add the norm to the data
-
-    Output:
-        - pd.DataFrame(index = datetime, columns = [load])
-    NB: range of datetime of input is equal to range of datetime of output
-
-    Example:
-        import pandas as pd
-        import numpy as np
-        index = pd.date_range(start = "2017-01-01 09:00:00", freq = '15T', periods = 200)
-        data = pd.DataFrame(index = index,
-                            data = dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7,1.7, 200)))
-
-    """
-    colname = list(data)[0]
-    if how == "max":
-        df = data.groupby(data.index.time).apply(lambda x: x.max(skipna=True))
-    if how == "mean":
-        df = data.groupby(data.index.time).apply(lambda x: x.mean(skipna=True))
-
-    # rename
-    df.rename(columns={colname: "Norm"}, inplace=True)
-
-    # Merge to dataframe if add_to_df == True
-    if add_to_df:
-        df = data.merge(df, left_on=data.index.time, right_index=True)[
-            [colname, "Norm"]
-        ].sort_index()
-
-    return df
-
-
-def apply_persistence(data, how="mean", smooth_entries=4, add_to_df=True, colname=None):
-    """This script calculates the persistence forecast.
-
-    Input:
-        - data: pd.DataFrame(index = datetime, columns = [load]), datetime is expected to have historic values, as well as NA values
-    Optional:
-        - how: str, how to determine the norm (abs or mean)
-        - smoothEntries: int, number of historic entries over which the persistence is smoothed
-        - add_to_df: Bool, add the forecast to the data
-        - option of specifying colname if load is not first column
-
-    Output:
-        - pd.DataFrame(index = datetime, columns = [(load,) persistence])
-    NB: range of datetime of input is equal to range of datetime of output
-
-    Example:
-        import pandas as pd
-        import numpy as np
-        index = pd.date_range(start = "2017-01-01 09:00:00", freq = '15T', periods = 300)
-        data = pd.DataFrame(index = index,
-                            data = dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7,1.7, 300)))
-        data.loc[200:,"load"] = np.nan
-
-    """
-    data = data.sort_index()
-
-    if colname is None:
-        colname = list(data)[0]
-
-    df = calc_norm(data, how=how, add_to_df=True)
-
-    # this selects the last non NA values
-    last_entries = df.loc[df[colname].notnull()][-smooth_entries:]
-
-    norm_mean = last_entries.Norm.mean()
-    if norm_mean == 0:
-        norm_mean = 1
-
-    factor = last_entries[colname].mean() / norm_mean
-    df["persistence"] = df.Norm * factor
-
-    if add_to_df:
-        df = df[[colname, "persistence"]]
-    else:
-        df = df[["persistence"]]
-
-    return df
-
-
-def apply_fit_insol(data, add_to_df=True, hours_delta=None, polynomial=False):
-    """This model fits insolation to PV yield and uses this fit to forecast PV yield. It uses a 2nd order polynomial.
-
-    Input:
-        - data: pd.DataFrame(index = datetime, columns = [load, insolation])
-    Optional:
-        - hoursDelta: period of forecast in hours [int] (e.g. every 6 hours for KNMI)
-        - addToDF: Bool, add the norm to the data
-
-    Output:
-        - pd.DataFrame(index = datetime, columns = [(load), forecaopenstefitInsol])
-    NB: range of datetime of input is equal to range of datetime of output
-
-    Example:
-        import pandas as pd
-        import numpy as np
-        index = pd.date_range(start = "2017-01-01 09:00:00", freq = '15T', periods = 300)
-        data = pd.DataFrame(index = index,
-                            data = dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7,1.7, len(index))))
-        data['insolation'] = data.load * np.random.uniform(0.8, 1.2, len(index)) + 0.1
-        data.loc[int(len(index)/3*2):,"load"] = np.nan
-
-    """
-    colname = list(data)[0]
-
-    # Define subset, only keep non-NaN values and the most recent forecasts
-    # This ensures a good training set
-    if hours_delta is None:
-        subset = data.loc[(data[colname].notnull()) & (data[colname] > 0)]
-    else:
-        subset = data.loc[
-            (data[colname].notnull())
-            & (data[colname] > 0)
-            & (data["tAhead"] < timedelta(hours=hours_delta))
-            & (data["tAhead"] >= timedelta(hours=0))
-        ]
-
-    def linear_fun(coefs, values):
-        return coefs[0] * values + coefs[1]
-
-    def second_order_poly(coefs, values):
-        return coefs[0] * values**2 + coefs[1] * values + coefs[2]
-
-    # Define function to be minimized and subsequently minimize this function
-    if polynomial:
-        # Define starting guess
-        x0 = [1, 1, 0]  # ax**2 + bx + c.
-        fun = (
-            lambda x: (second_order_poly(x, subset.insolation) - subset[colname])
-            .abs()
-            .mean()
-        )
-        # , bounds = bnds, constraints = cons)
-        res = optimize.minimize(fun, x0)
-        # Apply fit
-        df = second_order_poly(res.x, data[["insolation"]]).rename(
-            columns=dict(insolation="forecaopenstefitInsol")
-        )
-
-    else:
-        x0 = [1, 0]
-        fun = (
-            lambda x: (linear_fun(x, subset.insolation) - subset[colname]).abs().mean()
-        )
-        res = optimize.minimize(fun, x0)
-        df = linear_fun(res.x, data[["insolation"]]).rename(
-            columns=dict(insolation="forecaopenstefitInsol")
-        )
-
-    # Merge to dataframe if addToDF == True
-    if add_to_df:
-        if hours_delta is None:
-            df = data.merge(df, left_index=True, right_index=True)
-        else:
-            df = pd.concat([data, df], axis=1)
-
-    return df
-
-
-if __name__ == "__main__":
-    main()
```
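The removed Fides helper blends two simple predictors: a persistence forecast (repeat the normalized daily profile, rescaled to the most recent observations) and a direct fit of PV yield on insolation, combined via the coefficients in pv_single_coefs.csv. As a usage illustration, here is a minimal, hypothetical sketch that exercises the two removed helpers on the synthetic series from their docstrings. It assumes openstef==3.4.56 is installed (the last release that still ships this module); the data is illustrative only, and recent pandas versions may emit deprecation warnings for some of the internal groupby/merge patterns.

```python
# Hypothetical usage sketch, not part of the diff: exercises the two removed
# helpers on the synthetic PV series from their docstrings.
import numpy as np
import pandas as pd

from openstef.tasks.create_solar_forecast import apply_fit_insol, apply_persistence

# Daily sine-shaped PV load with noise; insolation roughly tracks the load.
index = pd.date_range(start="2017-01-01 09:00:00", freq="15min", periods=300)
load = np.sin(index.hour / 24 * np.pi) * np.random.uniform(0.7, 1.7, 300)
data = pd.DataFrame(index=index, data=dict(load=load))
data["insolation"] = data["load"] * np.random.uniform(0.8, 1.2, len(index)) + 0.1

# The last third of the load is "unknown" and should be forecast.
# (The docstrings slice with data.loc[200:, ...]; that integer label slice
# fails on a DatetimeIndex in recent pandas, hence .iloc here.)
data.iloc[int(len(index) / 3 * 2):, data.columns.get_loc("load")] = np.nan

# Persistence: repeat the normalized daily profile, rescaled to the most
# recent non-NaN observations.
persistence = apply_persistence(data, how="mean", smooth_entries=4, add_to_df=True)

# Insolation fit: a linear fit of load against insolation (polynomial=True
# switches to the 2nd-order fit), evaluated over the whole index.
insol_fit = apply_fit_insol(data, add_to_df=False)

print(persistence.tail(3))
print(insol_fit.tail(3))
```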
Removed file: openstef/tasks/create_wind_forecast.py (the wind forecast CRON task):

```diff
--- a/openstef/tasks/create_wind_forecast.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-
-"""This module contains the CRON job that is periodically executed to make prognoses of wind features.
-
-These features are useful for splitting the load in solar and wind contributions and
-making prognoses.
-
-Example:
-    This module is meant to be called directly from a CRON job. A description of the
-    CRON job can be found in the /k8s/CronJobs folder.
-    Alternatively this code can be run directly by running::
-        $ python create_wind_forecast
-
-"""
-from pathlib import Path
-
-from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.feature_engineering import weather_features
-from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
-from openstef.tasks.utils.taskcontext import TaskContext
-
-
-def make_wind_forecast_pj(pj: PredictionJobDataClass, context: TaskContext) -> None:
-    """Make a wind prediction for a specific prediction job.
-
-    Args:
-        pj: Prediction job
-        context: Context manager
-
-    """
-    context.logger.info("Get turbine data", turbine_type=pj["turbine_type"])
-    turbine_data = context.database.get_power_curve(pj["turbine_type"])
-
-    context.logger.info(
-        "Get windspeed", location=[pj["lat"], pj["lon"]], hub_height=pj["hub_height"]
-    )
-    windspeed = context.database.get_wind_input(
-        (pj["lat"], pj["lon"]),
-        pj["hub_height"],
-        pj["horizon_minutes"],
-        pj["resolution_minutes"],
-    )
-
-    context.logger.info("Calculate windturbine power", n_turbines=pj["n_turbines"])
-    power = weather_features.calculate_windturbine_power_output(
-        windspeed, pj["n_turbines"], turbine_data
-    ).rename(columns=dict(windspeed_100m="forecast"))
-
-    context.logger.info("Store wind prediction in database")
-    power["pid"] = pj["id"]
-    power["type"] = "wind"
-    power["algtype"] = "powerCurve"
-    power["customer"] = pj["name"]
-    power["description"] = pj["description"]
-    context.database.write_forecast(power, t_ahead_series=True)
-
-
-def main(config=None, database=None):
-    taskname = Path(__file__).name.replace(".py", "")
-
-    if database is None or config is None:
-        raise RuntimeError(
-            "Please specify a config object and/or database connection object. These"
-            " can be found in the openstef-dbc package."
-        )
-
-    with TaskContext(taskname, config, database) as context:
-        context.logger.info("Querying wind prediction jobs from database")
-        prediction_jobs = context.database.get_prediction_jobs_wind()
-        prediction_jobs = [x for x in prediction_jobs if x["model"] == "latest"]
-
-        PredictionJobLoop(context, prediction_jobs=prediction_jobs).map(
-            make_wind_forecast_pj, context
-        )
-
-
-if __name__ == "__main__":
-    main()
```
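The wind task delegates the physics to `weather_features.calculate_windturbine_power_output`, which maps hub-height windspeed to turbine power via the turbine's power curve. The following self-contained sketch shows the shape of that windspeed-to-power mapping and mirrors the task's `windspeed_100m` → `forecast` rename; the logistic curve form and every parameter value below are illustrative assumptions, not the package's actual turbine data.

```python
# Illustrative sketch, not from the diff: a logistic approximation of a turbine
# power curve, i.e. the windspeed -> power step the removed task delegates to
# weather_features.calculate_windturbine_power_output. All values are made up.
import numpy as np
import pandas as pd


def logistic_power_curve(windspeed, rated_power, slope_center, steepness, n_turbines=1):
    """Approximate a turbine power curve with a logistic function of windspeed."""
    return n_turbines * rated_power / (1.0 + np.exp(-steepness * (windspeed - slope_center)))


index = pd.date_range("2024-01-01", periods=6, freq="h")
windspeed = pd.DataFrame(index=index, data={"windspeed_100m": [2.0, 4.0, 6.0, 8.0, 10.0, 12.0]})

# Same output shape and rename as the removed task: a single "forecast" column.
power = windspeed.apply(
    logistic_power_curve, rated_power=3.0e6, slope_center=8.0, steepness=0.7, n_turbines=10
).rename(columns=dict(windspeed_100m="forecast"))
print(power)
```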
Removed file: openstef/tasks/optimize_hyperparameters.py (the hyperparameter optimization CRON task):

```diff
--- a/openstef/tasks/optimize_hyperparameters.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-
-# -*- coding: utf-8 -*-
-"""optimize_hyper_params.py.
-
-This module contains the CRON job that is periodically executed to optimize the
-hyperparameters for the prognosis models.
-
-Example:
-    This module is meant to be called directly from a CRON job. A description of
-    the CRON job can be found in the /k8s/CronJobs folder.
-    Alternatively this code can be run directly by running::
-
-        $ python optimize_hyperparameters.py
-
-"""
-from datetime import datetime, timedelta
-from pathlib import Path
-
-from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import ModelType, PipelineType
-from openstef.model.serializer import MLflowSerializer
-from openstef.monitoring import teams
-from openstef.pipeline.optimize_hyperparameters import optimize_hyperparameters_pipeline
-from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
-from openstef.tasks.utils.taskcontext import TaskContext
-
-MAX_AGE_HYPER_PARAMS_DAYS = 31
-DEFAULT_CHECK_HYPER_PARAMS_AGE = True
-DEFAULT_TRAINING_PERIOD_DAYS = 121
-
-
-def optimize_hyperparameters_task(
-    pj: PredictionJobDataClass,
-    context: TaskContext,
-    check_hyper_param_age: bool = DEFAULT_CHECK_HYPER_PARAMS_AGE,
-) -> None:
-    """Optimize hyperparameters task.
-
-    Expected prediction job keys: "id", "model", "lat", "lon", "name", "description"
-    Only used for logging: "name", "description"
-
-    Args:
-        pj: Prediction job
-        context: Task context
-        check_hyper_param_age: Boolean indicating if optimization can be skipped in case existing
-            hyperparameters do not exceed the maximum age.
-
-    """
-    # Check pipeline types
-    if PipelineType.HYPER_PARMATERS not in pj.pipelines_to_run:
-        context.logger.info(
-            "Skip this PredictionJob because hyper_parameters pipeline is not specified in the pj."
-        )
-        return
-
-    # TODO: Improve implementation by using a field in the database and leveraging the
-    # `pipelines_to_run` attribute of the `PredictionJobDataClass` object. This
-    # would require a change to the MySQL datamodel.
-    if (
-        context.config.externally_posted_forecasts_pids
-        and pj.id in context.config.externally_posted_forecasts_pids
-    ):
-        context.logger.info(
-            "Skip this PredictionJob because its forecasts are posted by an external process."
-        )
-        return
-
-    # Retrieve the paths for storing model and reports from the config manager
-    mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri
-    artifact_folder = context.config.paths_artifact_folder
-
-    # Determine if we need to optimize hyperparams
-    # retrieve last model age where hyperparameters were optimized
-    mlflow_serializer = MLflowSerializer(mlflow_tracking_uri=mlflow_tracking_uri)
-    hyper_params_age = mlflow_serializer.get_model_age(
-        experiment_name=str(pj["id"]), hyperparameter_optimization_only=True
-    )
-
-    if (hyper_params_age < MAX_AGE_HYPER_PARAMS_DAYS) and check_hyper_param_age:
-        context.logger.warning(
-            "Skip hyperparameter optimization",
-            pid=pj["id"],
-            hyper_params_age=hyper_params_age,
-            max_age=MAX_AGE_HYPER_PARAMS_DAYS,
-        )
-        return
-
-    datetime_start = datetime.utcnow() - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS)
-    datetime_end = datetime.utcnow()
-
-    input_data = context.database.get_model_input(
-        pid=pj["id"],
-        location=[pj["lat"], pj["lon"]],
-        datetime_start=datetime_start,
-        datetime_end=datetime_end,
-    )
-
-    # Optimize hyperparams
-    hyperparameters = optimize_hyperparameters_pipeline(
-        pj,
-        input_data,
-        mlflow_tracking_uri=mlflow_tracking_uri,
-        artifact_folder=artifact_folder,
-    )
-
-    # Send message to Teams
-    title = (
-        f'Optimized hyperparameters for prediction job {pj["name"]} {pj["description"]}'
-    )
-
-    teams.post_teams(teams.format_message(title=title, params=hyperparameters))
-
-
-def main(config=None, database=None):
-    taskname = Path(__file__).name.replace(".py", "")
-
-    if database is None or config is None:
-        raise RuntimeError(
-            "Please specify a config object and/or database connection object. These"
-            " can be found in the openstef-dbc package."
-        )
-
-    with TaskContext(taskname, config, database) as context:
-        model_type = [ml.value for ml in ModelType]
-
-        PredictionJobLoop(context, model_type=model_type).map(
-            optimize_hyperparameters_task, context
-        )
-
-
-if __name__ == "__main__":
-    main()
```