openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/tasks/create_solar_forecast.py
@@ -1,420 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
- """This module contains the CRON job that is periodically executed to make prognoses of solar features.
-
- These are useful for splitting the load in solar and wind contributions.
-
- Example:
-     This module is meant to be called directly from a CRON job. A description of
-     the CRON job can be found in the /k8s/CronJobs folder.
-     Alternatively this code can be run directly by running::
-         $ python create_solar_forecast
-
- """
- from datetime import datetime, timedelta
- from pathlib import Path
-
- import numpy as np
- import pandas as pd
- from scipy import optimize
-
- from openstef import PROJECT_ROOT
- from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
- from openstef.tasks.utils.taskcontext import TaskContext
-
- # TODO move to config
- PV_COEFS_FILEPATH = PROJECT_ROOT / "openstef" / "data" / "pv_single_coefs.csv"
-
-
- def make_solar_prediction_pj(pj, context, radius=30, peak_power=180961000.0):
-     """Make a solar prediction for a specific prediction job.
-
-     Args:
-         pj: (dict) prediction job
-         context: Task context
-         radius: Radius used to collect PV systems.
-         peak_power: Peak power.
-
-     """
-     context.logger.info("Get solar input data from database")
-     # pvdata is only stored in the prd database
-     solar_input = context.database.get_solar_input(
-         (pj["lat"], pj["lon"]),
-         pj["horizon_minutes"],
-         pj["resolution_minutes"],
-         radius=radius,
-         sid=pj["sid"],
-     )
-
-     if len(solar_input) == 0:
-         raise ValueError("Empty solar input")
-
-     context.logger.info("Make solar prediction using Fides")
-     power = fides(
-         solar_input[["aggregated", "radiation"]].rename(
-             columns=dict(radiation="insolation", aggregated="load")
-         )
-     )
-
-     # if the forecast is for a region, output should be scaled to peak power
-     if (radius != 0) and (not np.isnan(peak_power)):
-         power = peak_power / max(solar_input.aggregated) * power
-     context.logger.info("Store solar prediction in database")
-     power["pid"] = pj["id"]
-     power["type"] = "solar"
-     power["algtype"] = "Fides"
-     power["customer"] = pj["name"]
-     power["description"] = pj["description"]
-     context.database.write_forecast(power)
-
-
- def combine_forecasts(forecasts, combination_coefs):
-     """This function combines several independent forecasts into one, using predetermined coefficients.
-
-     Input:
-         - forecasts: pd.DataFrame(index = datetime, algorithm1, ..., algorithmn)
-         - combination_coefs: pd.DataFrame(param1, ..., paramn, algorithm1, ..., algorithmn)
-
-     Output:
-         - pd.DataFrame(datetime, forecast)
-
-     """
-     models = [x for x in list(forecasts) if x not in ["created", "datetime"]]
-
-     # Add subset parameters to df
-     # Identify which parameters should be used to define subsets based on the
-     # combination coefs
-     subset_columns = [
-         "tAhead",
-         "hForecasted",
-         "weekday",
-         "hForecastedPer6h",
-         "tAheadPer2h",
-         "hCreated",
-     ]
-     subset_defs = [x for x in list(combination_coefs) if x in subset_columns]
-
-     df = forecasts.copy()
-     # Now add these subset params to df
-     if "tAhead" in subset_defs:
-         t_ahead = (df["datetime"] - df["created"]).dt.total_seconds() / 3600
-         df["tAhead"] = t_ahead
-
-     if "hForecasted" in subset_defs:
-         df["hForecasted"] = df.datetime.dt.hour
-
-     if "weekday" in subset_defs:
-         df["weekday"] = df.datetime.dt.weekday
-
-     if "hForecastedPer6h" in subset_defs:
-         df["hForecastedPer6h"] = pd.to_numeric(
-             np.floor(df.datetime.dt.hour / 6) * 6, downcast="integer"
-         )
-
-     if "tAheadPer2h" in subset_defs:
-         df["tAheadPer2h"] = pd.to_numeric(
-             np.floor((df.datetime - df.created).dt.total_seconds() / 60 / 60 / 2) * 2,
-             downcast="integer",
-         )
-
-     if "hCreated" in subset_defs:
-         df["hCreated"] = df.created.dt.hour
-
-     # Start building the combination coef dataframe that will later be multiplied with the
-     # individual forecasts
-     # This is the best way for a backtest:
-     # uniquevalues = list([np.unique(df[param].values) for param in subsetDefs])
-     # permutations = list(itertools.product(*uniquevalues))
-
-     # This is the best way for a single forecast
-     permutations = [tuple(x) for x in df[subset_defs].values]
-
-     result_df = pd.DataFrame()
-
-     for subsetvalues in permutations:
-         subset = df.copy()
-         coefs = combination_coefs
-
-         # Create subset based on all subset params, for forecasts and coefs
-         for value, param in zip(subsetvalues, subset_defs):
-             subset = subset.loc[subset[param] == value]
-             # Find the closest match of a value from an array of values.
-             # Use this later to find the best coefficient from the given subsetting dividers
-             closest_match = min(coefs[param], key=lambda x, val=value: abs(x - val))
-             coefs = coefs.loc[coefs[param] == closest_match]
-             # Find closest matching value for combinationCoefParams corresponding to
-             # available subsetValues
-
-         # Of course, not all possible subsets have to be defined in the forecast.
-         # Skip empty subsets
-         if len(subset) == 0:
-             continue
-
-         # Multiply forecasts with their coefficients
-         result = np.multiply(subset[models], np.array(coefs[models]))
-         result["forecast"] = result.apply(np.nansum, axis=1)
-         # Add handling of NA values for a single forecast
-         result["coefsum"] = np.nansum(coefs[models].values)
-         nanselector = np.isnan(subset[models].iloc[0].values)
-         result["nonnacoefsum"] = np.nansum(coefs[models].values.flatten() * nanselector)
-         result["forecast"] = (
-             result["forecast"]
-             * result["coefsum"]
-             / (result["coefsum"] - result["nonnacoefsum"])
-         )
-         result["datetime"] = subset["datetime"]
-         result["created"] = subset["created"]
-         result = result[["datetime", "created", "forecast"]]
-         result_df = pd.concat([result_df, result])
-     # sort by datetime
-     result_df.sort_values(["datetime", "created"], inplace=True)
-
-     return result_df
-
-
- def fides(data: pd.DataFrame, all_forecasts: bool = False):
-     """Fides makes a forecast based on persistence and a direct fit with insolation.
-
-     Args:
-         data: pd.DataFrame(index = datetime, columns = ['load', 'insolation'])
-         all_forecasts: Should all forecasts be returned or only the combination
-
-     Example:
-         import numpy as np
-         index = pd.date_range(start="2017-01-01 09:00:00", freq="15T", periods=300)
-         data = pd.DataFrame(index=index,
-                             data=dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7, 1.7, 300)))
-         data['insolation'] = data.load * np.random.uniform(0.8, 1.2, len(index)) + 0.1
-         data.iloc[int(len(index)/3*2):, data.columns.get_loc("load")] = np.nan
-
-     """
-     insolation_forecast = apply_fit_insol(data, add_to_df=False)
-     persistence = apply_persistence(data, how="mean", smooth_entries=4, add_to_df=True)
-
-     df = insolation_forecast.merge(persistence, left_index=True, right_index=True)
-
-     coefs = pd.read_csv(PV_COEFS_FILEPATH)
-
-     # Apply combination coefs
-     df["created"] = df.loc[df.load.isnull()].index.min()
-     forecast = combine_forecasts(
-         df.loc[df.load.isnull(), ["forecaopenstefitInsol", "persistence", "created"]]
-         .reset_index()
-         .rename(columns=dict(index="datetime")),
-         coefs,
-     ).set_index("datetime")[["forecast"]]
-
-     if all_forecasts:
-         forecast = forecast.merge(
-             df[["persistence", "forecaopenstefitInsol"]],
-             left_index=True,
-             right_index=True,
-             how="left",
-         )
-
-     return forecast
-
-
- def main(config=None, database=None, **kwargs):
-     taskname = Path(__file__).name.replace(".py", "")
-
-     if database is None or config is None:
-         raise RuntimeError(
-             "Please specify a config object and/or database connection object. These"
-             " can be found in the openstef-dbc package."
-         )
-
-     with TaskContext(taskname, config, database) as context:
-         context.logger.info("Querying solar prediction jobs from database")
-         prediction_jobs = context.database.get_prediction_jobs_solar()
-         num_prediction_jobs = len(prediction_jobs)
-
-         # only make customer = Provincie once an hour
-         utc_now_minute = datetime.utcnow().minute
-         if utc_now_minute >= 15:
-             prediction_jobs = [
-                 pj for pj in prediction_jobs if str(pj["name"]).startswith("Provincie")
-             ]
-             num_removed_jobs = num_prediction_jobs - len(prediction_jobs)
-             num_prediction_jobs = len(prediction_jobs)
-             context.logger.info(
-                 "Remove 'Provincie' solar predictions",
-                 num_removed_jobs=num_removed_jobs,
-                 num_prediction_jobs=num_prediction_jobs,
-             )
-
-         PredictionJobLoop(context, prediction_jobs=prediction_jobs).map(
-             make_solar_prediction_pj, context, kwargs=kwargs
-         )
-
-
- def calc_norm(data, how="max", add_to_df=True):
-     """This function calculates the norm of a given dataset.
-
-     Input:
-         - data: pd.DataFrame(index = datetime, columns = [load])
-         - how: str, aggregation used for the norm, applied as np.<how> (here "max" or "mean")
-     Optional:
-         - add_to_df: Bool, add the norm to the data
-
-     Output:
-         - pd.DataFrame(index = datetime, columns = [load])
-     NB: range of datetime of input is equal to range of datetime of output
-
-     Example:
-         import pandas as pd
-         import numpy as np
-         index = pd.date_range(start="2017-01-01 09:00:00", freq="15T", periods=200)
-         data = pd.DataFrame(index=index,
-                             data=dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7, 1.7, 200)))
-
-     """
-     colname = list(data)[0]
-     if how == "max":
-         df = data.groupby(data.index.time).apply(lambda x: x.max(skipna=True))
-     if how == "mean":
-         df = data.groupby(data.index.time).apply(lambda x: x.mean(skipna=True))
-
-     # rename
-     df.rename(columns={colname: "Norm"}, inplace=True)
-
-     # Merge to dataframe if add_to_df == True
-     if add_to_df:
-         df = data.merge(df, left_on=data.index.time, right_index=True)[
-             [colname, "Norm"]
-         ].sort_index()
-
-     return df
-
-
- def apply_persistence(data, how="mean", smooth_entries=4, add_to_df=True, colname=None):
-     """This function calculates the persistence forecast.
-
-     Input:
-         - data: pd.DataFrame(index = datetime, columns = [load]); the index is expected to contain historic values as well as NA values
-     Optional:
-         - how: str, how to determine the norm (max or mean)
-         - smooth_entries: int, number of historic entries over which the persistence is smoothed
-         - add_to_df: Bool, add the forecast to the data
-         - colname: str, specify the column name if load is not the first column
-
-     Output:
-         - pd.DataFrame(index = datetime, columns = [(load,) persistence])
-     NB: range of datetime of input is equal to range of datetime of output
-
-     Example:
-         import pandas as pd
-         import numpy as np
-         index = pd.date_range(start="2017-01-01 09:00:00", freq="15T", periods=300)
-         data = pd.DataFrame(index=index,
-                             data=dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7, 1.7, 300)))
-         data.iloc[200:, data.columns.get_loc("load")] = np.nan
-
-     """
-     data = data.sort_index()
-
-     if colname is None:
-         colname = list(data)[0]
-
-     df = calc_norm(data, how=how, add_to_df=True)
-
-     # this selects the last non-NA values
-     last_entries = df.loc[df[colname].notnull()][-smooth_entries:]
-
-     norm_mean = last_entries.Norm.mean()
-     if norm_mean == 0:
-         norm_mean = 1
-
-     factor = last_entries[colname].mean() / norm_mean
-     df["persistence"] = df.Norm * factor
-
-     if add_to_df:
-         df = df[[colname, "persistence"]]
-     else:
-         df = df[["persistence"]]
-
-     return df
-
-
- def apply_fit_insol(data, add_to_df=True, hours_delta=None, polynomial=False):
-     """This model fits insolation to PV yield and uses this fit to forecast PV yield. It uses a linear fit, or a 2nd order polynomial if polynomial=True.
-
-     Input:
-         - data: pd.DataFrame(index = datetime, columns = [load, insolation])
-     Optional:
-         - hours_delta: period of forecast in hours [int] (e.g. every 6 hours for KNMI)
-         - add_to_df: Bool, add the fit to the data
-
-     Output:
-         - pd.DataFrame(index = datetime, columns = [(load), forecaopenstefitInsol])
-     NB: range of datetime of input is equal to range of datetime of output
-
-     Example:
-         import pandas as pd
-         import numpy as np
-         index = pd.date_range(start="2017-01-01 09:00:00", freq="15T", periods=300)
-         data = pd.DataFrame(index=index,
-                             data=dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7, 1.7, len(index))))
-         data['insolation'] = data.load * np.random.uniform(0.8, 1.2, len(index)) + 0.1
-         data.iloc[int(len(index)/3*2):, data.columns.get_loc("load")] = np.nan
-
-     """
-     colname = list(data)[0]
-
-     # Define subset, only keep non-NaN values and the most recent forecasts
-     # This ensures a good training set
-     if hours_delta is None:
-         subset = data.loc[(data[colname].notnull()) & (data[colname] > 0)]
-     else:
-         subset = data.loc[
-             (data[colname].notnull())
-             & (data[colname] > 0)
-             & (data["tAhead"] < timedelta(hours=hours_delta))
-             & (data["tAhead"] >= timedelta(hours=0))
-         ]
-
-     def linear_fun(coefs, values):
-         return coefs[0] * values + coefs[1]
-
-     def second_order_poly(coefs, values):
-         return coefs[0] * values**2 + coefs[1] * values + coefs[2]
-
-     # Define function to be minimized and subsequently minimize this function
-     if polynomial:
-         # Define starting guess
-         x0 = [1, 1, 0]  # ax**2 + bx + c.
-         fun = (
-             lambda x: (second_order_poly(x, subset.insolation) - subset[colname])
-             .abs()
-             .mean()
-         )
-         # , bounds = bnds, constraints = cons)
-         res = optimize.minimize(fun, x0)
-         # Apply fit
-         df = second_order_poly(res.x, data[["insolation"]]).rename(
-             columns=dict(insolation="forecaopenstefitInsol")
-         )
-
-     else:
-         x0 = [1, 0]
-         fun = (
-             lambda x: (linear_fun(x, subset.insolation) - subset[colname]).abs().mean()
-         )
-         res = optimize.minimize(fun, x0)
-         df = linear_fun(res.x, data[["insolation"]]).rename(
-             columns=dict(insolation="forecaopenstefitInsol")
-         )
-
-     # Merge to dataframe if add_to_df == True
-     if add_to_df:
-         if hours_delta is None:
-             df = data.merge(df, left_index=True, right_index=True)
-         else:
-             df = pd.concat([data, df], axis=1)
-
-     return df
-
-
- if __name__ == "__main__":
-     main()
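
Note: the removed fides forecaster blends two simple predictors: a persistence forecast built from the mean daily profile, and a direct linear fit of load on insolation, combined with coefficients read from pv_single_coefs.csv. Below is a minimal, self-contained sketch of that idea on the synthetic data from the docstrings; equal weights stand in for the tuned coefficients, and the helper names are illustrative, not the openstef API.

import numpy as np
import pandas as pd
from scipy import optimize

index = pd.date_range("2017-01-01 09:00:00", freq="15min", periods=300)
data = pd.DataFrame(
    index=index,
    data=dict(load=np.sin(index.hour / 24 * np.pi) * np.random.uniform(0.7, 1.7, 300)),
)
data["insolation"] = data.load * np.random.uniform(0.8, 1.2, len(index)) + 0.1
data.iloc[200:, data.columns.get_loc("load")] = np.nan  # the part to forecast

# Persistence: mean profile per time-of-day, rescaled to the last observed level
norm = data.load.groupby(data.index.time).mean()
last_entries = data.load.dropna().iloc[-4:]
factor = last_entries.mean() / max(norm.mean(), 1e-9)
persistence = pd.Series(norm.reindex(data.index.time).values, index=data.index) * factor

# Direct linear fit of load on insolation over the observed history
train = data.dropna()
res = optimize.minimize(
    lambda c: np.abs(c[0] * train.insolation + c[1] - train.load).mean(), [1.0, 0.0]
)
insolation_fit = res.x[0] * data.insolation + res.x[1]

# Equal-weight blend where openstef would apply the coefficients from pv_single_coefs.csv
forecast = 0.5 * persistence + 0.5 * insolation_fit
print(forecast[data.load.isnull()].head())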
openstef/tasks/create_wind_forecast.py
@@ -1,80 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
-
- """This module contains the CRON job that is periodically executed to make prognoses of wind features.
-
- These features are useful for splitting the load in solar and wind contributions and
- making prognoses.
-
- Example:
-     This module is meant to be called directly from a CRON job. A description of the
-     CRON job can be found in the /k8s/CronJobs folder.
-     Alternatively this code can be run directly by running::
-         $ python create_wind_forecast
-
- """
- from pathlib import Path
-
- from openstef.data_classes.prediction_job import PredictionJobDataClass
- from openstef.feature_engineering import weather_features
- from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
- from openstef.tasks.utils.taskcontext import TaskContext
-
-
- def make_wind_forecast_pj(pj: PredictionJobDataClass, context: TaskContext) -> None:
-     """Make a wind prediction for a specific prediction job.
-
-     Args:
-         pj: Prediction job
-         context: Context manager
-
-     """
-     context.logger.info("Get turbine data", turbine_type=pj["turbine_type"])
-     turbine_data = context.database.get_power_curve(pj["turbine_type"])
-
-     context.logger.info(
-         "Get windspeed", location=[pj["lat"], pj["lon"]], hub_height=pj["hub_height"]
-     )
-     windspeed = context.database.get_wind_input(
-         (pj["lat"], pj["lon"]),
-         pj["hub_height"],
-         pj["horizon_minutes"],
-         pj["resolution_minutes"],
-     )
-
-     context.logger.info("Calculate windturbine power", n_turbines=pj["n_turbines"])
-     power = weather_features.calculate_windturbine_power_output(
-         windspeed, pj["n_turbines"], turbine_data
-     ).rename(columns=dict(windspeed_100m="forecast"))
-
-     context.logger.info("Store wind prediction in database")
-     power["pid"] = pj["id"]
-     power["type"] = "wind"
-     power["algtype"] = "powerCurve"
-     power["customer"] = pj["name"]
-     power["description"] = pj["description"]
-     context.database.write_forecast(power, t_ahead_series=True)
-
-
- def main(config=None, database=None):
-     taskname = Path(__file__).name.replace(".py", "")
-
-     if database is None or config is None:
-         raise RuntimeError(
-             "Please specify a config object and/or database connection object. These"
-             " can be found in the openstef-dbc package."
-         )
-
-     with TaskContext(taskname, config, database) as context:
-         context.logger.info("Querying wind prediction jobs from database")
-         prediction_jobs = context.database.get_prediction_jobs_wind()
-         prediction_jobs = [x for x in prediction_jobs if x["model"] == "latest"]
-
-         PredictionJobLoop(context, prediction_jobs=prediction_jobs).map(
-             make_wind_forecast_pj, context
-         )
-
-
- if __name__ == "__main__":
-     main()
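
Note: calculate_windturbine_power_output, also removed in this diff (see openstef/feature_engineering/weather_features.py above), maps hub-height wind speed to turbine power via a power curve. A rough sketch of such a mapping, assuming a logistic curve; the parameter names (rated_power, slope_center, steepness) are illustrative, not the exact openstef signature.

import numpy as np
import pandas as pd

def power_curve_output(
    windspeed: pd.Series,
    n_turbines: int,
    rated_power: float,   # W per turbine
    slope_center: float,  # m/s, wind speed at half of rated power
    steepness: float,     # 1/(m/s), steepness of the curve
) -> pd.Series:
    """Logistic power curve: near zero below cut-in, saturating at rated power."""
    return n_turbines * rated_power / (1 + np.exp(-steepness * (windspeed - slope_center)))

windspeed_100m = pd.Series([2.0, 5.0, 8.0, 11.0, 14.0])
print(power_curve_output(windspeed_100m, n_turbines=10, rated_power=3e6,
                         slope_center=8.0, steepness=0.7))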
openstef/tasks/optimize_hyperparameters.py
@@ -1,135 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
-
- # -*- coding: utf-8 -*-
- """optimize_hyperparameters.py.
-
- This module contains the CRON job that is periodically executed to optimize the
- hyperparameters for the prognosis models.
-
- Example:
-     This module is meant to be called directly from a CRON job. A description of
-     the CRON job can be found in the /k8s/CronJobs folder.
-     Alternatively this code can be run directly by running::
-
-         $ python optimize_hyperparameters.py
-
- """
- from datetime import datetime, timedelta
- from pathlib import Path
-
- from openstef.data_classes.prediction_job import PredictionJobDataClass
- from openstef.enums import ModelType, PipelineType
- from openstef.model.serializer import MLflowSerializer
- from openstef.monitoring import teams
- from openstef.pipeline.optimize_hyperparameters import optimize_hyperparameters_pipeline
- from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
- from openstef.tasks.utils.taskcontext import TaskContext
-
- MAX_AGE_HYPER_PARAMS_DAYS = 31
- DEFAULT_CHECK_HYPER_PARAMS_AGE = True
- DEFAULT_TRAINING_PERIOD_DAYS = 121
-
-
- def optimize_hyperparameters_task(
-     pj: PredictionJobDataClass,
-     context: TaskContext,
-     check_hyper_param_age: bool = DEFAULT_CHECK_HYPER_PARAMS_AGE,
- ) -> None:
-     """Optimize hyperparameters task.
-
-     Expected prediction job keys: "id", "model", "lat", "lon", "name", "description"
-     Only used for logging: "name", "description"
-
-     Args:
-         pj: Prediction job
-         context: Task context
-         check_hyper_param_age: Boolean indicating whether optimization can be skipped
-             when the existing hyperparameters do not exceed the maximum age.
-
-     """
-     # Check pipeline types
-     if PipelineType.HYPER_PARMATERS not in pj.pipelines_to_run:
-         context.logger.info(
-             "Skip this PredictionJob because hyper_parameters pipeline is not specified in the pj."
-         )
-         return
-
-     # TODO: Improve implementation by using a field in the database and leveraging the
-     # `pipelines_to_run` attribute of the `PredictionJobDataClass` object. This
-     # would require a change to the MySQL datamodel.
-     if (
-         context.config.externally_posted_forecasts_pids
-         and pj.id in context.config.externally_posted_forecasts_pids
-     ):
-         context.logger.info(
-             "Skip this PredictionJob because its forecasts are posted by an external process."
-         )
-         return
-
-     # Retrieve the paths for storing model and reports from the config manager
-     mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri
-     artifact_folder = context.config.paths_artifact_folder
-
-     # Determine if we need to optimize hyperparams:
-     # retrieve the age of the last model whose hyperparameters were optimized
-     mlflow_serializer = MLflowSerializer(mlflow_tracking_uri=mlflow_tracking_uri)
-     hyper_params_age = mlflow_serializer.get_model_age(
-         experiment_name=str(pj["id"]), hyperparameter_optimization_only=True
-     )
-
-     if (hyper_params_age < MAX_AGE_HYPER_PARAMS_DAYS) and check_hyper_param_age:
-         context.logger.warning(
-             "Skip hyperparameter optimization",
-             pid=pj["id"],
-             hyper_params_age=hyper_params_age,
-             max_age=MAX_AGE_HYPER_PARAMS_DAYS,
-         )
-         return
-
-     datetime_start = datetime.utcnow() - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS)
-     datetime_end = datetime.utcnow()
-
-     input_data = context.database.get_model_input(
-         pid=pj["id"],
-         location=[pj["lat"], pj["lon"]],
-         datetime_start=datetime_start,
-         datetime_end=datetime_end,
-     )
-
-     # Optimize hyperparams
-     hyperparameters = optimize_hyperparameters_pipeline(
-         pj,
-         input_data,
-         mlflow_tracking_uri=mlflow_tracking_uri,
-         artifact_folder=artifact_folder,
-     )
-
-     # Send message to Teams
-     title = (
-         f'Optimized hyperparameters for prediction job {pj["name"]} {pj["description"]}'
-     )
-
-     teams.post_teams(teams.format_message(title=title, params=hyperparameters))
-
-
- def main(config=None, database=None):
-     taskname = Path(__file__).name.replace(".py", "")
-
-     if database is None or config is None:
-         raise RuntimeError(
-             "Please specify a config object and/or database connection object. These"
-             " can be found in the openstef-dbc package."
-         )
-
-     with TaskContext(taskname, config, database) as context:
-         model_type = [ml.value for ml in ModelType]
-
-         PredictionJobLoop(context, model_type=model_type).map(
-             optimize_hyperparameters_task, context
-         )
-
-
- if __name__ == "__main__":
-     main()
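
Note: the actual search happens in optimize_hyperparameters_pipeline, removed in this diff along with openstef/model/objective.py, which appears to build on Optuna. A bare-bones sketch of that style of study-based search follows; the objective, parameter ranges, and the train_and_score helper are placeholders for illustration, not openstef defaults.

import optuna

def train_and_score(params: dict) -> float:
    """Placeholder: train a model with `params` and return a validation error."""
    return (params["eta"] - 0.1) ** 2 + 0.01 * params["max_depth"]

def objective(trial: optuna.Trial) -> float:
    # Sample a candidate hyperparameter set for this trial
    params = {
        "eta": trial.suggest_float("eta", 0.01, 0.3, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
    }
    return train_and_score(params)

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)
print(study.best_params)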