openstef-3.4.56-py3-none-any.whl → openstef-4.0.0a3-py3-none-any.whl

This diff compares the contents of two publicly released package versions, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (104)
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/feature_engineering/weather_features.py
@@ -1,492 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
- #
- # SPDX-License-Identifier: MPL-2.0
-
- """This module contains all weather-related functions used for feature engineering."""
- import logging
- from typing import Union
-
- import numpy as np
- import pandas as pd
- import pvlib
- import structlog
- from pvlib.location import Location
-
- from openstef.data_classes.prediction_job import PredictionJobDataClass
- from openstef.settings import Settings
-
- structlog.configure(
-     wrapper_class=structlog.make_filtering_bound_logger(
-         logging.getLevelName(Settings.log_level)
-     )
- )
- logger = structlog.get_logger(__name__)
-
-
- # Set some (nameless) constants for the Antoine equation:
- A: float = 6.116
- M: float = 7.6
- TN: float = 240.7
- # Set some constants
- TORR: float = 133.322368  # 1 torr = 133 Pa
- # 1.168 is the mass of 1 m^3 of air on sea level with standard pressure.
- D: float = 1.168
-
- DEFAULT_LAT: float = 52.132633
- DEFAULT_LON: float = 5.291266
-
-
- def calc_saturation_pressure(
-     temperature: Union[float, np.ndarray]
- ) -> Union[float, np.ndarray]:
-     """Calculate the water vapour pressure from the temperature.
-
-     See https://www.vaisala.com/sites/default/files/documents/Humidity_Conversion_Formulas_B210973EN-F.pdf.
-
-     Args:
-         temperature: Temperature in C
-     Returns:
-         The saturation pressure of water at the respective temperature
-
-     """
-     psat = A * 10 ** ((M * temperature) / (temperature + TN))
-     return psat
-
-
- def calc_vapour_pressure(
-     rh: Union[float, np.ndarray], psat: Union[float, np.ndarray]
- ) -> Union[float, np.ndarray]:
-     """Calculates the vapour pressure.
-
-     Args:
-         rh: Relative humidity
-         psat: Saturation pressure: see calc_saturation_pressure
-     Returns:
-         The water vapour pressure
-
-     """
-     return rh * psat
-
-
- def calc_dewpoint(
-     vapour_pressure: Union[float, np.ndarray]
- ) -> Union[float, np.ndarray]:
-     """Calculates the dewpoint, see https://en.wikipedia.org/wiki/Dew_point for more info.
-
-     Args:
-         vapour_pressure: The vapour pressure for which the dewpoint should be calculated
-     Returns:
-         Dewpoint
-
-     """
-     return TN / ((M / np.log10(vapour_pressure / A)) - 1)
-
-
- def calc_air_density(
-     temperature: Union[float, np.ndarray],
-     pressure: Union[float, np.ndarray],
-     rh: Union[float, np.ndarray],
- ) -> Union[float, np.ndarray]:
-     """Calculates the air density.
-
-     Args:
-         temperature: The temperature in C
-         pressure: the atmospheric pressure in Pa
-         rh: Relative humidity
-
-     Returns:
-         The air density (kg/m^3)
-
-     """
-     # Calculate saturation pressure
-     psat = calc_saturation_pressure(temperature)
-     # Calculate the current vapour pressure
-     vapour_pressure = calc_vapour_pressure(rh, psat)
-
-     # Set temperature to K
-     temperature_k = temperature + 273.15
-
-     # Calculate air density
-     air_density = (
-         D
-         * (273.15 / temperature_k)
-         * ((pressure - 0.3783 * vapour_pressure) / 760 / TORR)
-     )
-
-     return air_density
-
-
- def add_humidity_features(
-     data: pd.DataFrame, feature_names: list[str] = None
- ) -> pd.DataFrame:
-     """Adds humidity features to the input dataframe.
-
-     These features are calculated using functions defined in this module. A list of
-     requested features is used to determine whether to add the humidity features or not.
-
-     Args:
-         data: Input dataframe to which features have to be added
-         feature_names: list of requested features.
-
-     Returns:
-         Same as input dataframe with extra columns for the humidity features.
-
-     """
-     # If features is None add humidity features anyway
-     if feature_names is None:
-         humidity_features = True
-
-     # Otherwise check if they are among the requested features
-     else:
-         humidity_features = any(
-             x
-             in [
-                 "saturation_pressure",
-                 "vapour_pressure",
-                 "dewpoint",
-                 "air_density",
-             ]
-             for x in feature_names
-         )
-
-     # Check if any of the humidity features are requested and add them
-     if humidity_features:
-         # Try to add humidity calculations, ignore if required columns are missing
-         try:
-             humidity_df = humidity_calculations(data.temp, data.humidity, data.pressure)
-             data = data.join(humidity_df)
-         except AttributeError:
-             pass  # This happens when a required column for humidity_calculations
-             # is not present
-
-     return data
-
-
- def humidity_calculations(
-     temperature: Union[float, np.ndarray],
-     rh: Union[float, np.ndarray],
-     pressure: Union[float, np.ndarray],
- ) -> Union[dict, np.ndarray]:
-     """Function that calculates weather features based on humidity.
-
-     These features are:
-         - Saturation pressure
-         - Vapour pressure
-         - Dewpoint
-         - Air density
-
-     Args:
-         temperature: Temperature in C
-         rh: Relative humidity in %
-         pressure: The air pressure in hPa
-
-     Returns:
-         If the input is an np.ndarray; a pandas dataframe with the calculated moisture indices,
-         if the input is numeric; a dict with the calculated moisture indices
-
-     """
-     # First: a sanity check on the relative humidity and the air pressure
-     # We only check on the type of temperature, because they need to be the same anyway
-     is_series = isinstance(temperature, (np.ndarray, pd.Series))
-     is_scalar = isinstance(temperature, (float, int))
-
-     if is_scalar is False and is_series is False:
-         raise TypeError(
-             "The input should be a pandas series or np.ndarray, or float or int"
-         )
-
-     # Suppress copy warnings
-     with pd.option_context("mode.chained_assignment", None):
-         if is_series:
-             rh[rh > 1] = rh / 100  # This triggers copy warnings
-             pressure[pressure < 80000] = np.nan  # This triggers copy warnings
-         else:
-             if rh > 1:
-                 rh /= 100
-             if pressure < 80000:
-                 pressure = np.nan
-
-     # If the input is a dataframe or np.ndarrays: return a dataframe
-     if is_series:
-         humidity_df = pd.DataFrame(
-             columns=[
-                 "saturation_pressure",
-                 "vapour_pressure",
-                 "dewpoint",
-                 "air_density",
-             ]
-         )
-         humidity_df["saturation_pressure"] = calc_saturation_pressure(temperature)
-         humidity_df["vapour_pressure"] = calc_vapour_pressure(
-             rh, humidity_df.saturation_pressure
-         )
-         humidity_df["dewpoint"] = calc_dewpoint(humidity_df.vapour_pressure)
-         humidity_df["air_density"] = calc_air_density(temperature, pressure, rh)
-
-         return humidity_df
-
-     # Else: if the input is numeric: return a dict
-     psat = calc_saturation_pressure(temperature)
-     pw = calc_vapour_pressure(rh, psat)
-     td = calc_dewpoint(pw)
-     air_density = calc_air_density(temperature, pressure, rh)
-     return {
-         "saturation_pressure": psat,
-         "vapour_pressure": pw,
-         "dewpoint": td,
-         "air_density": air_density,
-     }
-
-
- def calculate_windspeed_at_hubheight(
-     windspeed: Union[float, pd.Series],
-     fromheight: float = 10.0,
-     hub_height: float = 100.0,
- ) -> pd.Series:
-     """Calculate windspeed at hubheight.
-
-     Calculates the windspeed at hub height by extrapolation from a given height to a given
-     hub height using the wind power law https://en.wikipedia.org/wiki/Wind_profile_power_law
-
-     Args:
-         windspeed: float OR pandas series of windspeed at height = fromheight
-         fromheight: height (m) of the windspeed data. Default is 10m
-         hub_height: height (m) of the turbine
-
-     Returns:
-         Windspeed at hubheight.
-
-     """
-     alpha = 0.143
-
-     if not isinstance(windspeed, (np.ndarray, float, int, pd.Series)):
-         raise TypeError(
-             "The windspeed is not of the expected type!\n Got"
-             " {}, expected np.ndarray, pd series or numeric".format(type(windspeed))
-         )
-
-     try:
-         if any(windspeed < 0):
-             raise ValueError(
-                 "The windspeed cannot be negative, as it is the length of a vector"
-             )
-     except TypeError:
-         if windspeed < 0:
-             raise ValueError(
-                 "The windspeed cannot be negative, as it is the length of a vector"
-             )
-         windspeed = abs(windspeed)
-
-     return windspeed * (hub_height / fromheight) ** alpha
-
-
- def calculate_windturbine_power_output(
-     windspeed: pd.Series, n_turbines: int = 1, turbine_data: dict = None
- ) -> pd.Series:
-     """Calculate wind turbine power output.
-
-     These values are related through the power curve, which is described by turbine_data.
-     If no turbine_data is given, default values are used and results are normalized to 1MWp.
-     If n_turbines=1, the result is normalized to a rated power of 1.
-
-     Args:
-         windspeed: pd.DataFrame(index = datetime, columns = ["windspeedHub"])
-         n_turbines: The number of turbines
-         turbine_data: slope_center, rated_power, steepness
-
-     Returns:
-         pd.DataFrame(index = datetime, columns = ["forecast"])
-
-     """
-     if turbine_data is None:
-         turbine_data = {
-             "name": "Lagerwey L100",  # not used here
-             "cut_in": 3,  # not used here
-             "cut_off": 25,  # not used here
-             "kind": "onshore",  # not used here
-             "manufacturer": "Lagerwey",  # not used here
-             "peak_capacity": 1,  # not used here
-             "rated_power": 1,
-             "slope_center": 8.07,
-             "steepness": 0.664,
-         }
-     else:
-         required_properties = ["rated_power", "steepness", "slope_center"]
-         for prop in required_properties:
-             if prop not in turbine_data.keys():
-                 raise KeyError(f"Required property '{prop}' not set in turbine data")
-
-     generated_power = turbine_data["rated_power"] / (
-         1
-         + np.exp(
-             -turbine_data["steepness"] * (windspeed - turbine_data["slope_center"])
-         )
-     )
-     generated_power *= n_turbines
-
-     return generated_power
-
-
- def add_additional_wind_features(
-     data: pd.DataFrame, feature_names: list[str] = None
- ) -> pd.DataFrame:
-     """Adds additional wind features to the input data.
-
-     Args:
-         data: Dataframe to which the wind features have to be added
-         feature_names: List of requested features
-
-     Returns:
-         DataFrame same as input dataframe with extra columns for the added wind features
-
-     """
-     if feature_names is None:
-         additional_wind_features = True
-     else:
-         additional_wind_features = any(
-             x
-             in [
-                 "windspeed_100mExtrapolated",
-                 "windPowerFit_extrapolated",
-                 "windpowerFit_harm_arome",
-             ]
-             for x in feature_names
-         )
-
-     # Add additional wind features
-     if "windspeed" in data.columns and additional_wind_features:
-         data["windspeed_100mExtrapolated"] = calculate_windspeed_at_hubheight(
-             data["windspeed"]
-         )
-
-         data["windPowerFit_extrapolated"] = calculate_windturbine_power_output(
-             data["windspeed_100mExtrapolated"]
-         )
-
-     # Do extra check
-     if "windspeed_100m" in data.columns and additional_wind_features:
-         data["windpowerFit_harm_arome"] = calculate_windturbine_power_output(
-             data["windspeed_100m"].astype(float)
-         )
-
-     return data
-
-
- def calculate_dni(radiation: pd.Series, pj: PredictionJobDataClass) -> pd.Series:
-     """Calculate the direct normal irradiance (DNI).
-
-     This function uses the predicted radiation and information derived from the location (obtained from pj).
-
-
-     Args:
-         radiation: predicted radiation including DatetimeIndex with right time-zone
-         pj: PredictionJob including information about the location (lat, lon)
-
-     Returns:
-         Direct normal irradiance (DNI).
-
-     """
-     loc = Location(pj.get("lat", DEFAULT_LAT), pj.get("lon", DEFAULT_LON), tz="utc")
-     times = radiation.index
-
-     # calculate data for loc(ation) at times with clear_sky, as if there would be a clear sky.
-     cs = loc.get_clearsky(times)
-
-     # get solar position variable(s) for loc(ation) at times
-     solpos = pvlib.solarposition.get_solarposition(times, loc.latitude, loc.longitude)
-     solar_zenith = solpos.apparent_zenith
-
-     # convert radiation (ghi) to right unit (J/m^2 to kWh/m^2)
-     # TODO: check whether unit conversion is necessary
-     ghi_forecasted = radiation / 3600
-     # convert ghi to dni
-     dni_converted = pvlib.irradiance.dni(
-         ghi_forecasted, cs.dhi, solar_zenith, clearsky_dni=cs.dni
-     )
-     dni_converted = dni_converted.fillna(0)
-     return dni_converted
-
-
- def calculate_gti(
-     radiation: pd.Series,
-     pj: PredictionJobDataClass,
-     surface_tilt: float = 34.0,
-     surface_azimuth: float = 180,
- ) -> pd.Series:
-     """Calculate the GTI/POA using the radiation.
-
-     This function assumes Global Tilted Irradiance (GTI) = Plane of Array (POA)
-
-     Args:
-         radiation: pandas series with DatetimeIndex with right timezone information
-         pj: prediction job which should at least contain the latitude and longitude location.
-         surface_tilt: The tilt of the surface of, for example, your PhotoVoltaic-system.
-         surface_azimuth: The way the surface is facing. South facing 180 degrees, North facing 0 degrees, East facing 90 degrees and West facing 270 degrees
-
-     Returns:
-         Global Tilted Irradiance (GTI)
-
-     """
-     loc = Location(pj.get("lat", DEFAULT_LAT), pj.get("lon", DEFAULT_LON), tz="utc")
-     times = radiation.index
-
-     # calculate data for loc(ation) at times with clear_sky, as if there would be a clear sky.
-     cs = loc.get_clearsky(times)
-     dni = calculate_dni(radiation, pj)
-
-     # get solar position variable(s) for loc(ation) at times
-     solpos = pvlib.solarposition.get_solarposition(times, loc.latitude, loc.longitude)
-     solar_zenith = solpos.apparent_zenith
-     solar_azimuth = solpos.azimuth
-
-     ghi_forecasted = radiation / 3600
-     gti = pvlib.irradiance.get_total_irradiance(
-         surface_tilt,
-         surface_azimuth,
-         solar_zenith,
-         solar_azimuth,
-         dni=dni,
-         ghi=ghi_forecasted,
-         dhi=cs.dhi,
-     )
-
-     return gti["poa_global"]
-
-
- def add_additional_solar_features(
-     data: pd.DataFrame,
-     pj: PredictionJobDataClass = None,
-     feature_names: list[str] = None,
- ) -> pd.DataFrame:
-     """Adds additional solar features to the input data.
-
-     Args:
-         data: Dataframe to which the solar features have to be added
-         pj: prediction job which should at least contain the latitude and longitude location.
-         feature_names: List of requested features
-
-     Returns:
-         DataFrame same as input dataframe with extra columns for the added solar features
-
-     """
-     # If pj is None add solar features with Utrecht as default location
-     if pj is None:
-         logger.info(
-             "No prediction job, default location will be used for additional radiation features."
-         )
-         pj = {"lat": DEFAULT_LAT, "lon": DEFAULT_LON}
-
-     # If features is None add solar features anyway
-     if feature_names is None:
-         additional_solar_features = True
-
-     # Otherwise check if they are among the requested features
-     else:
-         additional_solar_features = any(x in ["dni", "gti"] for x in feature_names)
-
-     # Add additional solar features
-     if "radiation" in data.columns and additional_solar_features:
-         data["dni"] = calculate_dni(data["radiation"], pj)
-         data["gti"] = calculate_gti(data["radiation"], pj)
-
-     return data
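
Downstream code that imported helpers such as add_humidity_features, calculate_windspeed_at_hubheight or calculate_dni from openstef.feature_engineering.weather_features will no longer find them in 4.0.0a3, since the module is removed from the wheel. For reference, the humidity and wind relations above are standard (Antoine equation, wind profile power law, logistic power curve); the sketch below restates them outside openstef, using only the constants and formulas visible in the removed code. The column names match the removed features, but the function names and input values are illustrative, not an openstef API.

    import numpy as np
    import pandas as pd

    # Constants as defined in the removed module: Antoine-equation parameters,
    # 1 torr in Pa, and the density of air at sea level (kg/m^3).
    A, M, TN = 6.116, 7.6, 240.7
    TORR, D = 133.322368, 1.168

    def air_density(temp_c, pressure_pa, rh):
        """Air density (kg/m^3) from temperature (C), pressure (Pa) and relative humidity (0-1)."""
        psat = A * 10 ** ((M * temp_c) / (temp_c + TN))  # saturation pressure
        vapour_pressure = rh * psat
        temp_k = temp_c + 273.15
        return D * (273.15 / temp_k) * ((pressure_pa - 0.3783 * vapour_pressure) / 760 / TORR)

    def windspeed_at_hubheight(windspeed, from_height=10.0, hub_height=100.0, alpha=0.143):
        """Extrapolate windspeed to hub height with the wind profile power law."""
        return windspeed * (hub_height / from_height) ** alpha

    def turbine_power(windspeed_hub, rated_power=1.0, slope_center=8.07, steepness=0.664):
        """Logistic power curve used for the normalized windPowerFit features."""
        return rated_power / (1 + np.exp(-steepness * (windspeed_hub - slope_center)))

    # Illustrative hourly input; values are made up.
    weather = pd.DataFrame(
        {
            "temp": [12.0, 14.5],
            "pressure": [101300.0, 101250.0],
            "humidity": [0.8, 0.7],
            "windspeed": [6.5, 8.0],
        }
    )
    weather["air_density"] = air_density(weather["temp"], weather["pressure"], weather["humidity"])
    weather["windspeed_100mExtrapolated"] = windspeed_at_hubheight(weather["windspeed"])
    weather["windPowerFit_extrapolated"] = turbine_power(weather["windspeed_100mExtrapolated"])
    print(weather)

The solar features (dni, gti) additionally depend on pvlib's clear-sky and solar-position models and on the prediction job's latitude and longitude, so they are not restated here.
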
@@ -1,3 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
- #
- # SPDX-License-Identifier: MPL-2.0