openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (104)
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/postprocessing/postprocessing.py DELETED
@@ -1,275 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
- import logging
- from enum import Enum
-
- import numpy as np
- import pandas as pd
- import structlog
-
- from openstef.data_classes.prediction_job import PredictionJobDataClass
- from openstef.enums import ForecastType
- from openstef.feature_engineering import weather_features
- from openstef.settings import Settings
-
- # this is the default for "Lagerwey100"
- TURBINE_DATA = {
-     "rated_power": 1,
-     "slope_center": 8.07,
-     "steepness": 0.664,
- }
-
- # Set value to define precission of power, this is needed because comparing to zero sometimes leads to issues for very small values.
- SMALLEST_POWER_UNIT: float = 0.000001
-
-
- def normalize_and_convert_weather_data_for_splitting(
-     weather_data: pd.DataFrame,
- ) -> pd.DataFrame:
-     """Normalize and converts weather data for use in energy splitting.
-
-     Args:
-         weather_data: Weather data with "windspeed_100m" and "radiation".
-
-     Returns:
-         Dataframe with "windpower" and "radiation" columns.
-
-     """
-     # Check we have "windspeed_100m" and "radiation" available
-     if not all(
-         elem in weather_data.columns for elem in ["windspeed_100m", "radiation"]
-     ):
-         raise ValueError("weather data does not contain required data!")
-
-     # Prepare output dataframe
-     output_dataframe = pd.DataFrame()
-
-     # Normalize weather data
-     output_dataframe["radiation"] = (
-         weather_data["radiation"]
-         / np.percentile(weather_data["radiation"].dropna(), 99.0)
-         * -1
-     )
-     wind_ref_series = weather_features.calculate_windspeed_at_hubheight(
-         weather_data["windspeed_100m"], fromheight=100
-     )
-     wind_ref = wind_ref_series.to_frame()
-     wind_ref = calculate_wind_power(wind_ref)
-     wind_ref *= -1
-
-     output_dataframe["windpower"] = wind_ref
-     return output_dataframe
-
-
- def calculate_wind_power(
-     windspeed_100m: pd.DataFrame,
- ) -> pd.DataFrame:
-     """Calculate the generated wind power based on the wind speed.
-
-     Values are related through the power curve, which is
-     described by turbine_data. Default values are used and are normalized to 1MWp.
-
-     Args:
-         windspeed_100m: Example: ``pd.DataFrame (index = datetime, columns = ["windspeed_100m"])``
-
-     Returns:
-         Example output ``pd.DataFrame(index = datetime, columns = ["windenergy"])``
-
-     """
-     generated_power = TURBINE_DATA["rated_power"] / (
-         1
-         + np.exp(
-             -TURBINE_DATA["steepness"] * (windspeed_100m - TURBINE_DATA["slope_center"])
-         )
-     )
-     return generated_power["windspeed_100m"].rename("windenergy").to_frame()
-
-
- def split_forecast_in_components(
-     forecast: pd.DataFrame, weather_data: pd.DataFrame, split_coefs: dict
- ) -> dict[str, pd.DataFrame]:
-     """Make estimates of energy components based on given forecast.
-
-     Args:
-         forecast: KTP load forecast
-         weather_data: Weather data for energy splitting, at least; "windspeed_100m" and "radiation"
-         split_coefs: Previously determined splitting coefs for prediction job
-
-     Returns:
-         Forecast dataframe for each component
-
-     """
-     # Normalize weather data
-     weather_ref_profiles = normalize_and_convert_weather_data_for_splitting(
-         weather_data
-     )
-
-     # Check input
-     if not all(
-         elem in ["windpower", "radiation"]
-         for elem in list(weather_ref_profiles.columns)
-     ):
-         raise ValueError("weather data does not contain required data!")
-
-     # Merge to ensure datetime index is the same
-     weather_ref_profiles = forecast.merge(
-         weather_ref_profiles, how="outer", right_index=True, left_index=True
-     )
-     # Drop rows with duplicate indices
-     weather_ref_profiles = weather_ref_profiles[
-         ~weather_ref_profiles.index.duplicated()
-     ]
-     weather_ref_profiles.replace([np.inf, -np.inf], np.nan).dropna(inplace=True)
-
-     # Prepare output dictionary and list of forecast types
-     components = forecast.copy(deep=True)
-
-     # Calculate profiles of estimated components
-     components["forecast_wind_on_shore"] = (
-         split_coefs["wind_ref"] * weather_ref_profiles["windpower"]
-     )
-     components["forecast_solar"] = (
-         split_coefs["pv_ref"] * weather_ref_profiles["radiation"]
-     )
-     components["forecast_other"] = (
-         weather_ref_profiles["forecast"]
-         - components["forecast_solar"]
-         - components["forecast_wind_on_shore"]
-     )
-
-     # Check that sign of production components is negative and not positive, change if sign is wrong
-     if components["forecast_wind_on_shore"].sum() > 0:
-         raise ValueError("Sign of estimated wind_on_shore component is positive!")
-     if components["forecast_solar"].sum() > 0:
-         raise ValueError("Sign of estimated solar component is positive!")
-
-     # Post process predictions to ensure realistic values
-     components["forecast_solar"] = post_process_wind_solar(
-         components["forecast_solar"], ForecastType.SOLAR
-     )
-     components["forecast_wind_on_shore"] = post_process_wind_solar(
-         components["forecast_wind_on_shore"], ForecastType.WIND
-     )
-
-     return components.drop("forecast", axis=1).drop("stdev", axis=1).dropna()
-
-
- def post_process_wind_solar(
-     forecast: pd.Series, forecast_type: ForecastType
- ) -> pd.DataFrame:
-     """Function that caries out postprocessing for wind and solar power generators.
-
-     As these points will always produce energy, predicted energy consumption is
-     set to zero. This function enforces the assumption that production is negative
-     and consuption positive.
-
-     Args:
-         forecast: Series with forecast data.
-         forecast_type: Specifies the type of forecast. This can be retrieved
-             from the prediction job as pj['forecast_type']
-
-     Returns:
-         Post-processed forecast.
-
-     """
-     if forecast_type not in [ForecastType.WIND, ForecastType.SOLAR]:
-         return forecast
-
-     # For wind and solar forecasted value should always be negative.
-     forecast.loc[forecast > (-1 * SMALLEST_POWER_UNIT)] = 0
-
-     # write changed back to forecast
-     return forecast
-
-
- def add_components_base_case_forecast(basecase_forecast: pd.DataFrame) -> pd.DataFrame:
-     """Makes a basecase forecast for the forecast_other component.
-
-     This will make a simple basecase components forecast
-     available and ensures that the sum of the components (other, wind and solar) is equal to the normal basecase
-     forecast This is important for sending GLMD messages correctly to TenneT!
-
-     Args:
-         basecase_forecast: pd.DataFrame with basecase forecast
-
-     Returns:
-         basecase_forecast: pd.DataFrame with extra "forecast_other component"
-
-     """
-     basecase_forecast["forecast_other"] = basecase_forecast["forecast"]
-     return basecase_forecast
-
-
- def add_prediction_job_properties_to_forecast(
-     pj: PredictionJobDataClass,
-     forecast: pd.DataFrame,
-     algorithm_type: str,
-     forecast_type: Enum = None,
-     forecast_quality: str = None,
- ) -> pd.DataFrame:
-     """Adds prediciton job meta data to a forecast dataframe.
-
-     Args:
-         pj: Prediciton job.
-         forecast: Forecast dataframe
-         algorithm_type: Type of algirithm used for making the forecast.
-         forecast_type: Type of the forecast. Defaults to None.
-         forecast_quality: Quality of the forecast. Defaults to None.
-
-     Returns:
-         Dataframe with added metadata.
-
-     """
-     structlog.configure(
-         wrapper_class=structlog.make_filtering_bound_logger(
-             logging.getLevelName(Settings.log_level)
-         )
-     )
-     logger = structlog.get_logger(__name__)
-
-     logger.info("Postproces in preparation of storing")
-     if forecast_type is None:
-         forecast_type = pj["forecast_type"]
-     else:
-         # get the value from the enum
-         forecast_type = forecast_type.value
-
-     # NOTE this field is only used when making the babasecase forecast and fallback
-     if forecast_quality is not None:
-         forecast["quality"] = forecast_quality
-
-     # TODO rename prediction job typ to type
-     # TODO algtype = model_file_path, perhaps we can find a more logical name
-     # TODO perhaps better to make a forecast its own class!
-     # TODO double check and sync this with make_basecase_forecast (other fields are added)
-     # !!!!! TODO fix the requirement for customer
-     forecast["pid"] = pj["id"]
-     forecast["customer"] = pj["name"]
-     forecast["description"] = pj["description"]
-     forecast["type"] = forecast_type
-     forecast["algtype"] = algorithm_type
-
-     return forecast
-
-
- def sort_quantiles(
-     forecast: pd.DataFrame, quantile_col_start="quantile_P"
- ) -> pd.DataFrame:
-     """Sort quantile values so quantiles do not cross.
-
-     This function assumes that all quantile columns start with 'quantile_P' For more academic details on why this is
-     mathematically sounds, please refer to Quantile and Probability Curves Without Crossing (Chernozhukov, 2010)
-
-     """
-     p_columns = [col for col in forecast.columns if col.startswith(quantile_col_start)]
-
-     if len(p_columns) == 0:
-         return forecast
-
-     # sort the columns
-     p_columns = np.sort(p_columns)
-
-     forecast.loc[:, p_columns] = forecast[p_columns].apply(sorted, axis=1).to_list()
-
-     return forecast
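
Two ideas in the deleted postprocessing module generalize beyond OpenSTEF: the logistic power curve used by calculate_wind_power and the row-wise quantile sort in sort_quantiles. Below is a minimal, self-contained sketch of both behaviours in plain numpy/pandas (toy data; no openstef import, since the module is removed in 4.0.0a3):

import numpy as np
import pandas as pd

# Logistic power curve with the "Lagerwey100" defaults from TURBINE_DATA,
# normalized to a rated power of 1.
def wind_power(windspeed: pd.Series) -> pd.Series:
    return 1.0 / (1.0 + np.exp(-0.664 * (windspeed - 8.07)))

speeds = pd.Series([0.0, 8.07, 25.0])
print(wind_power(speeds).round(3).tolist())  # [0.005, 0.5, 1.0]: half output at the slope center

# Row-wise sort of quantile columns, mirroring sort_quantiles: after sorting,
# quantile_P10 <= quantile_P50 <= quantile_P90 holds in every row.
forecast = pd.DataFrame(
    {"quantile_P10": [1.0, 5.0], "quantile_P50": [2.0, 3.0], "quantile_P90": [0.5, 4.0]}
)
p_columns = np.sort([c for c in forecast.columns if c.startswith("quantile_P")])
forecast.loc[:, p_columns] = forecast[p_columns].apply(sorted, axis=1).to_list()
print(forecast.to_dict("list"))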
openstef/preprocessing/__init__.py DELETED
@@ -1,3 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
openstef/preprocessing/preprocessing.py DELETED
@@ -1,42 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
-
- import numpy as np
- import pandas as pd
-
-
- def replace_repeated_values_with_nan(
-     df: pd.DataFrame, threshold: int, column_name: str
- ) -> pd.DataFrame:
-     """Replace sequentially repeated values with NaN.
-
-     Args:
-         df: Data with potential repeating values.
-         threshold: The minimum number of squentially repeated values needed to trigger the replacement with NaN.
-         column_name: Column name of input dataframe with repeating values.
-
-     Returns:
-         DataFrame, similar to df, with the desired values set to NaN.
-
-     """
-     data = df.copy()
-
-     # Add a boolean column to mark sequential duplicates
-     data["temp_is_duplicate"] = data[column_name].eq(data[column_name].shift(1))
-
-     # Create an unique identifier for each sequence with the same value, so we can easily remove the correct sequences
-     data["temp_repeated_group"] = (~data["temp_is_duplicate"]).cumsum()
-
-     # Create mask of sequences larger than or equal to the threshold value
-     mask = (
-         data.groupby("temp_repeated_group")[column_name].transform("count") >= threshold
-     )
-
-     # Replace the masked values with NaN
-     data.loc[mask, column_name] = np.nan
-
-     # Drop temporary columns
-     data = data.drop(["temp_is_duplicate", "temp_repeated_group"], axis=1)
-
-     return data
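
The core of the deleted replace_repeated_values_with_nan is the shift/compare/cumsum run-labelling trick, which is compact enough to show standalone. A sketch on toy data (the values are invented), using only pandas and numpy:

import numpy as np
import pandas as pd

# Toy sensor column: 7.0 repeats four times in a row, 2.0 only twice.
df = pd.DataFrame({"load": [1.0, 7.0, 7.0, 7.0, 7.0, 2.0, 2.0, 3.0]})

# A new group label starts wherever the value differs from its predecessor,
# so every run of identical values shares one label.
is_duplicate = df["load"].eq(df["load"].shift(1))
group = (~is_duplicate).cumsum()

# Blank out runs of at least `threshold` repeated values.
threshold = 3
mask = df.groupby(group)["load"].transform("count") >= threshold
df.loc[mask, "load"] = np.nan
print(df["load"].tolist())  # [1.0, nan, nan, nan, nan, 2.0, 2.0, 3.0]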
openstef/settings.py DELETED
@@ -1,15 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
-
- from functools import lru_cache
-
- from openstef.app_settings import AppSettings
-
-
- @lru_cache
- def _get_app_settings() -> AppSettings:
-     return AppSettings()
-
-
- Settings = _get_app_settings()
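
The deleted settings.py is a compact singleton pattern: lru_cache on a zero-argument factory means every import of Settings shares one AppSettings instance. A generic sketch of the same pattern (the AppSettings class below is a stand-in, not the real openstef.app_settings one):

from functools import lru_cache

class AppSettings:  # stand-in for openstef.app_settings.AppSettings
    log_level: str = "INFO"

@lru_cache
def _get_app_settings() -> AppSettings:
    # A zero-argument cached function has exactly one cache entry,
    # so every caller gets the same object back.
    return AppSettings()

Settings = _get_app_settings()
assert _get_app_settings() is Settings  # one shared instance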
openstef/tasks/__init__.py DELETED
@@ -1,3 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
openstef/tasks/calculate_kpi.py DELETED
@@ -1,324 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
-
- """This module contains the CRON job that is periodically executed to calculate key performance indicators (KPIs).
-
- This code assumes prognoses are available from the persistent storage.
- If these are not available run create_forecast.py to train all models.
-
- The folowing tasks are caried out:
-   1: Calculate the KPI for a given pid. Ignore SplitEnergy
-   2: Create figures
-   3: Write KPI to database
-
- Example:
-     This module is meant to be called directly from a CRON job.
-     Alternatively this code can be run directly by running::
-         $ python calculate_kpi.py
-
- """
- import logging
-
- # Import builtins
- from datetime import datetime, timedelta
- from pathlib import Path
-
- import numpy as np
- import pandas as pd
- import structlog
-
- from openstef.data_classes.prediction_job import PredictionJobDataClass
- from openstef.enums import ModelType
- from openstef.exceptions import NoPredictedLoadError, NoRealisedLoadError
- from openstef.metrics import metrics
- from openstef.settings import Settings
- from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
- from openstef.tasks.utils.taskcontext import TaskContext
- from openstef.validation import validation
-
- # Thresholds for retraining and optimizing
- THRESHOLD_RETRAINING = 0.25
- THRESHOLD_OPTIMIZING = 0.50
-
-
- def main(model_type: ModelType = None, config=None, database=None) -> None:
-     taskname = Path(__file__).name.replace(".py", "")
-
-     if database is None or config is None:
-         raise RuntimeError(
-             "Please specifiy a config object and/or database connection object. These"
-             " can be found in the openstef-dbc package."
-         )
-
-     if model_type is None:
-         model_type = [ml.value for ml in ModelType]
-
-     with TaskContext(taskname, config, database) as context:
-         # Set start and end time
-         start_time = datetime.utcnow() - timedelta(days=1)
-         end_time = datetime.utcnow()
-
-         PredictionJobLoop(context, model_type=model_type).map(
-             check_kpi_task,
-             context,
-             start_time=start_time,
-             end_time=end_time,
-         )
-
-
- def check_kpi_task(
-     pj: PredictionJobDataClass,
-     context: TaskContext,
-     start_time: datetime,
-     end_time: datetime,
-     threshold_optimizing=THRESHOLD_OPTIMIZING,
-     threshold_retraining=THRESHOLD_RETRAINING,
- ) -> None:
-     # Apply default parameters if none are provided
-     if start_time is None:
-         start_time = datetime.utcnow() - timedelta(days=1)
-     if end_time is None:
-         end_time = datetime.utcnow()
-
-     # Get realised load data
-     realised = context.database.get_load_pid(pj["id"], start_time, end_time, "15T")
-
-     # Get predicted load
-     predicted_load = context.database.get_predicted_load_tahead(
-         pj, start_time, end_time
-     )
-
-     # Get basecase prediction
-     load_1_week_before = context.database.get_load_pid(
-         pj["id"], start_time - timedelta(days=7), end_time - timedelta(days=7), "15T"
-     )
-     if len(load_1_week_before) > 0:
-         basecase = load_1_week_before.shift(periods=7, freq="d")
-     else:
-         basecase = pd.DataFrame()
-
-     kpis = calc_kpi_for_specific_pid(pj["id"], realised, predicted_load, basecase)
-     # Write KPI's to database
-     context.database.write_kpi(pj, kpis)
-
-     # Add pid to the list of pids that should be retrained or optimized if
-     # performance is insufficient
-     if kpis["47.0h"]["rMAE"] > threshold_retraining:
-         context.logger.warning(
-             "Need to retrain model, retraining threshold rMAE 47h exceeded",
-             t_ahead="47.0h",
-             rMAE=kpis["47.0h"]["rMAE"],
-             retraining_threshold=threshold_retraining,
-         )
-
-     if kpis["47.0h"]["rMAE"] > threshold_optimizing:
-         context.logger.warning(
-             "Need to optimize hyperparameters, optimizing threshold rMAE 47h exceeded",
-             t_ahead="47.0h",
-             rMAE=kpis["47.0h"]["rMAE"],
-             optimizing_threshold=threshold_optimizing,
-         )
-
-
- def calc_kpi_for_specific_pid(
-     pid: int,
-     realised: pd.DataFrame,
-     predicted_load: pd.DataFrame,
-     basecase: pd.DataFrame,
- ) -> dict:
-     """Function that checks the model performance based on a pid. This function.
-
-     - loads and combines forecast and realised data
-     - calculated several key performance indicators (KPIs)
-       These metric include:
-         - RMSE,
-         - bias,
-         - NSME (model efficiency, between -inf and 1)
-         - Mean absolute Error
-
-     Args:
-         pid: Prediction ID for a given prediction job
-         realised: Realised load.
-         predicted_load: Predicted load.
-         basecase: Basecase predicted load.
-
-     Returns:
-         - Dictionary that includes a dictonary for each t_ahead.
-         - Dict includes enddate en window (in days) for clarification
-
-     Raises:
-         NoPredictedLoadError: When no predicted load for given datatime range.
-         NoRealisedLoadError: When no realised load for given datetime range.
-
-     Example:
-         To get the rMAE for the 24 hours ahead prediction: kpis['24h']['rMAE']
-
-     """
-     COMPLETENESS_REALISED_THRESHOLDS = 0.7
-     COMPLETENESS_PREDICTED_LOAD_THRESHOLD = 0.7
-
-     structlog.configure(
-         wrapper_class=structlog.make_filtering_bound_logger(
-             logging.getLevelName(Settings.log_level)
-         )
-     )
-     logger = structlog.get_logger(__name__)
-
-     # If predicted is empty
-     if len(predicted_load) == 0:
-         raise NoPredictedLoadError(pid)
-
-     # If realised is empty
-     if len(realised) == 0:
-         raise NoRealisedLoadError(pid)
-
-     # Define start and end time
-     start_time = realised.index.min().to_pydatetime()
-     end_time = realised.index.max().to_pydatetime()
-
-     completeness_realised = validation.calc_completeness_dataframe(realised)[0]
-
-     # Interpolate missing data if needed
-     realised = realised.resample("15T").interpolate(limit=3)
-
-     completeness_predicted_load = validation.calc_completeness_dataframe(predicted_load)
-
-     # Combine the forecast and the realised to make sure indices are matched nicely
-     combined = pd.merge(realised, predicted_load, left_index=True, right_index=True)
-
-     # Add basecase (load in same time period 7 days ago)
-     # Check if basecase is not empty, else make a dummy dataframe
-     if len(basecase) == 0:
-         basecase = pd.DataFrame(columns=["load"])
-     basecase = basecase.rename(columns=dict(load="basecase"))
-
-     combined = combined.merge(basecase, how="left", left_index=True, right_index=True)
-
-     # Raise exception in case of constant load
-     if combined.load.nunique() == 1:
-         logger.warning(
-             "The load is constant! KPIs will still be calculated, but relative metrics"
-             " will be nan."
-         )
-
-     # Define output dictonary
-     kpis = dict()
-
-     # Extract t_aheads from predicted_load,
-     # Make a list of tuples with [(forecast_xh, stdev_xh),(..,..),..]
-     hor_list = [
-         ("forecast_" + t_ahead, "stdev_" + t_ahead)
-         for t_ahead in set(col.split("_")[1] for col in predicted_load.columns)
-     ]
-
-     # cast date to int
-     date = pd.to_datetime(end_time)
-
-     # Calculate model metrics and add them to the output dictionary
-     logger.info("Start calculating kpis")
-     for hor_cols in hor_list:
-         t_ahead_h = hor_cols[0].split("_")[1]
-         fc = combined[hor_cols[0]]  # load predictions
-         st = combined[hor_cols[1]]  # standard deviations of load predictions
-
-         completeness_predicted_load_specific_hor = (
-             validation.calc_completeness_dataframe(fc.to_frame(name=t_ahead_h))[0]
-         )
-         kpis.update(
-             {
-                 t_ahead_h: {
-                     "RMSE": metrics.rmse(combined["load"], fc),
-                     "bias": metrics.bias(combined["load"], fc),
-                     "NSME": metrics.nsme(combined["load"], fc),
-                     "MAE": metrics.mae(combined["load"], fc),
-                     "rMAE": metrics.r_mae(combined["load"], fc),
-                     "rMAE_highest": metrics.r_mae_highest(combined["load"], fc),
-                     "rMNE_highest": metrics.r_mne_highest(combined["load"], fc),
-                     "rMPE_highest": metrics.r_mpe_highest(combined["load"], fc),
-                     "rMAE_lowest": metrics.r_mae_lowest(combined["load"], fc),
-                     "skill_score_basecase": metrics.skill_score(
-                         combined["load"],
-                         combined["basecase"],
-                         np.mean(combined["basecase"]),
-                     ),
-                     "skill_score": metrics.skill_score(
-                         combined["load"], fc, np.mean(combined["basecase"])
-                     ),
-                     "skill_score_positive_peaks": metrics.skill_score_positive_peaks(
-                         combined["load"], fc, np.mean(combined["basecase"])
-                     ),
-                     "skill_score_positive_peaks_basecase": metrics.skill_score_positive_peaks(
-                         combined["load"],
-                         combined["basecase"],
-                         np.mean(combined["basecase"]),
-                     ),
-                     "franks_skill_score": metrics.franks_skill_score(
-                         combined["load"], fc, combined["basecase"]
-                     ),
-                     "franks_skill_score_peaks": metrics.franks_skill_score_peaks(
-                         combined["load"], fc, combined["basecase"]
-                     ),
-                     "load_range": combined["load"].max() - combined["load"].min(),
-                     "frac_in_1sdev": metrics.frac_in_stdev(combined["load"], fc, st),
-                     "frac_in_2sdev": metrics.frac_in_stdev(
-                         combined["load"], fc, 2 * st
-                     ),
-                     "completeness_realised": completeness_realised,
-                     "completeness_predicted": completeness_predicted_load_specific_hor,
-                     "date": date,  # cast to date
-                     "window_days": np.round(
-                         (end_time - start_time).total_seconds() / 60.0 / 60.0 / 24.0
-                     ),
-                 }
-             }
-         )
-
-         if completeness_realised < COMPLETENESS_REALISED_THRESHOLDS:
-             logger.warning(
-                 "Completeness realised load too low",
-                 prediction_id=pid,
-                 start_time=start_time,
-                 end_time=end_time,
-                 completeness=completeness_realised,
-                 completeness_threshold=COMPLETENESS_REALISED_THRESHOLDS,
-             )
-             set_incomplete_kpi_to_nan(kpis, t_ahead_h)
-         if completeness_predicted_load.any() < COMPLETENESS_PREDICTED_LOAD_THRESHOLD:
-             logger.warning(
-                 "Completeness predicted load of specific horizon too low",
-                 prediction_id=pid,
-                 horizon=t_ahead_h,
-                 start_time=start_time,
-                 end_time=end_time,
-                 completeness=completeness_predicted_load,
-                 completeness_threshold=COMPLETENESS_PREDICTED_LOAD_THRESHOLD,
-             )
-             set_incomplete_kpi_to_nan(kpis, t_ahead_h)
-
-     # Return output dictionary
-     return kpis
-
-
- def set_incomplete_kpi_to_nan(kpis: dict, t_ahead_h: str) -> None:
-     """Checks the given kpis for completeness and sets to nan if this not true.
-
-     Args:
-         kpis: the kpis
-         t_ahead_h: t_ahead_h
-
-     """
-     kpi_metrics = list(kpis[t_ahead_h].keys())
-     # Set to nan
-     for kpi in kpi_metrics:
-         if kpi not in [
-             "completeness_realised",
-             "completeness_predicted",
-             "date",
-             "window_days",
-         ]:
-             kpis[t_ahead_h].update({kpi: np.nan})
-
-
- if __name__ == "__main__":
-     main()
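
As the calc_kpi_for_specific_pid docstring notes (kpis['24h']['rMAE']), the deleted task produces a dict of per-horizon metric dicts, and the 47.0h rMAE drives the retrain/optimize warnings. A sketch of that consumer-side logic, with invented metric values:

# Hypothetical shape of the dict returned by calc_kpi_for_specific_pid;
# the numbers below are made up for illustration.
kpis = {
    "24.0h": {"rMAE": 0.31, "RMSE": 1.8, "completeness_realised": 0.96},
    "47.0h": {"rMAE": 0.52, "RMSE": 2.4, "completeness_realised": 0.96},
}

THRESHOLD_RETRAINING = 0.25
THRESHOLD_OPTIMIZING = 0.50

# Same comparisons as check_kpi_task: both thresholds are checked
# against the 47.0h-ahead relative MAE.
if kpis["47.0h"]["rMAE"] > THRESHOLD_RETRAINING:
    print("retraining advised")
if kpis["47.0h"]["rMAE"] > THRESHOLD_OPTIMIZING:
    print("hyperparameter optimization advised")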