openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/model/serializer.py
@@ -1,431 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
- #
- # SPDX-License-Identifier: MPL-2.0
- import json
- import logging
- import os
- import shutil
- from datetime import datetime
- from json import JSONDecodeError
- from typing import Optional, Union
- from urllib.parse import unquote, urlparse
-
- import mlflow
- import numpy as np
- import pandas as pd
- import structlog
- from mlflow.exceptions import MlflowException
- from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository
- from xgboost import XGBModel  # Temporary for backward compatibility
-
- from openstef.data_classes.model_specifications import ModelSpecificationDataClass
- from openstef.metrics.reporter import Report
- from openstef.model.regressors.regressor import OpenstfRegressor
- from openstef.settings import Settings
-
-
- class MLflowSerializer:
-     def __init__(self, mlflow_tracking_uri: str):
-         structlog.configure(
-             wrapper_class=structlog.make_filtering_bound_logger(
-                 logging.getLevelName(Settings.log_level)
-             )
-         )
-         self.logger = structlog.get_logger(self.__class__.__name__)
-         mlflow.set_tracking_uri(mlflow_tracking_uri)
-         self.logger.debug(f"MLflow tracking uri at init= {mlflow_tracking_uri}")
-         self.experiment_name_prefix = (
-             os.environ["DATABRICKS_WORKSPACE_PATH"]
-             if "DATABRICKS_WORKSPACE_PATH" in os.environ
-             else ""
-         )
-
-     def save_model(
-         self,
-         model: OpenstfRegressor,
-         experiment_name: str,
-         model_type: str,
-         model_specs: ModelSpecificationDataClass,
-         report: Report,
-         phase: str = "training",
-         **kwargs,
-     ) -> None:
-         """Save sklearn compatible model to MLFlow."""
-         mlflow.set_experiment(
-             experiment_name=self.experiment_name_prefix + experiment_name
-         )
-         with mlflow.start_run(run_name=experiment_name):
-             self._log_model_with_mlflow(
-                 model=model,
-                 experiment_name=experiment_name,
-                 model_type=model_type,
-                 model_specs=model_specs,
-                 report=report,
-                 phase=phase,
-                 **kwargs,
-             )
-             self._log_figures_with_mlflow(report)
-
-     def _log_model_with_mlflow(
-         self,
-         model: OpenstfRegressor,
-         experiment_name: str,
-         model_type: str,
-         model_specs: ModelSpecificationDataClass,
-         report: Report,
-         phase: str,
-         **kwargs,
-     ) -> None:
-         """Log model with MLflow.
-
-         Note: **kwargs has extra information to be logged with mlflow
-
-         """
-         # Get previous run id
-         models_df = self._find_models(
-             self.experiment_name_prefix + experiment_name, max_results=1
-         )  # returns latest model
-         if not models_df.empty:
-             previous_run_id = models_df["run_id"][
-                 0
-             ]  # Use [0] to only get latest run id
-         else:
-             self.logger.info(
-                 "No previous model found in MLflow", experiment_name=experiment_name
-             )
-             previous_run_id = None
-
-         # Set tags to the run, can be used to filter on the UI
-         mlflow.set_tag("run_id", mlflow.active_run().info.run_id)
-         mlflow.set_tag("phase", phase)  # phase can be Training or Hyperparameter_opt
-         mlflow.set_tag("Previous_version_id", previous_run_id)
-         mlflow.set_tag("model_type", model_type)
-         mlflow.set_tag("prediction_job", experiment_name)
-
-         # Add feature names, target, feature modules, metrics and params to the run
-         mlflow.set_tag(
-             "feature_names", model_specs.feature_names[1:]
-         )  # feature names are 1+ columns
-         mlflow.set_tag("target", model_specs.feature_names[0])  # target is first column
-         mlflow.set_tag("feature_modules", model_specs.feature_modules)
-         mlflow.log_metrics(report.metrics)
-         model_specs.hyper_params.update(model.get_params())
-         # TODO: Remove this hardcoded hyper params fix with loop after fix by mlflow
-         # https://github.com/mlflow/mlflow/issues/6384
-         for key, value in model_specs.hyper_params.items():
-             if value == "":
-                 model_specs.hyper_params[key] = " "
-         mlflow.log_params(model_specs.hyper_params)
-
-         # Process args
-         for key, value in kwargs.items():
-             if isinstance(value, dict):
-                 mlflow.log_dict(value, f"{key}.json")
-             elif isinstance(value, str) or isinstance(value, int):
-                 mlflow.set_tag(key, value)
-             else:
-                 self.logger.warning(
-                     f"Couldn't log {key}, {type(key)} not supported",
-                     experiment_name=experiment_name,
-                 )
-
-         # Log the model to the run. Signature describes model input and output scheme
-         mlflow.sklearn.log_model(
-             sk_model=model, artifact_path="model", signature=report.signature
-         )
-         self.logger.info("Model saved with MLflow", experiment_name=experiment_name)
-
-     def _log_figures_with_mlflow(self, report) -> None:
-         """Log figures with MLflow in the artifact folder."""
-         if report.feature_importance_figure is not None:
-             mlflow.log_figure(
-                 report.feature_importance_figure, "figures/weight_plot.html"
-             )
-         for key, figure in report.data_series_figures.items():
-             mlflow.log_figure(figure, f"figures/{key}.html")
-         self.logger.info("Logged figures to MLflow.")
-
-     def load_model(
-         self,
-         experiment_name: str,
-     ) -> tuple[OpenstfRegressor, ModelSpecificationDataClass]:
-         """Load sklearn compatible model from MLFlow.
-
-         Args:
-             experiment_name: Name of the experiment, often the id of the predition job.
-
-         Raises:
-             LookupError: If model is not found in MLflow.
-
-         """
-         try:
-             models_df = self._find_models(
-                 self.experiment_name_prefix + experiment_name, max_results=1
-             )  # return the latest finished run of the model
-             if not models_df.empty:
-                 latest_run = models_df.iloc[0]  # Use .iloc[0] to only get latest run
-             else:
-                 raise LookupError("Model not found. First train a model!")
-             model_uri = self._get_model_uri(latest_run.artifact_uri)
-             loaded_model = mlflow.sklearn.load_model(model_uri)
-             loaded_model.age = self._determine_model_age_from_mlflow_run(latest_run)
-             model_specs = self._get_model_specs(
-                 experiment_name, loaded_model, latest_run
-             )
-             loaded_model.path = unquote(
-                 urlparse(model_uri).path
-             )  # Path without file:///
-             self.logger.info("Model successfully loaded with MLflow")
-             return loaded_model, model_specs
-         except (AttributeError, MlflowException, OSError) as exception:
-             raise LookupError("Model not found. First train a model!") from exception
-
-     def get_model_age(
-         self, experiment_name: str, hyperparameter_optimization_only: bool = False
-     ) -> int:
-         """Get model age of most recent model.
-
-         Args:
-             experiment_name: Name of the experiment, often the id of the predition job.
-             hyperparameter_optimization_only: Set to true if only hyperparameters optimaisation events should be considered.
-
-         """
-         filter_string = "attribute.status = 'FINISHED'"
-         if hyperparameter_optimization_only:
-             filter_string += " AND tags.phase = 'Hyperparameter_opt'"
-         models_df = self._find_models(
-             self.experiment_name_prefix + experiment_name,
-             max_results=1,
-             filter_string=filter_string,
-         )
-         if not models_df.empty:
-             run = models_df.iloc[0]  # Use .iloc[0] to only get latest run
-             return self._determine_model_age_from_mlflow_run(run)
-         else:
-             self.logger.info("No model found returning infinite model age!")
-             return np.inf
-
-     def _find_models(
-         self,
-         experiment_name: str,
-         max_results: Optional[int] = 100,
-         filter_string: str = "attribute.status = 'FINISHED'",
-     ) -> pd.DataFrame:
-         """Finds trained models for specific experiment_name sorted by age in descending order."""
-         models_df = mlflow.search_runs(
-             experiment_names=[experiment_name],
-             max_results=max_results,
-             filter_string=filter_string,
-         )
-         return models_df
-
-     def _get_model_specs(
-         self,
-         experiment_name: str,
-         loaded_model: OpenstfRegressor,
-         latest_run: pd.Series,
-     ) -> ModelSpecificationDataClass:
-         """Get model specifications from existing model."""
-         model_specs = ModelSpecificationDataClass(id=experiment_name)
-
-         # Temporary fix for update of xgboost
-         # new version requires some attributes that the old (stored) models don't have yet
-         # see: https://stackoverflow.com/questions/71912084/attributeerror-xgbmodel-object-has-no-attribute-callbacks
-         new_attrs = [
-             "grow_policy",
-             "max_bin",
-             "eval_metric",
-             "callbacks",
-             "early_stopping_rounds",
-             "max_cat_to_onehot",
-             "max_leaves",
-             "sampling_method",
-         ]
-
-         manual_additional_attrs = [
-             "enable_categorical",
-             "predictor",
-         ]  # these ones are not mentioned in the stackoverflow post
-         automatic_additional_attrs = [
-             x
-             for x in XGBModel._get_param_names()
-             if x
-             not in new_attrs + manual_additional_attrs + loaded_model._get_param_names()
-         ]
-
-         for attr in new_attrs + manual_additional_attrs + automatic_additional_attrs:
-             setattr(loaded_model, attr, None)
-
-         # This one is new is should be set to a specific value (https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.training)
-         setattr(loaded_model, "missing", np.nan)
-         setattr(loaded_model, "n_estimators", 100)
-
-         # End temporary fix
-
-         # get the parameters from old model, we insert these later into new model
-         model_specs.hyper_params = loaded_model.get_params()
-         # TODO: Remove this hardcoded hyper params fix with loop after fix by mlflow
-         # https://github.com/mlflow/mlflow/issues/6384
-         for key, value in model_specs.hyper_params.items():
-             if value == " ":
-                 model_specs.hyper_params[key] = ""
-         # get used feature names else use all feature names
-         model_specs.feature_names = self._get_feature_names(
-             experiment_name, latest_run, model_specs, loaded_model
-         )
-         # get feature_modules
-         model_specs.feature_modules = self._get_feature_modules(
-             experiment_name, latest_run, model_specs, loaded_model
-         )
-         return model_specs
-
-     def _determine_model_age_from_mlflow_run(self, run: pd.Series) -> Union[int, float]:
-         """Determines how many days ago a model is trained from the mlflow run."""
-         try:
-             model_datetime = run.end_time.to_pydatetime()
-             model_datetime = model_datetime.replace(tzinfo=None)
-             model_age_days = (datetime.utcnow() - model_datetime).days
-         except Exception as e:
-             self.logger.warning(
-                 "Could not get model age. Returning infinite age!", exception=str(e)
-             )
-             return np.inf  # Return fallback age
-         return model_age_days
-
-     def remove_old_models(
-         self,
-         experiment_name: str,
-         max_n_models: int = 10,
-     ):
-         """Remove old models per experiment."""
-         if max_n_models < 1:
-             raise ValueError(
-                 f"Max models to keep should be greater than 1! Received: {max_n_models}"
-             )
-         previous_runs = self._find_models(
-             experiment_name=self.experiment_name_prefix + experiment_name
-         )
-         if len(previous_runs) > max_n_models:
-             self.logger.debug(
-                 f"Going to delete old models. {len(previous_runs)} > {max_n_models}"
-             )
-             # Find run_ids of oldest runs
-             runs_to_remove = previous_runs.sort_values(
-                 by="end_time", ascending=False
-             ).loc[max_n_models:, :]
-             for _, run in runs_to_remove.iterrows():
-                 self.logger.debug(
-                     f"Going to remove run {run.run_id}, from {run.end_time}."
-                 )
-                 mlflow.delete_run(run.run_id)
-                 self.logger.debug("Removed run")
-
-                 # mlflow.delete_run marks it as deleted but does not delete it by itself
-                 # Remove artifacts to save disk space
-                 try:
-                     repository = get_artifact_repository(
-                         mlflow.get_run(run.run_id).info.artifact_uri
-                     )
-                     repository.delete_artifacts()
-                     self.logger.debug("Removed artifacts")
-                 except Exception as e:
-                     self.logger.info(f"Failed removing artifacts: {e}")
-
-     def _get_feature_names(
-         self,
-         experiment_name: str,
-         latest_run: pd.Series,
-         model_specs: ModelSpecificationDataClass,
-         loaded_model: OpenstfRegressor,
-     ) -> list:
-         """Get the feature_names from MLflow or the old model."""
-         error_message = "feature_names not loaded and using None, because it"
-         try:
-             model_specs.feature_names = json.loads(
-                 latest_run["tags.feature_names"].replace("'", '"')
-             )
-         except KeyError:
-             self.logger.warning(
-                 f"{error_message} did not exist in run",
-                 experiment_name=experiment_name,
-             )
-         except AttributeError:
-             self.logger.warning(
-                 f"{error_message} needs to be a string",
-                 experiment_name=experiment_name,
-             )
-         except JSONDecodeError:
-             self.logger.warning(
-                 f"{error_message} needs to be a string of a list",
-                 experiment_name=experiment_name,
-             )
-
-         # if feature names is none, see if we can retrieve them from the old model
-         if model_specs.feature_names is None:
-             try:
-                 if loaded_model.feature_names is not None:
-                     model_specs.feature_names = loaded_model.feature_names
-                     self.logger.info(
-                         "feature_names retrieved from old model with an attribute",
-                         experiment_name=experiment_name,
-                     )
-             except AttributeError:
-                 self.logger.warning(
-                     "feature_names not an attribute of the old model, using None ",
-                     experiment_name=experiment_name,
-                 )
-         return model_specs.feature_names
-
-     def _get_feature_modules(
-         self,
-         experiment_name: str,
-         latest_run: pd.Series,
-         model_specs: ModelSpecificationDataClass,
-         loaded_model: OpenstfRegressor,
-     ) -> list:
-         """Get the feature_modules from MLflow or the old model."""
-         error_message = "feature_modules not loaded and using None, because it"
-         try:
-             model_specs.feature_modules = json.loads(
-                 latest_run["tags.feature_modules"].replace("'", '"')
-             )
-
-         except KeyError:
-             self.logger.warning(
-                 f"{error_message} did not exist in run",
-                 experiment_name=experiment_name,
-             )
-         except AttributeError:
-             self.logger.warning(
-                 f"{error_message} needs to be a string",
-                 experiment_name=experiment_name,
-             )
-         except JSONDecodeError:
-             self.logger.warning(
-                 f"{error_message} needs to be a string of a list",
-                 experiment_name=experiment_name,
-             )
-
-         # if feature modules is none, see if we can retrieve them from the old model
-         if not model_specs.feature_modules:
-             try:
-                 if loaded_model.feature_modules:
-                     model_specs.feature_modules = loaded_model.feature_modules
-                     self.logger.info(
-                         "feature_modules retrieved from old model with an attribute",
-                         experiment_name=experiment_name,
-                     )
-             except AttributeError:
-                 self.logger.warning(
-                     "feature_modules not an attribute of the old model, using None ",
-                     experiment_name=experiment_name,
-                 )
-         return model_specs.feature_modules
-
-     def _get_model_uri(self, artifact_uri: str) -> str:
-         """Set model uri based on latest run.
-
-         Note: this function helps to mock during unit tests
-
-         """
-         return os.path.join(artifact_uri, "model/")
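For context on what is being removed: the snippet below is an illustrative sketch, not part of the diff, showing how the MLflowSerializer above is typically driven in openstef 3.4.x (where the module still exists). It relies only on the signatures shown in this file; the tracking URI and experiment name are placeholder values.

from openstef.model.serializer import MLflowSerializer

# Placeholder tracking URI; any MLflow-supported URI can be used here.
serializer = MLflowSerializer(mlflow_tracking_uri="sqlite:///mlflow.db")

try:
    # Returns the latest finished run for this experiment, or raises LookupError.
    model, model_specs = serializer.load_model(experiment_name="307")
    print("model age in days:", model.age)
    print("feature names:", model_specs.feature_names)
except LookupError:
    print("No trained model found for this experiment yet.")

# Age in days of the most recent finished run; np.inf when nothing was trained yet.
print(serializer.get_model_age(experiment_name="307"))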
openstef/model/standard_deviation_generator.py
@@ -1,81 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
- #
- # SPDX-License-Identifier: MPL-2.0
- import numpy as np
- import pandas as pd
- from sklearn.base import RegressorMixin
-
-
- class StandardDeviationGenerator:
-     def __init__(self, validation_data: pd.DataFrame) -> None:
-         self.validation_data = validation_data
-
-     def generate_standard_deviation_data(self, model: RegressorMixin) -> RegressorMixin:
-         """Generate the standard data.
-
-         Calculates the difference between realised and predicted on validation set.
-         For each hour of the day the std of the difference is calculated.
-
-         Args:
-             model: The trained model
-
-         Returns:
-             The model with the std data added.
-
-         """
-         # Define some variables
-         predicted = None
-         self.standard_deviation = pd.DataFrame()
-
-         # Loop over horizons and ask prediction for each specific horizon
-         for horizon in self.validation_data.horizon.unique():
-             # Make subset for this specific horizon
-             sub_val = self.validation_data[self.validation_data.horizon == horizon]
-             try:
-                 predicted = model.predict(sub_val.iloc[:, 1:-1])
-             except Exception as e:
-                 print("Could not get prediction from new model!", e)
-
-             # Calculate confidence interval for this horizon
-             confidence_interval_horizon = self._calculate_standard_deviation(
-                 sub_val.iloc[:, 0], predicted
-             )
-             confidence_interval_horizon[
-                 "horizon"
-             ] = horizon  # Label with respective horizon
-             self.standard_deviation = pd.concat(
-                 [self.standard_deviation, confidence_interval_horizon]
-             )
-
-         model.standard_deviation = self.standard_deviation
-
-         return model
-
-     @staticmethod
-     def _calculate_standard_deviation(
-         realised: pd.Series, predicted: pd.Series
-     ) -> pd.DataFrame:
-         """Protected static method to calculate the corrections for a model.
-
-         Args:
-             realised: pd.series with realised load
-             predicted: pd.series with load predicted by new model
-
-         Returns:
-             DataFrame with model corrections
-
-         """
-         result = pd.DataFrame(index=range(24), columns=["stdev", "hour"])
-         # Calculate the error for each predicted point
-         error = realised - predicted
-         error.index = error.index.hour  # Hour only, remove the rest
-         # For the time starts with 00, 01, 02, etc. TODO (MAKE MORE ELEGANT SOLUTION THAN A LOOP)
-         for hour in range(24):
-             hour_error = error[error.index == hour]
-
-             result.loc[hour, "stdev"] = np.std(hour_error)
-             result.loc[hour, "hour"] = hour
-
-         result = result.astype("float")
-
-         return result
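Similarly, here is a minimal sketch, not part of the diff, of the removed StandardDeviationGenerator in use against openstef 3.4.x. The synthetic validation frame and LinearRegression model are made-up illustrations; the column layout (target first, horizon last) follows the iloc slicing in the code above.

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

from openstef.model.standard_deviation_generator import StandardDeviationGenerator

# Synthetic validation set: target in the first column, features in between,
# forecast horizon in the last column, as the slicing above expects.
index = pd.date_range("2024-01-01", periods=96, freq="15min")
validation_data = pd.DataFrame(
    {
        "load": np.sin(np.arange(96) / 4.0),
        "feature_a": np.arange(96, dtype=float),
        "horizon": 47.0,
    },
    index=index,
)

# Toy model fitted on the feature columns only.
model = LinearRegression().fit(
    validation_data.iloc[:, 1:-1], validation_data.iloc[:, 0]
)

# Attaches a per-hour, per-horizon stdev of the residuals to the model.
model = StandardDeviationGenerator(validation_data).generate_standard_deviation_data(model)
print(model.standard_deviation.head())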
@@ -1,3 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
- #
- # SPDX-License-Identifier: MPL-2.0