openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
@@ -1,118 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- """This module should be executed once every day.
5
-
6
- For all prediction_jobs, it will create a 'basecase' forecast which is less accurate, but (almost) always available.
7
- For now, it uses the load a week earlier.
8
- Missing datapoints are interpolated.
9
-
10
- Example:
11
- This module is meant to be called directly from a CRON job. A description of the
12
- CRON job can be found in the /k8s/CronJobs folder.
13
-
14
- Alternatively this code can be run directly by running:
15
-
16
- $ python create_basecase_forecast.py
17
-
18
- """
19
- from datetime import datetime, timedelta
20
- from pathlib import Path
21
-
22
- import pandas as pd
23
-
24
- from openstef.data_classes.prediction_job import PredictionJobDataClass
25
- from openstef.enums import PipelineType
26
- from openstef.pipeline.create_basecase_forecast import create_basecase_forecast_pipeline
27
- from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
28
- from openstef.tasks.utils.taskcontext import TaskContext
29
-
30
# Default number of days of history requested as input data (default of `t_behind_days`).
T_BEHIND_DAYS: int = 15
# Default number of days ahead that input data is requested for (default of `t_ahead_days`).
T_AHEAD_DAYS: int = 14
32
-
33
-
34
def create_basecase_forecast_task(
    pj: PredictionJobDataClass,
    context: TaskContext,
    t_behind_days: int = T_BEHIND_DAYS,
    t_ahead_days: int = T_AHEAD_DAYS,
) -> None:
    """Top level task that creates a basecase forecast.

    On this task level all database and context manager dependencies are resolved.

    The prediction job is skipped (an info message is logged and nothing is written)
    when the forecast pipeline is not listed in ``pj.pipelines_to_run`` or when
    ``pj.id`` is listed in ``context.config.externally_posted_forecasts_pids``.

    Args:
        pj: Prediction job
        context: Context object that holds a config manager and a database connection
        t_behind_days: number of days included as history. This is used to generate
            lagged features for the to-be-forecasted period
        t_ahead_days: number of days a basecase forecast is created for

    """
    # Local import: the module itself only imports `datetime` and `timedelta`.
    from datetime import timezone

    # Check pipeline types
    if PipelineType.FORECAST not in pj.pipelines_to_run:
        context.logger.info(
            "Skip this PredictionJob because forecast pipeline is not specified in the pj."
        )
        return

    # TODO: Improve implementation by using a field in the database and leveraging the
    # `pipelines_to_run` attribute of the `PredictionJobDataClass` object. This
    # would require a change to the MySQL datamodel.
    if (
        context.config.externally_posted_forecasts_pids
        and pj.id in context.config.externally_posted_forecasts_pids
    ):
        context.logger.info(
            "Skip this PredictionJob because its forecasts are posted by an external process."
        )
        return

    # Define datetime range for input data from a single clock reading.
    # `datetime.utcnow()` is deprecated since Python 3.12; this expression yields
    # the same naive UTC datetime.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    datetime_start = now - timedelta(days=t_behind_days)
    datetime_end = now + timedelta(days=t_ahead_days)

    # Retrieve input data
    input_data = context.database.get_model_input(
        pid=pj["id"],
        location=[pj["lat"], pj["lon"]],
        datetime_start=datetime_start,
        datetime_end=datetime_end,
    )

    # Make basecase forecast using the corresponding pipeline
    basecase_forecast = create_basecase_forecast_pipeline(pj, input_data)

    # Do not store basecase forecasts for moments within the prediction job's horizon.
    # Those should be updated by regular forecast process.
    # The cutoff is taken at filtering time (after the pipeline ran), as before.
    storage_cutoff = pd.to_datetime(datetime.now(timezone.utc), utc=True) + timedelta(
        minutes=pj.horizon_minutes
    )
    basecase_forecast = basecase_forecast.loc[
        basecase_forecast.index > storage_cutoff, :
    ]

    # Write basecase forecast to the database
    context.database.write_forecast(basecase_forecast, t_ahead_series=True)
98
-
99
-
100
def main(config: object = None, database: object = None, **kwargs):
    """Run the basecase forecast task for the prediction jobs of the supported models.

    Args:
        config: Configuration object; must not be ``None``.
        database: Database connection object; must not be ``None``.
        **kwargs: Forwarded to ``create_basecase_forecast_task`` via
            ``PredictionJobLoop.map``.

    Raises:
        RuntimeError: If ``config`` or ``database`` is ``None``.

    """
    # Both dependencies are mandatory; fail before any context is opened.
    if config is None or database is None:
        raise RuntimeError(
            "Please specifiy a config object and/or database connection object. These"
            " can be found in the openstef-dbc package."
        )

    # The task is named after this file, without the ".py" extension.
    taskname = Path(__file__).name.replace(".py", "")
    supported_model_types = ["xgb", "xgb_quantile", "lgb"]

    with TaskContext(taskname, config, database) as context:
        job_loop = PredictionJobLoop(context, model_type=supported_model_types)
        job_loop.map(create_basecase_forecast_task, context, **kwargs)
115
-
116
-
117
# Script entry point.
# NOTE(review): `main()` is called without arguments here, and `main` raises
# RuntimeError when `config` or `database` is None, so running this file directly
# fails unless those objects are supplied by a caller instead.
if __name__ == "__main__":
    main()
@@ -1,162 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- """This module contains the CRON job that is periodically executed to make the components prognoses.
5
-
6
- This code assumes trained models are available from the persistent storage.
7
- If these are not available run train_model.py to train all models.
8
- To provide the prognoses the following steps are carried out:
9
- 1. Get historic training data (TDCV, Load, Weather and day_ahead_electricity_price price data)
10
- 2. Apply features
11
- 3. Load model
12
- 4. Make component prediction
13
- 5. Write prediction to the database
14
- 6. Send Teams message if something goes wrong
15
-
16
- Example:
17
- This module is meant to be called directly from a CRON job. A description of
18
- the CRON job can be found in the /k8s/CronJobs folder.
19
- Alternatively this code can be run directly by running::
20
-
21
- $ python create_components_forecast.py
22
-
23
- """
24
- import logging
25
- from datetime import datetime, timedelta, timezone
26
- from pathlib import Path
27
-
28
- import pandas as pd
29
- import structlog
30
-
31
- from openstef.data_classes.prediction_job import PredictionJobDataClass
32
- from openstef.enums import ModelType
33
- from openstef.exceptions import ComponentForecastTooShortHorizonError
34
- from openstef.pipeline.create_component_forecast import (
35
- create_components_forecast_pipeline,
36
- )
37
- from openstef.settings import Settings
38
- from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
39
- from openstef.tasks.utils.taskcontext import TaskContext
40
-
41
# Default look-back, in days, of the component forecast window (default of `t_behind_days`).
T_BEHIND_DAYS = 0
# Default look-ahead, in days, of the component forecast window (default of `t_ahead_days`).
T_AHEAD_DAYS = 3
43
-
44
-
45
def create_components_forecast_task(
    pj: PredictionJobDataClass,
    context: TaskContext,
    t_behind_days: int = T_BEHIND_DAYS,
    t_ahead_days: int = T_AHEAD_DAYS,
) -> None:
    """Top level task that creates a components forecast.

    On this task level all database and context manager dependencies are resolved.

    The job is skipped when ``pj["train_components"]`` equals 0 or when no
    predicted load is found for the requested window.

    Args:
        pj: Prediction job
        context: Context object that holds a config manager and a database connection
        t_behind_days: number of days in the past that the component forecast is created for
        t_ahead_days: number of days in the future that the component forecast is created for

    Raises:
        ValueError: If the index of the created forecasts is not a ``DatetimeIndex``.
        ComponentForecastTooShortHorizonError: If the last forecasted moment lies
            less than 30 hours after the current time. The forecast has already
            been written to the database when this is raised.

    """
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(
            logging.getLevelName(Settings.log_level)
        )
    )
    logger = structlog.get_logger(__name__)

    if pj["train_components"] == 0:
        context.logger.info(
            "Skip prediction job", train_components=pj["train_components"]
        )
        return

    # Define datetime range for input data from a single clock reading.
    # `datetime.utcnow()` is deprecated since Python 3.12; this expression yields
    # the same naive UTC datetime.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    datetime_start = now - timedelta(days=t_behind_days)
    datetime_end = now + timedelta(days=t_ahead_days)

    logger.info(
        "Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end
    )
    # Get most recent load forecast as input_data,
    # we use a regular forecast as input point for creating component forecasts
    input_data = context.database.get_predicted_load(
        pj, start_time=datetime_start, end_time=datetime_end
    )
    # Check if input_data is not empty
    if len(input_data) == 0:
        logger.warning("No forecast found. Skipping pid", pid=pj["id"])
        return

    logger.info("retrieving weather data")
    # TODO make openstef_dbc function to retrieve inputdata for component forecast in one call,
    # this will make this function much shorter
    # Get required weather data.
    # These variables are used when determining the splitting coefficients, and
    # should therefore be reused when making the component forecasts.
    weather_data = context.database.get_weather_data(
        [pj["lat"], pj["lon"]],
        ["radiation", "windspeed_100m"],
        datetime_start=datetime_start,
        datetime_end=datetime_end,
    )

    # Make forecast for the demand, wind and pv components
    forecasts = create_components_forecast_pipeline(pj, input_data, weather_data)

    # Perform sanity check on index
    if not isinstance(forecasts.index, pd.DatetimeIndex):
        raise ValueError(
            f"Index is not datetime. Received forecasts:{forecasts.head()}"
        )

    # Save forecast to database
    context.database.write_forecast(forecasts)
    logger.debug("Written forecast to database")

    # Check if forecast was complete enough, otherwise raise exception.
    # The reference time is read here (after writing), as before.
    max_index = forecasts.index.max()
    if max_index < datetime.now(timezone.utc) + timedelta(hours=30):
        # Check which input data is missing the most.
        # Do this by counting the NANs for (load)forecast, radiation and windspeed
        n_nas = {
            "nans_load_forecast": input_data.loc[max_index:, "forecast"].isna().sum(),
            "nans_radiation": weather_data.loc[max_index:, "radiation"].isna().sum(),
            "nans_windspeed_100m": weather_data.loc[max_index:, "windspeed_100m"]
            .isna()
            .sum(),
        }
        max_na = max(n_nas, key=n_nas.get)

        raise ComponentForecastTooShortHorizonError(
            f"Could not make component forecast for two days ahead, probably input data is missing, {max_na}: {n_nas[max_na]}"
        )
141
-
142
-
143
def main(config: object = None, database: object = None, **kwargs):
    """Run the components forecast task for the prediction jobs of every model type.

    Args:
        config: Configuration object; must not be ``None``.
        database: Database connection object; must not be ``None``.
        **kwargs: Forwarded to ``create_components_forecast_task`` via
            ``PredictionJobLoop.map``.

    Raises:
        RuntimeError: If ``config`` or ``database`` is ``None``.

    """
    # Both dependencies are mandatory; fail before any context is opened.
    if config is None or database is None:
        raise RuntimeError(
            "Please specifiy a config object and/or database connection object. These"
            " can be found in the openstef-dbc package."
        )

    # The task is named after this file, without the ".py" extension.
    taskname = Path(__file__).name.replace(".py", "")

    with TaskContext(taskname, config, database) as context:
        # Every member of ModelType takes part in the loop.
        all_model_types = [ml.value for ml in ModelType]
        job_loop = PredictionJobLoop(context, model_type=all_model_types)
        job_loop.map(create_components_forecast_task, context, **kwargs)
159
-
160
-
161
# Script entry point.
# NOTE(review): `main()` is called without arguments here, and `main` raises
# RuntimeError when `config` or `database` is None, so running this file directly
# fails unless those objects are supplied by a caller instead.
if __name__ == "__main__":
    main()
@@ -1,145 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- """This module contains the CRON job that is periodically executed to make prognoses and save them in to the database.
5
-
6
- This code assumes trained models are available from the persistent storage. If these
7
- are not available run train_model.py to train all models.
8
- To provide the prognoses the following steps are carried out:
9
- 1. Get historic training data (TDCV, Load, Weather and day_ahead_electricity_price price data)
10
- 2. Apply features
11
- 3. Load model
12
- 4. Make prediction
13
- 5. Write prediction to the database
14
- 6. Send Teams message if something goes wrong
15
-
16
- Example:
17
- This module is meant to be called directly from a CRON job.
18
- Alternatively this code can be run directly by running::
19
-
20
- $ python create_forecast.py
21
-
22
- """
23
- from datetime import datetime, timedelta
24
- from pathlib import Path
25
-
26
- from openstef.data_classes.prediction_job import PredictionJobDataClass
27
- from openstef.enums import BiddingZone, ModelType, PipelineType
28
- from openstef.exceptions import InputDataOngoingZeroFlatlinerError
29
- from openstef.pipeline.create_forecast import create_forecast_pipeline
30
- from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
31
- from openstef.tasks.utils.taskcontext import TaskContext
32
- from openstef.validation.validation import detect_ongoing_zero_flatliner
33
-
34
# Default number of days of history requested as model input (default of `t_behind_days`).
T_BEHIND_DAYS: int = 14
35
-
36
-
37
def create_forecast_task(
    pj: PredictionJobDataClass, context: TaskContext, t_behind_days: int = T_BEHIND_DAYS
) -> None:
    """Top level task that creates a forecast.

    On this task level all database and context manager dependencies are resolved.

    Expected prediction job keys; "id", "lat", "lon", "resolution_minutes",
        "horizon_minutes", "type", "name", "quantiles"

    The prediction job is skipped (an info message is logged and nothing is written)
    when the forecast pipeline is not listed in ``pj.pipelines_to_run``, when
    ``pj.id`` is listed in ``context.config.externally_posted_forecasts_pids``, or
    when the pipeline fails for a job listed in
    ``context.config.known_zero_flatliners``.

    Args:
        pj: Prediction job
        context: Context object that holds a config manager and a database connection
        t_behind_days: number of days included as history. This is used to generate
            lagged features for the to-be-forecasted period

    Raises:
        InputDataOngoingZeroFlatlinerError: If the pipeline raised this error for a
            job that is not a known zero flatliner.
        LookupError: If the pipeline raised a ``LookupError`` for a job that is not a
            known zero flatliner. A more descriptive message is used when an ongoing
            zero flatliner is detected in the input data.

    """
    # Local import: the module itself only imports `datetime` and `timedelta`.
    from datetime import timezone

    # Check pipeline types
    if PipelineType.FORECAST not in pj.pipelines_to_run:
        context.logger.info(
            "Skip this PredictionJob because forecast pipeline is not specified in the pj."
        )
        return

    # TODO: Improve implementation by using a field in the database and leveraging the
    # `pipelines_to_run` attribute of the `PredictionJobDataClass` object. This
    # would require a change to the MySQL datamodel.
    if (
        context.config.externally_posted_forecasts_pids
        and pj.id in context.config.externally_posted_forecasts_pids
    ):
        context.logger.info(
            "Skip this PredictionJob because its forecasts are posted by an external process."
        )
        return

    # Extract mlflow tracking URI and trained models folder
    mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri

    # Define datetime range for input data from a single clock reading.
    # `datetime.utcnow()` is deprecated since Python 3.12; this expression yields
    # the same naive UTC datetime.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    datetime_start = now - timedelta(days=t_behind_days)
    datetime_end = now + timedelta(minutes=pj.horizon_minutes)

    # Retrieve input data
    input_data = context.database.get_model_input(
        pid=pj["id"],
        location=[pj["lat"], pj["lon"]],
        datetime_start=datetime_start,
        datetime_end=datetime_end,
        market_price=pj.electricity_bidding_zone.value,
    )

    # Add APX price to the input data for backward compatibility,
    # remove this line when all models are retrained
    if pj.electricity_bidding_zone == BiddingZone.NL:
        input_data["APX"] = input_data["day_ahead_electricity_price"]

    try:
        # Make forecast with the forecast pipeline
        forecast = create_forecast_pipeline(
            pj, input_data, mlflow_tracking_uri=mlflow_tracking_uri
        )
    except (InputDataOngoingZeroFlatlinerError, LookupError) as e:
        known_zero_flatliners = context.config.known_zero_flatliners
        if known_zero_flatliners and pj.id in known_zero_flatliners:
            context.logger.info(
                "No forecasts were made for this known zero flatliner prediction job. No forecasts need to be made either, since the fallback forecasts are sufficient."
            )
            return

        if isinstance(e, InputDataOngoingZeroFlatlinerError):
            raise InputDataOngoingZeroFlatlinerError(
                'All recent load measurements are zero. Check the load profile of this pid as well as related/neighbouring prediction jobs. Afterwards, consider adding this pid to the "known_zero_flatliners" app_setting and possibly removing other pids from the same app_setting.'
            ) from e

        # Only LookupError is left at this point: the except clause catches nothing else.
        zero_flatliner_ongoing = detect_ongoing_zero_flatliner(
            load=input_data.iloc[:, 0],
            duration_threshold_minutes=pj.flatliner_threshold_minutes,
        )
        if zero_flatliner_ongoing:
            raise LookupError(
                'Model not found. Consider checking for a zero flatliner and adding this pid to the "known_zero_flatliners" app_setting. For zero flatliners, no model can be trained.'
            ) from e
        # Re-raise the original error unchanged.
        raise

    # Write forecast to the database
    context.database.write_forecast(forecast, t_ahead_series=True)
124
-
125
-
126
def main(model_type=None, config=None, database=None, **kwargs):
    """Run the forecast task for the prediction jobs of the selected model types.

    Args:
        model_type: Model types passed to ``PredictionJobLoop``. When ``None``,
            the values of all ``ModelType`` members are used.
        config: Configuration object; must not be ``None``.
        database: Database connection object; must not be ``None``.
        **kwargs: Forwarded to ``create_forecast_task`` via ``PredictionJobLoop.map``.

    Raises:
        RuntimeError: If ``config`` or ``database`` is ``None``.

    """
    # Both dependencies are mandatory; fail before any context is opened.
    if config is None or database is None:
        raise RuntimeError(
            "Please specify a config object and/or database connection object. These"
            " can be found in the openstef-dbc package."
        )

    # The task is named after this file, without the ".py" extension.
    taskname = Path(__file__).name.replace(".py", "")

    with TaskContext(taskname, config, database) as context:
        selected_model_types = (
            [ml.value for ml in ModelType] if model_type is None else model_type
        )
        job_loop = PredictionJobLoop(context, model_type=selected_model_types)
        job_loop.map(create_forecast_task, context, **kwargs)
142
-
143
-
144
# Script entry point.
# NOTE(review): `main()` is called without arguments here, and `main` raises
# RuntimeError when `config` or `database` is None, so running this file directly
# fails unless those objects are supplied by a caller instead.
if __name__ == "__main__":
    main()