openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/tasks/split_forecast.py
@@ -1,273 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
-
- """This module contains the CRON job that is periodically executed to make prognoses of solar features.
-
- These features are useful for splitting the load
- into solar and wind contributions.
- This is achieved by carrying out the following steps:
-   1. Get the wind and solar reference data for the specific location of the
-      customer
-   2. Get the TDCV (Typical Domestic Consumption Values) data
-   3. Fit a linear combination of the above time series to the historic load data to
-      determine the contributions of each energy source.
-   4. Write the resulting coefficients to the SQL database.
-
- Example:
-     This module is meant to be called directly from a CRON job. A description of
-     the CRON job can be found in the /k8s/CronJobs folder.
-     Alternatively this code can be run directly by running::
-
-         $ python split_forecast.py
-
- """
- import logging
- from datetime import datetime
- from pathlib import Path
-
- import numpy as np
- import pandas as pd
- import scipy.optimize
- import structlog
-
- import openstef.monitoring.teams as monitoring
- from openstef.data_classes.prediction_job import PredictionJobDataClass
- from openstef.enums import ModelType
- from openstef.settings import Settings
- from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
- from openstef.tasks.utils.taskcontext import TaskContext
-
- COEF_MAX_FRACTION_DIFF = 0.3
-
-
- def main(config=None, database=None):
-     taskname = Path(__file__).name.replace(".py", "")
-
-     if database is None or config is None:
-         raise RuntimeError(
-             "Please specify a config object and/or database connection object. These"
-             " can be found in the openstef-dbc package."
-         )
-
-     with TaskContext(taskname, config, database) as context:
-         model_type = [ml.value for ml in ModelType]
-
-         PredictionJobLoop(
-             context,
-             model_type=model_type,
-         ).map(split_forecast_task, context)
-
-
- def split_forecast_task(
-     pj: PredictionJobDataClass,
-     context: TaskContext,
- ) -> pd.DataFrame:
-     """Carry out the energy splitting for a specific prediction job.
-
-     Args:
-         pj: Prediction job
-         context: Context object that holds a config manager and a database connection.
-
-     Returns:
-         Energy splitting coefficients.
-
-     """
-     structlog.configure(
-         wrapper_class=structlog.make_filtering_bound_logger(
-             logging.getLevelName(Settings.log_level)
-         )
-     )
-     logger = structlog.get_logger(__name__)
-
-     logger.info("Start splitting energy", pid=pj["id"])
-
-     # Get input for splitting
-     input_split_function = context.database.get_input_energy_splitting(pj)
-
-     # Old split method;
-     # find_components() gives two things:
-     # - the split components (load, solar, wind, consumption, Inschatting (= sum of others))
-     # - coefdict: coefficients of each component; these are not yet an output of Dazls. Let's discuss with JM if we want that.
-
-     # Carry out the splitting
-     components, coefdict = find_components(input_split_function)
-
-     # Calculate the mean absolute error (MAE)
-     # TODO: use a standard metric function for this
-     error = components[["load", "Inschatting"]].diff(axis=1).iloc[:, 1]
-     mae = error.abs().mean()
-     coefdict.update({"MAE": mae})
-     coefsdf = convert_coefdict_to_coefsdf(pj, input_split_function, coefdict)
-
-     # Get the coefs of previous runs and check if the new coefs are valid
-     last_coefsdict = context.database.get_energy_split_coefs(pj)
-     last_coefsdf = convert_coefdict_to_coefsdf(pj, input_split_function, last_coefsdict)
-     invalid_coefs = determine_invalid_coefs(coefsdf, last_coefsdf)
-     if not invalid_coefs.empty:
-         # If the coefs are not valid, do not update the coefs in the db and send a
-         # Teams message that something strange is happening
-         monitoring.post_teams(
-             f"New splitting coefficient(s) for pid **{pj['id']}** deviate strongly "
-             "from previously stored coefficients.",
-             url=context.config.teams_monitoring_url,
-             invalid_coefficients=invalid_coefs,
-             coefficients_df=coefsdf,
-         )
-         # Use the last known coefficients for further processing
-         return last_coefsdf
-     else:
-         # Save results
-         context.database.write_energy_splitting_coefficients(
-             coefsdf, if_exists="append"
-         )
-         logger.info(
-             "Successfully wrote energy split coefficients to database", pid=pj["id"]
-         )
-         return coefsdf
-
- def determine_invalid_coefs(
-     new_coefs: pd.DataFrame, last_coefs: pd.DataFrame
- ) -> pd.DataFrame:
-     """Determine which new coefficients are invalid and return them.
-
-     Args:
-         new_coefs: df of new coefficients for standard load
-             profiles (i.e. wind, solar, household)
-         last_coefs: df of last coefficients for standard load
-             profiles (i.e. wind, solar, household)
-
-     Returns:
-         Dataframe with invalid coefficients
-
-     """
-     merged_coefs = pd.merge(
-         last_coefs, new_coefs, on="coef_name", how="left", suffixes=["_last", "_new"]
-     )
-     # Calculate the difference between the new and last coefficients; if there is
-     # no new coefficient, set the difference to inf.
-     # If a coefficient name is not present in the new coefficients list, it fails
-     # the check. If it is not present in the last coefficients list, it is added.
-     merged_coefs["difference"] = (
-         (merged_coefs.coef_value_last - merged_coefs.coef_value_new)
-         .abs()
-         .fillna(np.inf)
-     )
-     # Check if the absolute difference between the last and new coefficients
-     # is more than COEF_MAX_FRACTION_DIFF x the absolute value of the last coefficient
-     invalid_coefs = merged_coefs[
-         merged_coefs.difference
-         > (COEF_MAX_FRACTION_DIFF * merged_coefs.coef_value_last).abs()
-     ]
-     return invalid_coefs
-
-
- def convert_coefdict_to_coefsdf(
-     pj: PredictionJobDataClass, input_split_function: pd.DataFrame, coefdict: dict
- ) -> pd.DataFrame:
-     """Convert a dictionary of coefficients to a dataframe with additional data for db storage.
-
-     Args:
-         pj: prediction job
-         input_split_function: df of columns of standard load profiles,
-             i.e. wind, solar, household
-         coefdict: dict of coefficient per standard load profile
-
-     Returns:
-         DataFrame of coefficients to insert in sql
-
-     """
-     sql_column_labels = ["pid", "date_start", "date_end", "created"]
-     sql_column_values = [
-         pj["id"],
-         input_split_function.index.min().date(),
-         input_split_function.index.max().date(),
-         datetime.utcnow(),
-     ]
-     coefsdf = pd.DataFrame(
-         {"coef_name": list(coefdict.keys()), "coef_value": list(coefdict.values())}
-     )
-     for i, column in enumerate(sql_column_labels):
-         coefsdf[column] = sql_column_values[i]
-
-     return coefsdf
-
-
- def find_components(
-     df: pd.DataFrame, zero_bound: bool = True
- ) -> tuple[pd.DataFrame, dict]:
-     """Function that carries out the actual energy splitting.
-
-     Args:
-         df: Input data. The dataframe should contain these columns
-             in exactly this order: [load, wind_ref, pv_ref, multiple tdcv columns]
-         zero_bound: If zero_bound is True, coefficients can't be negative.
-
-     Returns:
-         tuple:
-             - DataFrame containing the wind and solar components
-             - Dict with the coefficients that result from the fitting
-
-     """
-     load = df.iloc[:, 0]
-     wind_ref = df.iloc[:, 1]
-     pv_ref = df.iloc[:, 2]
-
-     # Define scaler
-     nedu_scaler = (load.max() - load.min()) / 10
-
-     # Come up with an initial guess for the fitting
-     p_wind_guess = 1.0
-     ppv_guess = 1.0
-     p0 = [p_wind_guess, ppv_guess] + (len(df.columns) - 3) * [nedu_scaler]
-
-     # Define fitting bounds
-     if zero_bound:
-         bounds = (0, np.inf)
-     else:
-         bounds = (-np.inf, np.inf)
-
-     # Define the function to fit
-     def weighted_sum(x, *args):
-         if len(x) != len(args):
-             raise ValueError("Length of args should match length of x")
-         weights = np.array([v for v in args])
-         return np.dot(x.T, weights)
-
-     # Carry out the fitting
-     # See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html # noqa
-     coefs, cov = scipy.optimize.curve_fit(
-         weighted_sum,
-         xdata=df.iloc[:, 1:].values.T,
-         ydata=load.values,
-         p0=p0,
-         bounds=bounds,
-         method="trf",
-     )
-
-     # Set 'almost zero' coefficients to zero
-     coefs[coefs < 0.1] = 0
-
-     # Reconstruct the historical load
-     hist = weighted_sum(df.iloc[:, 1:].values.T, *coefs)
-     histp0 = weighted_sum(df.iloc[:, 1:].values.T, *p0)
-
-     # Make a nice dataframe to return the components
-     components = df.iloc[:, [0]].copy()
-     components["Inschatting"] = hist.T
-     components["p0"] = histp0.T
-     components["Windopwek"] = wind_ref * coefs[0]
-     components["Zonne-opwek"] = pv_ref * coefs[1]
-     components["StandaardVerbruik"] = (df.iloc[:, 3:] * coefs[2:]).sum(axis=1)
-     components["Residu"] = -1 * components.iloc[:, 0:2].diff(axis=1).iloc[:, 1]
-
-     # Make a nice dictionary to return the coefficients
-     coefdict = {name: value for name, value in zip(df.columns[1:], coefs)}
-
-     # Return the result
-     return components, coefdict
-
-
- if __name__ == "__main__":
-     main()
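
The removed `find_components` is the core of this task: it fits a non-negative weighted sum of the reference profiles (wind, pv, and the TDCV columns) to the measured load with `scipy.optimize.curve_fit` and reads the fitted weights off as splitting coefficients, after which `determine_invalid_coefs` rejects any coefficient that drifted more than `COEF_MAX_FRACTION_DIFF` (0.3) times the magnitude of the stored one. A minimal sketch of both ideas on synthetic data (all names and values below are invented for illustration):

```python
import numpy as np
import pandas as pd
import scipy.optimize

# Synthetic input in the column order find_components expects:
# [load, wind_ref, pv_ref, <tdcv columns>].
rng = np.random.default_rng(0)
index = pd.date_range("2024-01-01", periods=96, freq="15min")
wind_ref = pd.Series(rng.uniform(0, 1, len(index)), index=index)
pv_ref = pd.Series(rng.uniform(0, 1, len(index)), index=index)
tdcv = pd.Series(rng.uniform(0, 1, len(index)), index=index)
# "True" mix: 3 parts wind, 2 parts solar, 5 parts domestic consumption.
load = 3.0 * wind_ref + 2.0 * pv_ref + 5.0 * tdcv
df = pd.DataFrame({"load": load, "wind_ref": wind_ref, "pv_ref": pv_ref, "tdcv": tdcv})


def weighted_sum(x, *weights):
    # x has shape (n_profiles, n_samples); one weight per profile.
    return np.dot(x.T, np.array(weights))


coefs, _ = scipy.optimize.curve_fit(
    weighted_sum,
    xdata=df.iloc[:, 1:].values.T,
    ydata=df["load"].values,
    p0=[1.0, 1.0, (load.max() - load.min()) / 10],
    bounds=(0, np.inf),  # zero_bound=True: coefficients cannot go negative
    method="trf",
)
print(dict(zip(df.columns[1:], coefs.round(2))))
# -> approximately {'wind_ref': 3.0, 'pv_ref': 2.0, 'tdcv': 5.0}

# The validation step then flags a new coefficient that moved more than
# 30% of the stored value, e.g.:
last, new = 3.0, 4.5
print(abs(last - new) > 0.3 * abs(last))  # True -> keep the old coefficients
```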
openstef/tasks/train_model.py
@@ -1,224 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
- """This module contains the CRON job that is periodically executed to retrain the prognosis models.
-
- For this the following steps are carried out:
-   1. Get historic training data (TDCV, Load, Weather and day_ahead_electricity_price data)
-   2. Apply features
-   3. Train and test the new model
-   4. Check if the new model performs better than the old model
-   5. Store the model if it performs better
-   6. Send a Slack message to inform the users
-
- Example:
-     This module is meant to be called directly from a CRON job. A description of
-     the CRON job can be found in the /k8s/CronJobs folder.
-     Alternatively this code can be run directly by running::
-
-         $ python train_model.py
-
- """
- from datetime import datetime, timedelta
- from pathlib import Path
-
- import pandas as pd
-
- from openstef.data_classes.prediction_job import PredictionJobDataClass
- from openstef.enums import ModelType, PipelineType
- from openstef.exceptions import (
-     InputDataOngoingZeroFlatlinerError,
-     SkipSaveTrainingForecasts,
- )
- from openstef.model.serializer import MLflowSerializer
- from openstef.pipeline.train_model import (
-     MAXIMUM_MODEL_AGE,
-     train_model_pipeline,
-     train_pipeline_step_load_model,
- )
- from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
- from openstef.tasks.utils.taskcontext import TaskContext
-
- TRAINING_PERIOD_DAYS: int = 120
- DEFAULT_CHECK_MODEL_AGE: bool = True
-
-
- def train_model_task(
-     pj: PredictionJobDataClass,
-     context: TaskContext,
-     check_old_model_age: bool = DEFAULT_CHECK_MODEL_AGE,
-     datetime_start: datetime = None,
-     datetime_end: datetime = None,
- ) -> None:
-     """Train model task.
-
-     Top level task that trains a new model and makes sure the best available model is
-     stored. On this task level all database and context manager dependencies are resolved.
-
-     Expected prediction job keys: "id", "model", "lat", "lon", "name"
-
-     Args:
-         pj: Prediction job
-         context: Context object that holds a config manager and a
-             database connection.
-         check_old_model_age: Check if the old model is too young to be retrained
-         datetime_start: Start of the training data
-         datetime_end: End of the training data
-
-     Raises:
-         SkipSaveTrainingForecasts: If the old model is better or too young, the training forecasts need not be saved.
-         InputDataOngoingZeroFlatlinerError: If all recent load measurements are zero.
-
-     """
-     # Check pipeline types
-     if PipelineType.TRAIN not in pj.pipelines_to_run:
-         context.logger.info(
-             "Skip this PredictionJob because train pipeline is not specified in the pj."
-         )
-         return
-
-     # TODO: Improve implementation by using a field in the database and leveraging the
-     # `pipelines_to_run` attribute of the `PredictionJobDataClass` object. This
-     # would require a change to the MySQL datamodel.
-     if (
-         context.config.externally_posted_forecasts_pids
-         and pj.id in context.config.externally_posted_forecasts_pids
-     ):
-         context.logger.info(
-             "Skip this PredictionJob because its forecasts are posted by an external process."
-         )
-         return
-
-     # Get the paths for storing model and reports from the config manager
-     mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri
-     context.logger.debug(f"MLflow tracking uri: {mlflow_tracking_uri}")
-     artifact_folder = context.config.paths_artifact_folder
-     context.logger.debug(f"Artifact folder: {artifact_folder}")
-
-     context.perf_meter.checkpoint("Added metadata to PredictionJob")
-
-     # Check the model age before retrieving the input data to speed up the train job.
-     # (The exact same model age check is also part of the "train_model_pipeline".)
-
-     # Initialize serializer
-     serializer = MLflowSerializer(mlflow_tracking_uri=mlflow_tracking_uri)
-
-     # Get the old model and its age
-     _, _, old_model_age = train_pipeline_step_load_model(pj, serializer)
-
-     # Check the old model age and decide whether to continue
-     if (old_model_age < MAXIMUM_MODEL_AGE) and check_old_model_age:
-         context.perf_meter.checkpoint(
-             f"Old model is younger than {MAXIMUM_MODEL_AGE} days, skip training"
-         )
-         if pj.save_train_forecasts:
-             raise SkipSaveTrainingForecasts
-         return
-
-     # Define start and end of the training input data
-     training_period_days_to_fetch = (
-         TRAINING_PERIOD_DAYS
-         if pj.data_balancing_ratio is None
-         else int(pj.data_balancing_ratio * TRAINING_PERIOD_DAYS)
-     )
-
-     if datetime_end is None:
-         datetime_end = datetime.utcnow()
-     if datetime_start is None:
-         datetime_start = datetime_end - timedelta(days=training_period_days_to_fetch)
-
-     # Get training input data from the database
-     input_data = context.database.get_model_input(
-         pid=pj["id"],
-         location=[pj["lat"], pj["lon"]],
-         datetime_start=datetime_start,
-         datetime_end=datetime_end,
-     )
-
-     # If data balancing is enabled, fetch data from 1 year ago and combine it with
-     # the current data
-     if pj.data_balancing_ratio is not None:
-         # Because the data is from the past, we can use the data from the "future"
-         balanced_datetime_start = datetime_end - timedelta(days=365)
-         balanced_datetime_end = balanced_datetime_start + timedelta(
-             days=training_period_days_to_fetch
-         )
-
-         balanced_input_data = context.database.get_model_input(
-             pid=pj["id"],
-             location=[pj["lat"], pj["lon"]],
-             datetime_start=balanced_datetime_start,
-             datetime_end=balanced_datetime_end,
-         )
-
-         input_data = pd.concat(
-             [
-                 balanced_input_data,
-                 input_data,
-             ]
-         )
-
-     context.perf_meter.checkpoint("Retrieved timeseries input")
-
-     # Execute the model training pipeline
-     try:
-         data_sets = train_model_pipeline(
-             pj,
-             input_data,
-             check_old_model_age=check_old_model_age,
-             mlflow_tracking_uri=mlflow_tracking_uri,
-             artifact_folder=artifact_folder,
-         )
-
-         if data_sets:
-             context.perf_meter.checkpoint("Model trained")
-         else:
-             context.perf_meter.checkpoint("Model not trained")
-
-         if pj.save_train_forecasts:
-             if data_sets is None:
-                 raise RuntimeError("Forecasts were not retrieved")
-             if not hasattr(context.database, "write_train_forecasts"):
-                 raise RuntimeError(
-                     "Database connector does not support 'write_train_forecasts' while "
-                     "the 'save_train_forecasts' option was activated."
-                 )
-             context.database.write_train_forecasts(pj, data_sets)
-             context.logger.debug("Saved forecasts from the trained model on the datasets")
-     except SkipSaveTrainingForecasts:
-         context.logger.debug("Skip saving forecasts")
-     except InputDataOngoingZeroFlatlinerError:
-         if (
-             context.config.known_zero_flatliners
-             and pj.id in context.config.known_zero_flatliners
-         ):
-             context.logger.info(
-                 "No model was trained for this known zero flatliner, and no model needs to be trained, since the fallback forecasts are sufficient."
-             )
-             return
-         else:
-             raise InputDataOngoingZeroFlatlinerError(
-                 'All recent load measurements are zero. Check the load profile of this pid as well as related/neighbouring prediction jobs. Afterwards, consider adding this pid to the "known_zero_flatliners" app_setting and possibly removing other pids from the same app_setting.'
-             )
-
-
- def main(model_type=None, config=None, database=None):
-     if database is None or config is None:
-         raise RuntimeError(
-             "Please specify a config object and/or database connection object. These"
-             " can be found in the openstef-dbc package."
-         )
-
-     if model_type is None:
-         model_type = [ml.value for ml in ModelType]
-
-     taskname = Path(__file__).name.replace(".py", "")
-     datetime_now = datetime.utcnow()
-     with TaskContext(taskname, config, database) as context:
-         PredictionJobLoop(context, model_type=model_type).map(
-             train_model_task, context, datetime_end=datetime_now
-         )
-
-
- if __name__ == "__main__":
-     main()
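
One subtlety of the removed `train_model_task` is the data-balancing branch: when `pj.data_balancing_ratio` is set, the recent training window shrinks to `ratio * TRAINING_PERIOD_DAYS` days and is complemented by an equally long window starting exactly 365 days before `datetime_end`, so the training set mixes recent data with same-season data from a year earlier. A sketch of just the window arithmetic (the ratio of 0.5 is a made-up example):

```python
from datetime import datetime, timedelta

TRAINING_PERIOD_DAYS = 120  # as in the removed task
data_balancing_ratio = 0.5  # hypothetical prediction-job setting

# Shrunken recent window: 60 days up to "now".
days_to_fetch = int(data_balancing_ratio * TRAINING_PERIOD_DAYS)
datetime_end = datetime.utcnow()
datetime_start = datetime_end - timedelta(days=days_to_fetch)

# Equally long window starting one year before datetime_end.
balanced_start = datetime_end - timedelta(days=365)
balanced_end = balanced_start + timedelta(days=days_to_fetch)

print(f"recent:   {datetime_start:%Y-%m-%d} -> {datetime_end:%Y-%m-%d}")
print(f"balanced: {balanced_start:%Y-%m-%d} -> {balanced_end:%Y-%m-%d}")
```

The two frames fetched for these windows are then simply concatenated with `pd.concat` before being handed to `train_model_pipeline`.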
@@ -1,3 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
openstef/tasks/utils/dependencies.py
@@ -1,107 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
- import random
- from typing import Iterable, Sequence, Set, Union
-
- import networkx as nx
-
- from openstef.data_classes.prediction_job import PredictionJobDataClass
-
- NodeIdType = Union[str, int]
- EdgeType = tuple[NodeIdType, NodeIdType]
-
-
- def has_dependencies(pjs: Iterable[PredictionJobDataClass]) -> bool:
-     """Test whether some prediction jobs have dependency information.
-
-     Args:
-         pjs: The list of prediction jobs
-
-     Returns:
-         True if some dependency information was found.
-
-     """
-     for pj in pjs:
-         if pj.depends_on is not None and len(pj.depends_on) > 0:
-             return True
-     return False
-
-
- def build_graph_structure(
-     pjs: Iterable[PredictionJobDataClass],
- ) -> tuple[Set[NodeIdType], Set[EdgeType]]:
-     """Build the graph of dependencies between prediction jobs.
-
-     Args:
-         pjs: The iterable of prediction jobs
-
-     Returns:
-         - The set of node ids of the graph
-         - The set of edges in the graph
-
-     """
-     nodes = set()
-     edges = set()
-
-     for pj in pjs:
-         nodes.add(pj["id"])
-         if pj.depends_on is not None:
-             for j in pj.depends_on:
-                 edges.add((j, pj["id"]))
-
-     return nodes, edges
-
-
- def build_nx_graph(
-     nodes: Iterable[NodeIdType], edges: Iterable[EdgeType]
- ) -> nx.DiGraph:
-     """Build a networkx directed graph.
-
-     Args:
-         nodes: The sequence of node ids
-         edges: The sequence of edges
-
-     Returns:
-         The dependency graph
-
-     """
-     graph = nx.DiGraph()
-     graph.add_nodes_from(nodes)
-     graph.add_edges_from(edges)
-     return graph
-
-
- def find_groups(
-     pjs: Sequence[PredictionJobDataClass], randomize_groups: bool = False
- ) -> tuple[nx.DiGraph, list[list[PredictionJobDataClass]]]:
-     """Find a sequence of prediction job groups respecting dependencies.
-
-     Compute groups of prediction jobs such that the prediction jobs in a group
-     depend on at least one prediction job in the previous group and do not depend
-     on any prediction job in the following groups.
-     This means that all the prediction jobs in a group can be run in parallel and that
-     if groups are treated in the given order, the dependencies of a prediction job have
-     already been treated when the prediction job is run.
-
-     Args:
-         pjs: The sequence of prediction jobs
-         randomize_groups: Whether subgroups should be randomized.
-
-     Returns:
-         - The dependency graph
-         - The list of prediction job groups
-
-     """
-     nodes, edges = build_graph_structure(pjs)
-     graph = build_nx_graph(nodes, edges)
-     groups = list(nx.topological_generations(graph))
-
-     if randomize_groups:
-         for group in groups:
-             random.shuffle(group)
-
-     # Convert groups of pj ids to groups of pjs
-     pj_id_map = {pj["id"]: i for i, pj in enumerate(pjs)}
-     pj_groups = [[pjs[pj_id_map[pj_id]] for pj_id in group] for group in groups]
-     return graph, pj_groups
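
The grouping logic in `find_groups` is a thin wrapper around `networkx.topological_generations`. A minimal, self-contained run on a hypothetical dependency graph (the job ids and edges are invented for illustration) shows the output shape: jobs in the same generation can run in parallel, and generations must be processed in order so that every job runs after its dependencies:

```python
import networkx as nx

# Edges point from a dependency to the job that depends on it,
# mirroring build_graph_structure: (j, pj["id"]) for j in pj.depends_on.
graph = nx.DiGraph()
graph.add_nodes_from([1, 2, 3, 4, 5])  # job 5 has no dependencies
graph.add_edges_from([(1, 3), (2, 3), (3, 4)])  # 3 needs 1 and 2; 4 needs 3

groups = list(nx.topological_generations(graph))
print(groups)  # expected: [[1, 2, 5], [3], [4]]
```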