openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. openstef/app_settings.py +19 -0
  2. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  3. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
  4. openstef/data/dutch_holidays.csv +1759 -0
  5. openstef/data_classes/data_prep.py +1 -1
  6. openstef/data_classes/prediction_job.py +15 -9
  7. openstef/enums.py +108 -9
  8. openstef/exceptions.py +1 -1
  9. openstef/feature_engineering/apply_features.py +25 -6
  10. openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
  11. openstef/feature_engineering/cyclic_features.py +102 -0
  12. openstef/feature_engineering/data_preparation.py +12 -5
  13. openstef/feature_engineering/feature_applicator.py +1 -5
  14. openstef/feature_engineering/general.py +14 -0
  15. openstef/feature_engineering/holiday_features.py +35 -26
  16. openstef/feature_engineering/missing_values_transformer.py +141 -0
  17. openstef/feature_engineering/weather_features.py +7 -0
  18. openstef/metrics/figure.py +3 -0
  19. openstef/metrics/metrics.py +58 -1
  20. openstef/metrics/reporter.py +7 -0
  21. openstef/model/confidence_interval_applicator.py +28 -3
  22. openstef/model/model_creator.py +54 -41
  23. openstef/model/objective.py +17 -34
  24. openstef/model/objective_creator.py +13 -12
  25. openstef/model/regressors/arima.py +1 -1
  26. openstef/model/regressors/dazls.py +35 -96
  27. openstef/model/regressors/flatliner.py +95 -0
  28. openstef/model/regressors/linear_quantile.py +296 -0
  29. openstef/model/regressors/xgb.py +23 -0
  30. openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  31. openstef/model/regressors/xgb_quantile.py +3 -0
  32. openstef/model/serializer.py +10 -0
  33. openstef/model_selection/model_selection.py +4 -1
  34. openstef/monitoring/performance_meter.py +1 -2
  35. openstef/monitoring/teams.py +11 -0
  36. openstef/pipeline/create_basecase_forecast.py +11 -1
  37. openstef/pipeline/create_component_forecast.py +24 -28
  38. openstef/pipeline/create_forecast.py +20 -1
  39. openstef/pipeline/optimize_hyperparameters.py +18 -16
  40. openstef/pipeline/train_create_forecast_backtest.py +11 -1
  41. openstef/pipeline/train_model.py +31 -12
  42. openstef/pipeline/utils.py +3 -0
  43. openstef/postprocessing/postprocessing.py +29 -0
  44. openstef/settings.py +15 -0
  45. openstef/tasks/calculate_kpi.py +23 -20
  46. openstef/tasks/create_basecase_forecast.py +15 -7
  47. openstef/tasks/create_components_forecast.py +24 -8
  48. openstef/tasks/create_forecast.py +9 -6
  49. openstef/tasks/create_solar_forecast.py +4 -4
  50. openstef/tasks/optimize_hyperparameters.py +2 -2
  51. openstef/tasks/split_forecast.py +9 -2
  52. openstef/tasks/train_model.py +9 -7
  53. openstef/tasks/utils/taskcontext.py +7 -0
  54. openstef/validation/validation.py +28 -3
  55. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
  56. openstef-3.4.44.dist-info/RECORD +97 -0
  57. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
  58. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
  59. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
  60. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
  61. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
  62. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
  63. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
  64. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
  65. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
  66. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
  67. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
  68. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
  69. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
  70. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
  71. openstef/data/dutch_holidays_2020-2022.csv +0 -831
  72. openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
  73. openstef/feature_engineering/historic_features.py +0 -40
  74. openstef/model/regressors/proloaf.py +0 -281
  75. openstef/tasks/run_tracy.py +0 -145
  76. openstef-3.4.10.dist-info/RECORD +0 -104
  77. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
  78. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
  79. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
  80. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
  81. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,14 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
4
5
  from pathlib import Path
5
6
 
6
7
  import pandas as pd
7
8
  import structlog
8
9
 
9
10
  from openstef.data_classes.prediction_job import PredictionJobDataClass
10
- from openstef.exceptions import NoRealisedLoadError, InputDataOngoingZeroFlatlinerError
11
+ from openstef.exceptions import InputDataOngoingZeroFlatlinerError, NoRealisedLoadError
11
12
  from openstef.feature_engineering.feature_applicator import (
12
13
  OperationalPredictFeatureApplicator,
13
14
  )
@@ -18,6 +19,7 @@ from openstef.postprocessing.postprocessing import (
18
19
  add_components_base_case_forecast,
19
20
  add_prediction_job_properties_to_forecast,
20
21
  )
22
+ from openstef.settings import Settings
21
23
  from openstef.validation import validation
22
24
 
23
25
  MODEL_LOCATION = Path(".")
@@ -38,7 +40,15 @@ def create_basecase_forecast_pipeline(
38
40
  Returns:
39
41
  Base case forecast
40
42
 
43
+ Raises:
44
+ NoRealisedLoadError: When no realised load for given datetime range.
45
+
41
46
  """
47
+ structlog.configure(
48
+ wrapper_class=structlog.make_filtering_bound_logger(
49
+ logging.getLevelName(Settings.log_level)
50
+ )
51
+ )
42
52
  logger = structlog.get_logger(__name__)
43
53
 
44
54
  logger.info("Preprocessing data for basecase forecast")
@@ -2,7 +2,10 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
 
5
+ import logging
6
+
5
7
  import joblib
8
+ import numpy as np
6
9
  import pandas as pd
7
10
  import structlog
8
11
 
@@ -11,12 +14,11 @@ from openstef import PROJECT_ROOT
11
14
  from openstef.data_classes.prediction_job import PredictionJobDataClass
12
15
  from openstef.enums import ForecastType
13
16
  from openstef.model.regressors.dazls import Dazls
14
-
15
- import numpy as np
17
+ from openstef.settings import Settings
16
18
 
17
19
  # Set the path for the Dazls stored model
18
20
  DAZLS_STORED = str(
19
- PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.0" / "dazls_stored_3.4.0_"
21
+ PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
20
22
  )
21
23
 
22
24
 
@@ -96,47 +98,34 @@ def create_components_forecast_pipeline(
96
98
  "algtype"
97
99
 
98
100
  """
101
+ structlog.configure(
102
+ wrapper_class=structlog.make_filtering_bound_logger(
103
+ logging.getLevelName(Settings.log_level)
104
+ )
105
+ )
99
106
  logger = structlog.get_logger(__name__)
100
107
  logger.info("Make components prediction", pid=pj["id"])
101
108
 
102
109
  # Make component forecasts
103
110
  try:
104
- input_data = create_input(pj, input_data, weather_data)
111
+ dazls_input_data = create_input(pj, input_data, weather_data)
105
112
 
106
113
  # Save and load the model as .sav file (or as .z file)
107
114
  # For the code contact: korte.termijn.prognoses@alliander.com
108
115
  dazls_model = Dazls()
109
- dazls_model.domain_model = joblib.load(DAZLS_STORED + "domain_model.z")
110
- dazls_model.domain_model_scaler = joblib.load(
111
- DAZLS_STORED + "domain_model_scaler.z"
112
- )
113
- dazls_model.domain_model_input_columns = joblib.load(
114
- DAZLS_STORED + "domain_model_features.z"
115
- )
116
+ dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")
116
117
 
117
- dazls_model.adaptation_model = joblib.load(DAZLS_STORED + "adaptation_model.z")
118
- dazls_model.adaptation_model_scaler = joblib.load(
119
- DAZLS_STORED + "adaptation_model_scaler.z"
120
- )
121
- dazls_model.adaptation_model_input_columns = joblib.load(
122
- DAZLS_STORED + "adaptation_model_features.z"
123
- )
124
-
125
- dazls_model.target_columns = joblib.load(DAZLS_STORED + "target.z")
126
- dazls_model.target_scaler = joblib.load(DAZLS_STORED + "target_scaler.z")
127
-
128
- logger = structlog.get_logger(__name__)
129
118
  logger.info("DAZLS model loaded", dazls_model=str(dazls_model))
130
119
 
131
120
  # Use the predict function of Dazls model
132
121
  # As input data we use the input_data function which takes into consideration what we want as an input for the forecast and what Dazls can accept as an input
133
- forecasts = dazls_model.predict(x=input_data)
122
+ forecasts = dazls_model.predict(x=dazls_input_data)
134
123
 
135
124
  # Set the columns for the output forecast dataframe
136
125
  forecasts = pd.DataFrame(
137
126
  forecasts,
138
127
  columns=["forecast_wind_on_shore", "forecast_solar"],
139
- index=input_data.index,
128
+ index=dazls_input_data.index,
140
129
  )
141
130
 
142
131
  # Make post-processed forecasts for solar and wind power
@@ -151,18 +140,25 @@ def create_components_forecast_pipeline(
151
140
 
152
141
  # Make forecast for the component: "forecast_other"
153
142
  forecasts["forecast_other"] = (
154
- input_data["total_load"]
143
+ dazls_input_data["total_load"]
155
144
  - forecasts["forecast_solar"]
156
145
  - forecasts["forecast_wind_on_shore"]
157
146
  )
147
+
148
+ # Make sure the forecasts have the same form as the input data. Pad with 0 if necessary
149
+ forecasts = forecasts.reindex(index=input_data.index, fill_value=0)
158
150
  except Exception as e:
159
- # In case something goes wrong we fall back on aan empty dataframe
151
+ # In case something goes wrong we fall back on a zero-filled dataframe
160
152
  logger.warning(
161
153
  f"Could not make component forecasts: {e}, falling back on series of"
162
154
  " zeros!",
163
155
  exc_info=e,
164
156
  )
165
- forecasts = pd.DataFrame()
157
+ forecasts = pd.DataFrame(
158
+ data=0,
159
+ index=input_data.index,
160
+ columns=["forecast_wind_on_shore", "forecast_solar", "forecast_other"],
161
+ )
166
162
 
167
163
  # Prepare for output
168
164
  # Add more prediction properties to the forecast ("pid","customer","description","type","algtype")
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
5
+
4
6
  import pandas as pd
5
7
  import structlog
6
8
 
@@ -16,7 +18,9 @@ from openstef.model.serializer import MLflowSerializer
16
18
  from openstef.pipeline.utils import generate_forecast_datetime_range
17
19
  from openstef.postprocessing.postprocessing import (
18
20
  add_prediction_job_properties_to_forecast,
21
+ sort_quantiles,
19
22
  )
23
+ from openstef.settings import Settings
20
24
  from openstef.validation import validation
21
25
 
22
26
 
@@ -40,6 +44,10 @@ def create_forecast_pipeline(
40
44
  Returns:
41
45
  DataFrame with the forecast
42
46
 
47
+ Raises:
48
+ InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
49
+ LookupError: When no model is found for the given prediction job in MLflow.
50
+
43
51
  """
44
52
  prediction_model_pid = pj["id"]
45
53
  # Use the alternative forecast model if it's specified in the pj
@@ -64,7 +72,7 @@ def create_forecast_pipeline_core(
64
72
  Computes the forecasts and confidence intervals given a prediction job and input data.
65
73
  This pipeline has no database or persistent storage dependencies.
66
74
 
67
- Expected prediction job keys: "resolution_minutes", "horizon_minutes", "id", "type",
75
+ Expected prediction job keys: "resolution_minutes", "id", "type",
68
76
  "name", "quantiles"
69
77
 
70
78
  Args:
@@ -76,7 +84,15 @@ def create_forecast_pipeline_core(
76
84
  Returns:
77
85
  Forecast
78
86
 
87
+ Raises:
88
+ InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
89
+
79
90
  """
91
+ structlog.configure(
92
+ wrapper_class=structlog.make_filtering_bound_logger(
93
+ logging.getLevelName(Settings.log_level)
94
+ )
95
+ )
80
96
  logger = structlog.get_logger(__name__)
81
97
 
82
98
  fallback_strategy = "extreme_day" # this can later be expanded
@@ -142,6 +158,9 @@ def create_forecast_pipeline_core(
142
158
  model, forecast_input_data
143
159
  ).add_confidence_interval(forecast, pj)
144
160
 
161
+ # Sort quantiles - prevents crossing and is statistically sound
162
+ forecast = sort_quantiles(forecast)
163
+
145
164
  # Prepare for output
146
165
  forecast = add_prediction_job_properties_to_forecast(
147
166
  pj,
@@ -1,8 +1,9 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
4
5
  import os
5
- from typing import Any, Union
6
+ from typing import Any
6
7
 
7
8
  import optuna
8
9
  import pandas as pd
@@ -21,16 +22,22 @@ from openstef.model.objective import RegressorObjective
21
22
  from openstef.model.objective_creator import ObjectiveCreator
22
23
  from openstef.model.regressors.regressor import OpenstfRegressor
23
24
  from openstef.model.serializer import MLflowSerializer
25
+ from openstef.model_selection.model_selection import split_data_train_validation_test
24
26
  from openstef.pipeline.train_model import (
25
27
  DEFAULT_TRAIN_HORIZONS_HOURS,
26
28
  train_model_pipeline_core,
27
29
  )
30
+ from openstef.settings import Settings
28
31
  from openstef.validation import validation
29
- from openstef.model_selection.model_selection import split_data_train_validation_test
30
32
 
31
33
  optuna.logging.enable_propagation() # Propagate logs to the root logger.
32
34
  optuna.logging.disable_default_handler() # Stop showing logs in sys.stderr.
33
35
 
36
+ structlog.configure(
37
+ wrapper_class=structlog.make_filtering_bound_logger(
38
+ logging.getLevelName(Settings.log_level)
39
+ )
40
+ )
34
41
  logger = structlog.get_logger(__name__)
35
42
 
36
43
  # See https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html#optuna.study.Study.optimize
@@ -59,6 +66,9 @@ def optimize_hyperparameters_pipeline(
59
66
 
60
67
  Raises:
61
68
  ValueError: If the input data is insufficient.
69
+ InputDataInsufficientError: If the input dataframe is empty.
70
+ InputDataWrongColumnOrderError: If the load column is missing in the input dataframe.
71
+ OldModelHigherScoreError: When old model is better than new model.
62
72
 
63
73
  Returns:
64
74
  Optimized hyperparameters.
@@ -119,6 +129,10 @@ def optimize_hyperparameters_pipeline_core(
119
129
 
120
130
  Raises:
121
131
  ValueError: If the input data is insufficient.
132
+ InputDataInsufficientError: If the input dataframe is empty.
133
+ InputDataWrongColumnOrderError: If the load column is missing in the input dataframe.
134
+ OldModelHigherScoreError: When old model is better than new model.
135
+ InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
122
136
 
123
137
  Returns:
124
138
  - Best model,
@@ -175,18 +189,6 @@ def optimize_hyperparameters_pipeline_core(
175
189
  horizons=horizons, feature_names=feature_names, feature_modules=feature_modules
176
190
  ).add_features(validated_data, pj=pj)
177
191
 
178
- # Adds additional proloaf features to the input data, historic_load (equal to the load, first column)
179
- if pj["model"] == "proloaf" and "historic_load" not in list(
180
- validated_data_with_features.columns
181
- ):
182
- validated_data_with_features[
183
- "historic_load"
184
- ] = validated_data_with_features.iloc[:, 0]
185
- # Make sure horizons is last column
186
- temp_cols = validated_data_with_features.columns.tolist()
187
- new_cols = temp_cols[:-2] + [temp_cols[-1]] + [temp_cols[-2]]
188
- validated_data_with_features = validated_data_with_features[new_cols]
189
-
190
192
  # Create objective (NOTE: this is a callable class)
191
193
  objective = ObjectiveCreator.create_objective(model_type=pj["model"])
192
194
 
@@ -245,7 +247,7 @@ def optuna_optimization(
245
247
  - The objective object used by optuna
246
248
 
247
249
  """
248
- model = ModelCreator.create_model(pj["model"])
250
+ model = ModelCreator.create_model(pj["model"], **(pj.model_kwargs or {}))
249
251
  # Apply set to default hyperparameters if they are specified in the pj
250
252
  if pj.default_modelspecs:
251
253
  valid_hyper_parameters = {
@@ -268,7 +270,7 @@ def optuna_optimization(
268
270
  if pj.train_split_func is None:
269
271
  split_func = split_data_train_validation_test
270
272
  split_args = {
271
- "stratification_min_max": pj["model"] != "proloaf",
273
+ "stratification_min_max": True,
272
274
  "back_test": True,
273
275
  }
274
276
  else:
@@ -56,10 +56,16 @@ def train_model_and_forecast_back_test(
56
56
  - Validation data sets (list[pd.DataFrame])
57
57
  - Test data sets (list[pd.DataFrame])
58
58
 
59
+ Raises:
60
+ InputDataInsufficientError: when input data is insufficient.
61
+ InputDataWrongColumnOrderError: when input data has an invalid column order.
62
+ ValueError: when the horizon is a string and the corresponding column is not in the input data
63
+ InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.
64
+
59
65
  """
60
66
  if pj.backtest_split_func is None:
61
67
  backtest_split_func = backtest_split_default
62
- backtest_split_args = {"stratification_min_max": pj["model"] != "proloaf"}
68
+ backtest_split_args = {"stratification_min_max": True}
63
69
  else:
64
70
  backtest_split_func, backtest_split_args = pj.backtest_split_func.load(
65
71
  required_arguments=["data", "n_folds"]
@@ -124,6 +130,10 @@ def train_model_and_forecast_test_core(
124
130
  - The trained model
125
131
  - The forecast on the test set.
126
132
 
133
+ Raises:
134
+ NotImplementedError: When using invalid model type in the prediction job.
135
+ InputDataWrongColumnOrderError: When 'load' column is not first and 'horizon' column is not last.
136
+
127
137
  """
128
138
  model = train_model.train_pipeline_step_train_model(
129
139
  pj, modelspecs, train_data, validation_data
@@ -3,7 +3,7 @@
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  import logging
5
5
  import os
6
- from typing import Optional, Union
6
+ from typing import Optional, Union, Tuple
7
7
 
8
8
  import pandas as pd
9
9
  import structlog
@@ -23,6 +23,7 @@ from openstef.model.regressors.regressor import OpenstfRegressor
23
23
  from openstef.model.serializer import MLflowSerializer
24
24
  from openstef.model.standard_deviation_generator import StandardDeviationGenerator
25
25
  from openstef.model_selection.model_selection import split_data_train_validation_test
26
+ from openstef.settings import Settings
26
27
  from openstef.validation import validation
27
28
 
28
29
  DEFAULT_TRAIN_HORIZONS_HOURS: list[float] = [0.25, 47.0]
@@ -31,6 +32,11 @@ MAXIMUM_MODEL_AGE: int = 7
31
32
  DEFAULT_EARLY_STOPPING_ROUNDS: int = 10
32
33
  PENALTY_FACTOR_OLD_MODEL: float = 1.2
33
34
 
35
+ structlog.configure(
36
+ wrapper_class=structlog.make_filtering_bound_logger(
37
+ logging.getLevelName(Settings.log_level)
38
+ )
39
+ )
34
40
  logger = structlog.get_logger(__name__)
35
41
 
36
42
 
@@ -60,6 +66,13 @@ def train_model_pipeline(
60
66
  - The validation dataset with forecasts
61
67
  - The test dataset with forecasts
62
68
 
69
+ Raises:
70
+ InputDataInsufficientError: when input data is insufficient.
71
+ InputDataWrongColumnOrderError: when input data has an invalid column order.
72
+ 'load' column should be first and 'horizon' column last.
73
+ OldModelHigherScoreError: When old model is better than new model.
74
+ SkipSaveTrainingForecasts: If old model is better or younger than `MAXIMUM_MODEL_AGE`, the model is not saved.
75
+
63
76
  """
64
77
  # Initialize serializer
65
78
  serializer = MLflowSerializer(mlflow_tracking_uri=mlflow_tracking_uri)
@@ -142,7 +155,7 @@ def train_model_pipeline_core(
142
155
  input_data: pd.DataFrame,
143
156
  old_model: OpenstfRegressor = None,
144
157
  horizons: list[float] = DEFAULT_TRAIN_HORIZONS_HOURS,
145
- ) -> Union[
158
+ ) -> Tuple[
146
159
  OpenstfRegressor,
147
160
  Report,
148
161
  ModelSpecificationDataClass,
@@ -164,6 +177,7 @@ def train_model_pipeline_core(
164
177
  InputDataInsufficientError: when input data is insufficient.
165
178
  InputDataWrongColumnOrderError: when input data has an invalid column order.
166
179
  OldModelHigherScoreError: When old model is better than new model.
180
+ InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.
167
181
 
168
182
  Returns:
169
183
  - Fitted_model (OpenstfRegressor)
@@ -172,8 +186,6 @@ def train_model_pipeline_core(
172
186
  - Datasets (tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]): The train, validation and test sets
173
187
 
174
188
  """
175
- logger = structlog.get_logger(__name__)
176
-
177
189
  # Call common pipeline
178
190
  (
179
191
  model,
@@ -234,7 +246,9 @@ def train_pipeline_common(
234
246
  test_fraction: float = 0.0,
235
247
  backtest: bool = False,
236
248
  test_data_predefined: pd.DataFrame = pd.DataFrame(),
237
- ) -> tuple[OpenstfRegressor, Report, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
249
+ ) -> tuple[
250
+ OpenstfRegressor, Report, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame
251
+ ]:
238
252
  """Common pipeline shared with operational training and backtest training.
239
253
 
240
254
  Args:
@@ -257,6 +271,8 @@ def train_pipeline_common(
257
271
  Raises:
258
272
  InputDataInsufficientError: when input data is insufficient.
259
273
  InputDataWrongColumnOrderError: when input data has an invalid column order.
274
+ 'load' column should be first and 'horizon' column last.
275
+ InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.
260
276
 
261
277
  """
262
278
  data_with_features = train_pipeline_step_compute_features(
@@ -300,7 +316,8 @@ def train_pipeline_common(
300
316
 
301
317
  def train_pipeline_step_load_model(
302
318
  pj: PredictionJobDataClass, serializer: MLflowSerializer
303
- ) -> tuple[OpenstfRegressor, ModelSpecificationDataClass, Union[int, float]]:
319
+ ) -> Tuple[OpenstfRegressor, ModelSpecificationDataClass, Union[int, float]]:
320
+ old_model: Optional[OpenstfRegressor]
304
321
  try:
305
322
  old_model, model_specs = serializer.load_model(experiment_name=str(pj.id))
306
323
  old_model_age = old_model.age # Age attribute is openstef specific
@@ -346,12 +363,9 @@ def train_pipeline_step_compute_features(
346
363
  InputDataInsufficientError: when input data is insufficient.
347
364
  InputDataWrongColumnOrderError: when input data has an invalid column order.
348
365
  ValueError: when the horizon is a string and the corresponding column is not in the input data
366
+ InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.
349
367
 
350
368
  """
351
- if pj["model"] == "proloaf":
352
- # proloaf is only able to train with one horizon
353
- horizons = [horizons[0]]
354
-
355
369
  if input_data.empty:
356
370
  raise InputDataInsufficientError("Input dataframe is empty")
357
371
  elif "load" not in input_data.columns:
@@ -423,6 +437,10 @@ def train_pipeline_step_train_model(
423
437
  Returns:
424
438
  The trained model
425
439
 
440
+ Raises:
441
+ NotImplementedError: When using invalid model type in the prediction job.
442
+ InputDataWrongColumnOrderError: When 'load' column is not first and 'horizon' column is not last.
443
+
426
444
  """
427
445
  # Test if first column is "load" and last column is "horizon"
428
446
  if train_data.columns[0] != "load" or train_data.columns[-1] != "horizon":
@@ -435,6 +453,7 @@ def train_pipeline_step_train_model(
435
453
  model = ModelCreator.create_model(
436
454
  pj["model"],
437
455
  quantiles=pj["quantiles"],
456
+ **(pj.model_kwargs or {}),
438
457
  )
439
458
 
440
459
  # split x and y data
@@ -493,7 +512,7 @@ def train_pipeline_step_split_data(
493
512
  test_fraction: float,
494
513
  backtest: bool = False,
495
514
  test_data_predefined: pd.DataFrame = pd.DataFrame(),
496
- ) -> Union[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
515
+ ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
497
516
  """The default way to perform train, val, test split.
498
517
 
499
518
  Args:
@@ -523,7 +542,7 @@ def train_pipeline_step_split_data(
523
542
  if pj.train_split_func is None:
524
543
  split_func = split_data_train_validation_test
525
544
  split_args = {
526
- "stratification_min_max": pj["model"] != "proloaf",
545
+ "stratification_min_max": True,
527
546
  "back_test": backtest,
528
547
  }
529
548
  else:
@@ -27,6 +27,9 @@ def generate_forecast_datetime_range(
27
27
  Returns:
28
28
  Start and end datetimes of the forecast range.
29
29
 
30
+ Raises:
31
+ ValueError: If the target column does not have null values.
32
+
30
33
  """
31
34
  # By labeling the True/False values (based on the isnull() statement) as clusters,
32
35
  # we find what True value belongs to what cluster and the amount of True clusters.
@@ -1,6 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
4
5
  from enum import Enum
5
6
 
6
7
  import numpy as np
@@ -10,6 +11,7 @@ import structlog
10
11
  from openstef.data_classes.prediction_job import PredictionJobDataClass
11
12
  from openstef.enums import ForecastType
12
13
  from openstef.feature_engineering import weather_features
14
+ from openstef.settings import Settings
13
15
 
14
16
  # this is the default for "Lagerwey100"
15
17
  TURBINE_DATA = {
@@ -219,6 +221,11 @@ def add_prediction_job_properties_to_forecast(
219
221
  Dataframe with added metadata.
220
222
 
221
223
  """
224
+ structlog.configure(
225
+ wrapper_class=structlog.make_filtering_bound_logger(
226
+ logging.getLevelName(Settings.log_level)
227
+ )
228
+ )
222
229
  logger = structlog.get_logger(__name__)
223
230
 
224
231
  logger.info("Postproces in preparation of storing")
@@ -244,3 +251,25 @@ def add_prediction_job_properties_to_forecast(
244
251
  forecast["algtype"] = algorithm_type
245
252
 
246
253
  return forecast
254
+
255
+
256
+ def sort_quantiles(
257
+ forecast: pd.DataFrame, quantile_col_start="quantile_P"
258
+ ) -> pd.DataFrame:
259
+ """Sort quantile values so quantiles do not cross.
260
+
261
+ This function assumes that all quantile columns start with 'quantile_P'. For more academic details on why this is
262
+ mathematically sound, please refer to Quantile and Probability Curves Without Crossing (Chernozhukov, 2010)
263
+
264
+ """
265
+ p_columns = [col for col in forecast.columns if col.startswith(quantile_col_start)]
266
+
267
+ if len(p_columns) == 0:
268
+ return forecast
269
+
270
+ # sort the columns
271
+ p_columns = np.sort(p_columns)
272
+
273
+ forecast.loc[:, p_columns] = forecast[p_columns].apply(sorted, axis=1).to_list()
274
+
275
+ return forecast
openstef/settings.py ADDED
@@ -0,0 +1,15 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ from functools import lru_cache
6
+
7
+ from openstef.app_settings import AppSettings
8
+
9
+
10
+ @lru_cache
11
+ def _get_app_settings() -> AppSettings:
12
+ return AppSettings()
13
+
14
+
15
+ Settings = _get_app_settings()