openstef 3.4.10__py3-none-any.whl → 3.4.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. openstef/app_settings.py +19 -0
  2. openstef/data_classes/data_prep.py +1 -1
  3. openstef/data_classes/prediction_job.py +12 -8
  4. openstef/enums.py +3 -7
  5. openstef/exceptions.py +1 -1
  6. openstef/feature_engineering/apply_features.py +0 -6
  7. openstef/feature_engineering/data_preparation.py +12 -5
  8. openstef/feature_engineering/feature_applicator.py +1 -5
  9. openstef/feature_engineering/general.py +14 -0
  10. openstef/feature_engineering/missing_values_transformer.py +99 -0
  11. openstef/feature_engineering/weather_features.py +7 -0
  12. openstef/metrics/figure.py +3 -0
  13. openstef/metrics/metrics.py +58 -1
  14. openstef/metrics/reporter.py +7 -0
  15. openstef/model/confidence_interval_applicator.py +28 -3
  16. openstef/model/model_creator.py +36 -27
  17. openstef/model/objective.py +11 -28
  18. openstef/model/objective_creator.py +4 -3
  19. openstef/model/regressors/arima.py +1 -1
  20. openstef/model/regressors/dazls.py +35 -96
  21. openstef/model/regressors/flatliner.py +100 -0
  22. openstef/model/regressors/linear_quantile.py +247 -0
  23. openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  24. openstef/model/regressors/xgb_quantile.py +3 -0
  25. openstef/model/serializer.py +10 -0
  26. openstef/model_selection/model_selection.py +3 -0
  27. openstef/monitoring/performance_meter.py +1 -2
  28. openstef/monitoring/teams.py +11 -0
  29. openstef/pipeline/create_basecase_forecast.py +11 -1
  30. openstef/pipeline/create_component_forecast.py +11 -22
  31. openstef/pipeline/create_forecast.py +20 -1
  32. openstef/pipeline/optimize_hyperparameters.py +18 -16
  33. openstef/pipeline/train_create_forecast_backtest.py +11 -1
  34. openstef/pipeline/train_model.py +23 -7
  35. openstef/pipeline/utils.py +3 -0
  36. openstef/postprocessing/postprocessing.py +29 -0
  37. openstef/settings.py +15 -0
  38. openstef/tasks/calculate_kpi.py +20 -17
  39. openstef/tasks/create_basecase_forecast.py +13 -5
  40. openstef/tasks/create_components_forecast.py +20 -4
  41. openstef/tasks/create_forecast.py +5 -2
  42. openstef/tasks/split_forecast.py +7 -0
  43. openstef/tasks/train_model.py +7 -5
  44. openstef/tasks/utils/taskcontext.py +7 -0
  45. openstef/validation/validation.py +27 -2
  46. {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/METADATA +34 -38
  47. openstef-3.4.29.dist-info/RECORD +91 -0
  48. {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/WHEEL +1 -1
  49. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
  50. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license +0 -3
  51. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
  52. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license +0 -3
  53. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
  54. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license +0 -3
  55. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
  56. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
  57. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
  58. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
  59. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
  60. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
  61. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
  62. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
  63. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
  64. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
  65. openstef/feature_engineering/historic_features.py +0 -40
  66. openstef/model/regressors/proloaf.py +0 -281
  67. openstef/tasks/run_tracy.py +0 -145
  68. openstef-3.4.10.dist-info/RECORD +0 -104
  69. {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/LICENSE +0 -0
  70. {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,261 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ from functools import partial
5
+ from typing import Dict, Optional, Sequence, Tuple, Union
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import sklearn.base
10
+ import xgboost as xgb
11
+ from sklearn.compose import TransformedTargetRegressor
12
+ from sklearn.preprocessing import StandardScaler
13
+ from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
14
+ from xgboost import Booster
15
+
16
+ import openstef.metrics.metrics as metrics
17
+ from openstef.model.regressors.regressor import OpenstfRegressor
18
+
19
+ DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
20
+
21
+
22
+ class XGBMultiOutputQuantileOpenstfRegressor(OpenstfRegressor):
23
+ r"""Model that provides multioutput quantile regression with XGBoost by default using the arctan loss function.
24
+
25
+ Arctan loss:
26
+ Reference: https://github.com/LaurensSluyterman/XGBoost_quantile_regression/tree/master
27
+ The key idea is to use a smooth approximation of the pinball loss, the arctan
28
+ pinball loss, that has a relatively large second derivative.
29
+
30
+ The approximation is given by:
31
+ $$L^{(\text{arctan})}_{\tau, s}(u) = (\tau - 0.5 + \frac{\arctan (u/s)}{\pi})u + \frac{s}{\pi}$$. # noqa E501
32
+
33
+ Some important settings:
34
+
35
+ * The parameter $s$ in the loss function determines the amount of smoothing. A
36
+ smaller value gives a closer approximation but also a much smaller second
37
+ derivative. A larger value gives more conservative quantiles: when $\tau$
38
+ is larger than 0.5, the quantile becomes larger, and vice versa.
39
+ Values between 0.05 and 0.1 appear to work well. It may be a good idea to
40
+ optimize this parameter.
41
+ * Set min-child-weight to zero. The second derivatives can be a lot smaller
42
+ than 1 and this parameter may prevent any splits.
43
+ * Use a relatively small max-delta-step. We used a default of 0.5.
44
+ This prevents excessive steps that could happen due to the relatively
45
+ small second derivative.
46
+ * For the same reason, use a slightly lower learning rate of 0.05.
47
+
48
+ """
49
+
50
+ estimator_: TransformedTargetRegressor
51
+ quantile_indices_: Dict[float, int]
52
+
53
+ @staticmethod
54
+ def _get_importance_names():
55
+ return {
56
+ "gain_importance_name": "total_gain",
57
+ "weight_importance_name": "weight",
58
+ }
59
+
60
+ def __init__(
61
+ self,
62
+ quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
63
+ gamma: float = 0.0,
64
+ colsample_bytree: float = 1.0,
65
+ subsample: float = 1.0,
66
+ min_child_weight: int = 0,
67
+ max_depth: int = 6,
68
+ learning_rate: float = 0.22,
69
+ alpha: float = 0.0,
70
+ max_delta_step: int = 0.5,
71
+ arctan_smoothing: float = 0.055,
72
+ early_stopping_rounds: Optional[int] = None,
73
+ ):
74
+ """Initialize XGBMultiOutputQuantileOpenstfRegressor.
75
+
76
+ Model that provides quantile regression with XGBoost.
77
+ A single multi-output XGBoost model is trained with one output per desired
78
+ quantile; these outputs can later be used to predict quantiles.
79
+
80
+ Args:
81
+ quantiles: Tuple with desired quantiles, quantile 0.5 is required.
82
+ For example: (0.1, 0.5, 0.9)
83
+ gamma: Gamma.
84
+ colsample_bytree: Colsample by tree.
85
+ subsample: Subsample.
86
+ min_child_weight: Minimum child weight.
87
+ max_depth: Maximum depth.
88
+ learning_rate: Learning rate.
89
+ alpha: Alpha.
90
+ max_delta_step: Maximum delta step.
91
+ arctan_smoothing: smoothing parameter of the arctan loss function.
92
+ early_stopping_rounds: Number of rounds to stop training if no improvement
93
+ is made.
94
+
95
+ Raises:
96
+ ValueError in case quantile 0.5 is not in the requested quantiles.
97
+
98
+ """
99
+ super().__init__()
100
+ if 0.5 not in quantiles:
101
+ raise ValueError(
102
+ "Cannot train quantile model as 0.5 is not in requested quantiles!"
103
+ )
104
+
105
+ self.quantiles = quantiles
106
+
107
+ # Set attributes for hyper parameters
108
+ self.subsample = subsample
109
+ self.min_child_weight = min_child_weight
110
+ self.max_depth = max_depth
111
+ self.gamma = gamma
112
+ self.alpha = alpha
113
+ self.max_delta_step = max_delta_step
114
+ self.colsample_bytree = colsample_bytree
115
+ self.learning_rate = learning_rate
116
+ self.early_stopping_rounds = early_stopping_rounds
117
+ self.arctan_smoothing = arctan_smoothing
118
+
119
+ # Get fitting parameters - only those accepted by xgb.XGBRegressor
120
+ xgb_regressor_params = {
121
+ key: value
122
+ for key, value in self.get_params().items()
123
+ if key in xgb.XGBRegressor().get_params().keys()
124
+ }
125
+
126
+ # Define the model
127
+ objective = partial(
128
+ metrics.arctan_loss, taus=self.quantiles, s=arctan_smoothing
129
+ )
130
+ xgb_model: xgb.XGBRegressor = xgb.XGBRegressor(
131
+ objective=objective,
132
+ base_score=0,
133
+ multi_strategy="one_output_per_tree",
134
+ **xgb_regressor_params,
135
+ )
136
+ self.estimator_ = TransformedTargetRegressor(
137
+ regressor=xgb_model, transformer=StandardScaler()
138
+ )
139
+
140
+ # Set quantile indices to remap multioutput predictions
141
+ self.quantile_indices_ = {
142
+ quantile: i for i, quantile in enumerate(self.quantiles)
143
+ }
144
+
145
+ def fit(
146
+ self,
147
+ x: np.array,
148
+ y: np.array,
149
+ eval_set: Optional[Sequence[Tuple[np.array, np.array]]] = None,
150
+ verbose: Optional[Union[bool, int]] = 0,
151
+ **kwargs
152
+ ) -> OpenstfRegressor:
153
+ """Fits xgb quantile model.
154
+
155
+ Args:
156
+ x: Feature matrix.
157
+ y: Labels.
158
+ eval_set: Evaluation set to monitor training performance.
159
+ verbose: Verbosity level (disabled by default).
160
+
161
+ Returns:
162
+ Fitted XGBQuantile model.
163
+
164
+ """
165
+ if isinstance(y, pd.Series):
166
+ y = y.to_numpy()
167
+
168
+ if not isinstance(x, pd.DataFrame):
169
+ x = pd.DataFrame(np.asarray(x))
170
+
171
+ # Check/validate input
172
+ check_X_y(x, y, force_all_finite="allow-nan")
173
+
174
+ # Prepare inputs
175
+ y_multioutput = replicate_for_multioutput(y, len(self.quantiles))
176
+
177
+ # Define watchlist if eval_set is defined
178
+ eval_set_multioutput = []
179
+ if eval_set:
180
+ for x_eval, y_eval in eval_set:
181
+ if isinstance(y_eval, pd.Series):
182
+ y_eval = y_eval.to_numpy()
183
+
184
+ y_eval_multioutput = replicate_for_multioutput(
185
+ y=y_eval, num_quantiles=len(self.quantiles)
186
+ )
187
+ eval_set_multioutput.append((x_eval, y_eval_multioutput))
188
+
189
+ eval_set_multioutput.append((x, y_multioutput))
190
+
191
+ self.estimator_.fit(
192
+ X=x.copy(deep=True),
193
+ y=y_multioutput,
194
+ eval_set=eval_set_multioutput,
195
+ verbose=verbose,
196
+ )
197
+
198
+ # Update state of the estimator
199
+ self.feature_importances_ = self.estimator_.regressor_.feature_importances_
200
+ self.is_fitted_ = True
201
+
202
+ return self
203
+
204
+ def predict(self, x: np.array, quantile: float = 0.5) -> np.array:
205
+ """Makes a prediction for a desired quantile.
206
+
207
+ Args:
208
+ x: Feature matrix.
209
+ quantile: Quantile for which a prediction is desired,
210
+ note that only quantiles for which a model is trained are available,
211
+ and that this is a quantile-model specific keyword.
212
+
213
+ Returns:
214
+ Prediction
215
+
216
+ Raises:
217
+ ValueError in case no model is trained for the requested quantile.
218
+
219
+ """
220
+ # Check if model is trained for this quantile
221
+ if quantile not in self.quantiles:
222
+ raise ValueError("No model trained for requested quantile!")
223
+
224
+ # Check/validate input
225
+ check_array(x, force_all_finite="allow-nan")
226
+ check_is_fitted(self)
227
+
228
+ # best_iteration is only available if early stopping was used during training
229
+ prediction: np.array
230
+ if hasattr(self.estimator_, "best_iteration"):
231
+ prediction = self.estimator_.predict(
232
+ X=x,
233
+ iteration_range=(0, self.estimator_.best_iteration + 1),
234
+ )
235
+ else:
236
+ prediction = self.estimator_.predict(X=x)
237
+
238
+ quantile_index = self.quantile_indices_[quantile]
239
+ return prediction[:, quantile_index]
240
+
241
+ @property
242
+ def feature_names(self):
243
+ return self.estimator_.feature_names_in_
244
+
245
+ @property
246
+ def can_predict_quantiles(self):
247
+ return True
248
+
249
+
250
+ def replicate_for_multioutput(y: np.array, num_quantiles: int) -> np.array:
251
+ """Replicates a 1D array to a 2D array for multioutput regression.
252
+
253
+ Args:
254
+ y: 1D array.
255
+ num_quantiles: Number of columns in the output array.
256
+
257
+ Returns:
258
+ 2D array with shape (len(y), num_quantiles)
259
+
260
+ """
261
+ return np.repeat(y[:, None], num_quantiles, axis=1)
@@ -52,6 +52,9 @@ class XGBQuantileOpenstfRegressor(OpenstfRegressor):
52
52
  alpha: Alpha
53
53
  max_delta_step: Maximum delta step
54
54
 
55
+ Raises:
56
+ ValueError in case quantile 0.5 is not in the requested quantiles
57
+
55
58
  """
56
59
  super().__init__()
57
60
  # Check if quantile 0.5 is present; this is required
@@ -2,6 +2,7 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  import json
5
+ import logging
5
6
  import os
6
7
  import shutil
7
8
  from datetime import datetime
@@ -20,10 +21,16 @@ from xgboost import XGBModel # Temporary for backward compatibility
20
21
  from openstef.data_classes.model_specifications import ModelSpecificationDataClass
21
22
  from openstef.metrics.reporter import Report
22
23
  from openstef.model.regressors.regressor import OpenstfRegressor
24
+ from openstef.settings import Settings
23
25
 
24
26
 
25
27
  class MLflowSerializer:
26
28
  def __init__(self, mlflow_tracking_uri: str):
29
+ structlog.configure(
30
+ wrapper_class=structlog.make_filtering_bound_logger(
31
+ logging.getLevelName(Settings.log_level)
32
+ )
33
+ )
27
34
  self.logger = structlog.get_logger(self.__class__.__name__)
28
35
  mlflow.set_tracking_uri(mlflow_tracking_uri)
29
36
  self.logger.debug(f"MLflow tracking uri at init= {mlflow_tracking_uri}")
@@ -147,6 +154,9 @@ class MLflowSerializer:
147
154
  Args:
148
155
  experiment_name: Name of the experiment, often the id of the prediction job.
149
156
 
157
+ Raises:
158
+ LookupError: If model is not found in MLflow.
159
+
150
160
  """
151
161
  try:
152
162
  models_df = self._find_models(
@@ -140,6 +140,9 @@ def split_data_train_validation_test(
140
140
  - Validation data.
141
141
  - Test data.
142
142
 
143
+ Raises:
144
+ ValueError: When the test and validation fractions are too high.
145
+
143
146
  """
144
147
  test_fraction = test_fraction if back_test else 0
145
148
  train_fraction = 1 - (test_fraction + validation_fraction)
@@ -20,8 +20,7 @@ class PerformanceMeter:
20
20
 
21
21
  Args:
22
22
  level_label: The label of the new level. This could e.g. be 'task'
23
- level_name: The name of the specified level. This could i.e. be
24
- 'tracy_todo'
23
+ level_name: The name of the specified level.
25
24
  **kwargs: Any other kwargs are appended to the logging.
26
25
 
27
26
  Returns:
@@ -1,6 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
4
5
  from typing import Union
5
6
 
6
7
  import pandas as pd
@@ -8,6 +9,8 @@ import pymsteams
8
9
  import structlog
9
10
  from pymsteams import cardsection
10
11
 
12
+ from openstef.settings import Settings
13
+
11
14
 
12
15
  def post_teams(
13
16
  msg: Union[str, dict],
@@ -38,6 +41,14 @@ def post_teams(
38
41
  Note:
39
42
  This function is namespace-specific.
40
43
  """
44
+ if not Settings.post_teams_messages:
45
+ return
46
+
47
+ structlog.configure(
48
+ wrapper_class=structlog.make_filtering_bound_logger(
49
+ logging.getLevelName(Settings.log_level)
50
+ )
51
+ )
41
52
  logger = structlog.get_logger(__name__)
42
53
  # If no url is passed, give warning and don't send teams message
43
54
  if url is None:
@@ -1,13 +1,14 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
4
5
  from pathlib import Path
5
6
 
6
7
  import pandas as pd
7
8
  import structlog
8
9
 
9
10
  from openstef.data_classes.prediction_job import PredictionJobDataClass
10
- from openstef.exceptions import NoRealisedLoadError, InputDataOngoingZeroFlatlinerError
11
+ from openstef.exceptions import InputDataOngoingZeroFlatlinerError, NoRealisedLoadError
11
12
  from openstef.feature_engineering.feature_applicator import (
12
13
  OperationalPredictFeatureApplicator,
13
14
  )
@@ -18,6 +19,7 @@ from openstef.postprocessing.postprocessing import (
18
19
  add_components_base_case_forecast,
19
20
  add_prediction_job_properties_to_forecast,
20
21
  )
22
+ from openstef.settings import Settings
21
23
  from openstef.validation import validation
22
24
 
23
25
  MODEL_LOCATION = Path(".")
@@ -38,7 +40,15 @@ def create_basecase_forecast_pipeline(
38
40
  Returns:
39
41
  Base case forecast
40
42
 
43
+ Raises:
44
+ NoRealisedLoadError: When no realised load for given datetime range.
45
+
41
46
  """
47
+ structlog.configure(
48
+ wrapper_class=structlog.make_filtering_bound_logger(
49
+ logging.getLevelName(Settings.log_level)
50
+ )
51
+ )
42
52
  logger = structlog.get_logger(__name__)
43
53
 
44
54
  logger.info("Preprocessing data for basecase forecast")
@@ -2,7 +2,10 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
 
5
+ import logging
6
+
5
7
  import joblib
8
+ import numpy as np
6
9
  import pandas as pd
7
10
  import structlog
8
11
 
@@ -11,12 +14,11 @@ from openstef import PROJECT_ROOT
11
14
  from openstef.data_classes.prediction_job import PredictionJobDataClass
12
15
  from openstef.enums import ForecastType
13
16
  from openstef.model.regressors.dazls import Dazls
14
-
15
- import numpy as np
17
+ from openstef.settings import Settings
16
18
 
17
19
  # Set the path for the Dazls stored model
18
20
  DAZLS_STORED = str(
19
- PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.0" / "dazls_stored_3.4.0_"
21
+ PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
20
22
  )
21
23
 
22
24
 
@@ -96,6 +98,11 @@ def create_components_forecast_pipeline(
96
98
  "algtype"
97
99
 
98
100
  """
101
+ structlog.configure(
102
+ wrapper_class=structlog.make_filtering_bound_logger(
103
+ logging.getLevelName(Settings.log_level)
104
+ )
105
+ )
99
106
  logger = structlog.get_logger(__name__)
100
107
  logger.info("Make components prediction", pid=pj["id"])
101
108
 
@@ -106,26 +113,8 @@ def create_components_forecast_pipeline(
106
113
  # Save and load the model as .sav file (or as .z file)
107
114
  # For the code contact: korte.termijn.prognoses@alliander.com
108
115
  dazls_model = Dazls()
109
- dazls_model.domain_model = joblib.load(DAZLS_STORED + "domain_model.z")
110
- dazls_model.domain_model_scaler = joblib.load(
111
- DAZLS_STORED + "domain_model_scaler.z"
112
- )
113
- dazls_model.domain_model_input_columns = joblib.load(
114
- DAZLS_STORED + "domain_model_features.z"
115
- )
116
-
117
- dazls_model.adaptation_model = joblib.load(DAZLS_STORED + "adaptation_model.z")
118
- dazls_model.adaptation_model_scaler = joblib.load(
119
- DAZLS_STORED + "adaptation_model_scaler.z"
120
- )
121
- dazls_model.adaptation_model_input_columns = joblib.load(
122
- DAZLS_STORED + "adaptation_model_features.z"
123
- )
124
-
125
- dazls_model.target_columns = joblib.load(DAZLS_STORED + "target.z")
126
- dazls_model.target_scaler = joblib.load(DAZLS_STORED + "target_scaler.z")
116
+ dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")
127
117
 
128
- logger = structlog.get_logger(__name__)
129
118
  logger.info("DAZLS model loaded", dazls_model=str(dazls_model))
130
119
 
131
120
  # Use the predict function of Dazls model
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
5
+
4
6
  import pandas as pd
5
7
  import structlog
6
8
 
@@ -16,7 +18,9 @@ from openstef.model.serializer import MLflowSerializer
16
18
  from openstef.pipeline.utils import generate_forecast_datetime_range
17
19
  from openstef.postprocessing.postprocessing import (
18
20
  add_prediction_job_properties_to_forecast,
21
+ sort_quantiles,
19
22
  )
23
+ from openstef.settings import Settings
20
24
  from openstef.validation import validation
21
25
 
22
26
 
@@ -40,6 +44,10 @@ def create_forecast_pipeline(
40
44
  Returns:
41
45
  DataFrame with the forecast
42
46
 
47
+ Raises:
48
+ InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
49
+ LookupError: When no model is found for the given prediction job in MLflow.
50
+
43
51
  """
44
52
  prediction_model_pid = pj["id"]
45
53
  # Use the alternative forecast model if it's specified in the pj
@@ -64,7 +72,7 @@ def create_forecast_pipeline_core(
64
72
  Computes the forecasts and confidence intervals given a prediction job and input data.
65
73
  This pipeline has no database or persistent storage dependencies.
66
74
 
67
- Expected prediction job keys: "resolution_minutes", "horizon_minutes", "id", "type",
75
+ Expected prediction job keys: "resolution_minutes", "id", "type",
68
76
  "name", "quantiles"
69
77
 
70
78
  Args:
@@ -76,7 +84,15 @@ def create_forecast_pipeline_core(
76
84
  Returns:
77
85
  Forecast
78
86
 
87
+ Raises:
88
+ InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
89
+
79
90
  """
91
+ structlog.configure(
92
+ wrapper_class=structlog.make_filtering_bound_logger(
93
+ logging.getLevelName(Settings.log_level)
94
+ )
95
+ )
80
96
  logger = structlog.get_logger(__name__)
81
97
 
82
98
  fallback_strategy = "extreme_day" # this can later be expanded
@@ -142,6 +158,9 @@ def create_forecast_pipeline_core(
142
158
  model, forecast_input_data
143
159
  ).add_confidence_interval(forecast, pj)
144
160
 
161
+ # Sort quantiles - prevents crossing and is statistically sound
162
+ forecast = sort_quantiles(forecast)
163
+
145
164
  # Prepare for output
146
165
  forecast = add_prediction_job_properties_to_forecast(
147
166
  pj,
@@ -1,8 +1,9 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
4
5
  import os
5
- from typing import Any, Union
6
+ from typing import Any
6
7
 
7
8
  import optuna
8
9
  import pandas as pd
@@ -21,16 +22,22 @@ from openstef.model.objective import RegressorObjective
21
22
  from openstef.model.objective_creator import ObjectiveCreator
22
23
  from openstef.model.regressors.regressor import OpenstfRegressor
23
24
  from openstef.model.serializer import MLflowSerializer
25
+ from openstef.model_selection.model_selection import split_data_train_validation_test
24
26
  from openstef.pipeline.train_model import (
25
27
  DEFAULT_TRAIN_HORIZONS_HOURS,
26
28
  train_model_pipeline_core,
27
29
  )
30
+ from openstef.settings import Settings
28
31
  from openstef.validation import validation
29
- from openstef.model_selection.model_selection import split_data_train_validation_test
30
32
 
31
33
  optuna.logging.enable_propagation() # Propagate logs to the root logger.
32
34
  optuna.logging.disable_default_handler() # Stop showing logs in sys.stderr.
33
35
 
36
+ structlog.configure(
37
+ wrapper_class=structlog.make_filtering_bound_logger(
38
+ logging.getLevelName(Settings.log_level)
39
+ )
40
+ )
34
41
  logger = structlog.get_logger(__name__)
35
42
 
36
43
  # See https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html#optuna.study.Study.optimize
@@ -59,6 +66,9 @@ def optimize_hyperparameters_pipeline(
59
66
 
60
67
  Raises:
61
68
  ValueError: If the input_data is insufficient.
69
+ InputDataInsufficientError: If the input dataframe is empty.
70
+ InputDataWrongColumnOrderError: If the load column is missing in the input dataframe.
71
+ OldModelHigherScoreError: When old model is better than new model.
62
72
 
63
73
  Returns:
64
74
  Optimized hyperparameters.
@@ -119,6 +129,10 @@ def optimize_hyperparameters_pipeline_core(
119
129
 
120
130
  Raises:
121
131
  ValueError: If the input_data is insufficient.
132
+ InputDataInsufficientError: If the input dataframe is empty.
133
+ InputDataWrongColumnOrderError: If the load column is missing in the input dataframe.
134
+ OldModelHigherScoreError: When old model is better than new model.
135
+ InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
122
136
 
123
137
  Returns:
124
138
  - Best model,
@@ -175,18 +189,6 @@ def optimize_hyperparameters_pipeline_core(
175
189
  horizons=horizons, feature_names=feature_names, feature_modules=feature_modules
176
190
  ).add_features(validated_data, pj=pj)
177
191
 
178
- # Adds additional proloaf features to the input data, historic_load (equal to the load, first column)
179
- if pj["model"] == "proloaf" and "historic_load" not in list(
180
- validated_data_with_features.columns
181
- ):
182
- validated_data_with_features[
183
- "historic_load"
184
- ] = validated_data_with_features.iloc[:, 0]
185
- # Make sure horizons is last column
186
- temp_cols = validated_data_with_features.columns.tolist()
187
- new_cols = temp_cols[:-2] + [temp_cols[-1]] + [temp_cols[-2]]
188
- validated_data_with_features = validated_data_with_features[new_cols]
189
-
190
192
  # Create objective (NOTE: this is a callable class)
191
193
  objective = ObjectiveCreator.create_objective(model_type=pj["model"])
192
194
 
@@ -245,7 +247,7 @@ def optuna_optimization(
245
247
  - The objective object used by optuna
246
248
 
247
249
  """
248
- model = ModelCreator.create_model(pj["model"])
250
+ model = ModelCreator.create_model(pj["model"], **(pj.model_kwargs or {}))
249
251
  # Apply the default hyperparameters if they are specified in the pj
250
252
  if pj.default_modelspecs:
251
253
  valid_hyper_parameters = {
@@ -268,7 +270,7 @@ def optuna_optimization(
268
270
  if pj.train_split_func is None:
269
271
  split_func = split_data_train_validation_test
270
272
  split_args = {
271
- "stratification_min_max": pj["model"] != "proloaf",
273
+ "stratification_min_max": True,
272
274
  "back_test": True,
273
275
  }
274
276
  else:
@@ -56,10 +56,16 @@ def train_model_and_forecast_back_test(
56
56
  - Validation data sets (list[pd.DataFrame])
57
57
  - Test data sets (list[pd.DataFrame])
58
58
 
59
+ Raises:
60
+ InputDataInsufficientError: when input data is insufficient.
61
+ InputDataWrongColumnOrderError: when input data has an invalid column order.
62
+ ValueError: when the horizon is a string and the corresponding column is not in the input data
63
+ InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.
64
+
59
65
  """
60
66
  if pj.backtest_split_func is None:
61
67
  backtest_split_func = backtest_split_default
62
- backtest_split_args = {"stratification_min_max": pj["model"] != "proloaf"}
68
+ backtest_split_args = {"stratification_min_max": True}
63
69
  else:
64
70
  backtest_split_func, backtest_split_args = pj.backtest_split_func.load(
65
71
  required_arguments=["data", "n_folds"]
@@ -124,6 +130,10 @@ def train_model_and_forecast_test_core(
124
130
  - The trained model
125
131
  - The forecast on the test set.
126
132
 
133
+ Raises:
134
+ NotImplementedError: When using invalid model type in the prediction job.
135
+ InputDataWrongColumnOrderError: When 'load' column is not first and 'horizon' column is not last.
136
+
127
137
  """
128
138
  model = train_model.train_pipeline_step_train_model(
129
139
  pj, modelspecs, train_data, validation_data