openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (81)
  1. openstef/app_settings.py +19 -0
  2. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  3. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
  4. openstef/data/dutch_holidays.csv +1759 -0
  5. openstef/data_classes/data_prep.py +1 -1
  6. openstef/data_classes/prediction_job.py +15 -9
  7. openstef/enums.py +108 -9
  8. openstef/exceptions.py +1 -1
  9. openstef/feature_engineering/apply_features.py +25 -6
  10. openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
  11. openstef/feature_engineering/cyclic_features.py +102 -0
  12. openstef/feature_engineering/data_preparation.py +12 -5
  13. openstef/feature_engineering/feature_applicator.py +1 -5
  14. openstef/feature_engineering/general.py +14 -0
  15. openstef/feature_engineering/holiday_features.py +35 -26
  16. openstef/feature_engineering/missing_values_transformer.py +141 -0
  17. openstef/feature_engineering/weather_features.py +7 -0
  18. openstef/metrics/figure.py +3 -0
  19. openstef/metrics/metrics.py +58 -1
  20. openstef/metrics/reporter.py +7 -0
  21. openstef/model/confidence_interval_applicator.py +28 -3
  22. openstef/model/model_creator.py +54 -41
  23. openstef/model/objective.py +17 -34
  24. openstef/model/objective_creator.py +13 -12
  25. openstef/model/regressors/arima.py +1 -1
  26. openstef/model/regressors/dazls.py +35 -96
  27. openstef/model/regressors/flatliner.py +95 -0
  28. openstef/model/regressors/linear_quantile.py +296 -0
  29. openstef/model/regressors/xgb.py +23 -0
  30. openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  31. openstef/model/regressors/xgb_quantile.py +3 -0
  32. openstef/model/serializer.py +10 -0
  33. openstef/model_selection/model_selection.py +4 -1
  34. openstef/monitoring/performance_meter.py +1 -2
  35. openstef/monitoring/teams.py +11 -0
  36. openstef/pipeline/create_basecase_forecast.py +11 -1
  37. openstef/pipeline/create_component_forecast.py +24 -28
  38. openstef/pipeline/create_forecast.py +20 -1
  39. openstef/pipeline/optimize_hyperparameters.py +18 -16
  40. openstef/pipeline/train_create_forecast_backtest.py +11 -1
  41. openstef/pipeline/train_model.py +31 -12
  42. openstef/pipeline/utils.py +3 -0
  43. openstef/postprocessing/postprocessing.py +29 -0
  44. openstef/settings.py +15 -0
  45. openstef/tasks/calculate_kpi.py +23 -20
  46. openstef/tasks/create_basecase_forecast.py +15 -7
  47. openstef/tasks/create_components_forecast.py +24 -8
  48. openstef/tasks/create_forecast.py +9 -6
  49. openstef/tasks/create_solar_forecast.py +4 -4
  50. openstef/tasks/optimize_hyperparameters.py +2 -2
  51. openstef/tasks/split_forecast.py +9 -2
  52. openstef/tasks/train_model.py +9 -7
  53. openstef/tasks/utils/taskcontext.py +7 -0
  54. openstef/validation/validation.py +28 -3
  55. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
  56. openstef-3.4.44.dist-info/RECORD +97 -0
  57. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
  58. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
  59. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
  60. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
  61. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
  62. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
  63. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
  64. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
  65. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
  66. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
  67. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
  68. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
  69. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
  70. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
  71. openstef/data/dutch_holidays_2020-2022.csv +0 -831
  72. openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
  73. openstef/feature_engineering/historic_features.py +0 -40
  74. openstef/model/regressors/proloaf.py +0 -281
  75. openstef/tasks/run_tracy.py +0 -145
  76. openstef-3.4.10.dist-info/RECORD +0 -104
  77. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
  78. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
  79. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
  80. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
  81. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
openstef/model/regressors/linear_quantile.py (new file)
@@ -0,0 +1,296 @@
+# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
+#
+# SPDX-License-Identifier: MPL-2.0
+import re
+from typing import Dict, Union, Set, Optional, List
+
+import numpy as np
+import pandas as pd
+from sklearn.base import RegressorMixin
+from sklearn.linear_model import QuantileRegressor
+from sklearn.preprocessing import StandardScaler
+from sklearn.utils.validation import check_is_fitted
+
+from openstef.feature_engineering.missing_values_transformer import (
+    MissingValuesTransformer,
+)
+from openstef.model.regressors.regressor import OpenstfRegressor
+
+DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
+
+
+class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
+    quantiles: tuple[float, ...]
+    alpha: float
+    solver: str
+
+    imputer_: MissingValuesTransformer
+    x_scaler_: StandardScaler
+    y_scaler_: StandardScaler
+    models_: Dict[float, QuantileRegressor]
+
+    is_fitted_: bool = False
+
+    FEATURE_IGNORE_LIST: Set[str] = {
+        "IsWeekendDay",
+        "IsWeekDay",
+        "IsSunday",
+        "Month",
+        "Quarter",
+    }
+
+    def __init__(
+        self,
+        quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
+        alpha: float = 0.0,
+        solver: str = "highs",
+        missing_values: Union[int, float, str, None] = np.nan,
+        imputation_strategy: Optional[str] = "mean",
+        fill_value: Union[str, int, float] = None,
+        weight_scale_percentile: int = 95,
+        weight_exponent: float = 1,
+        weight_floor: float = 0.1,
+        no_fill_future_values_features: List[str] = None,
+    ):
+        """Initialize LinearQuantileOpenstfRegressor.
+
+        Model that provides quantile regression with scikit-learn's QuantileRegressor.
+        For each desired quantile a QuantileRegressor model is trained;
+        these can later be used to predict quantiles.
+
+        This model is sensitive to feature quality and therefore has logic to remove
+        some custom features produced by OpenSTEF. The features that are removed are:
+        - Holiday features (is_christmas, is_*)
+        - Lagged features (T-1d, T-*)
+        - Point-in-time features (IsWeekendDay, IsWeekDay, IsSunday, Month, Quarter)
+        - Infeed MFFBAS profiles (E*_I)
+
+        Args:
+            quantiles: Tuple with desired quantiles, quantile 0.5 is required.
+                For example: (0.1, 0.5, 0.9)
+            alpha: Regularization constant for L1 regularization
+            solver: Solver to use for optimization
+            missing_values: Value to be considered as missing value
+            imputation_strategy: Imputation strategy
+            fill_value: Fill value
+            weight_scale_percentile: Percentile used in scaling of the samples
+            weight_exponent: Exponent used in sample weighing
+            weight_floor: Minimum weight for samples
+            no_fill_future_values_features: The features for which it does not make sense
+                to fill future values. Rows that contain trailing null values for these
+                features will be removed from the data.
+
+        """
+        super().__init__()
+
+        # Check if quantile 0.5 is present. This is required.
+        if 0.5 not in quantiles:
+            raise ValueError(
+                "Cannot train quantile model as 0.5 is not in requested quantiles!"
+            )
+
+        self.quantiles = quantiles
+        self.alpha = alpha
+        self.solver = solver
+        self.weight_scale_percentile = weight_scale_percentile
+        self.weight_exponent = weight_exponent
+        self.weight_floor = weight_floor
+        self.imputer_ = MissingValuesTransformer(
+            missing_values=missing_values,
+            imputation_strategy=imputation_strategy,
+            fill_value=fill_value,
+            no_fill_future_values_features=no_fill_future_values_features,
+        )
+        self.x_scaler_ = StandardScaler()
+        self.y_scaler_ = StandardScaler()
+        self.models_ = {
+            quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
+            for quantile in quantiles
+        }
+
+    @property
+    def feature_names(self) -> list:
+        """The names of the features used to train the model."""
+        check_is_fitted(self)
+        return self.imputer_.non_null_feature_names
+
+    @staticmethod
+    def _get_importance_names():
+        return {
+            "gain_importance_name": "total_gain",
+            "weight_importance_name": "weight",
+        }
+
+    @property
+    def can_predict_quantiles(self) -> bool:
+        """Attribute that indicates if the model can predict particular quantiles."""
+        return True
+
+    def _is_feature_ignored(self, feature_name: str) -> bool:
+        """Check if a feature is ignored by the model.
+
+        Args:
+            feature_name: Feature name
+
+        Returns:
+            True if the feature is ignored, False otherwise
+
+        """
+        return (
+            # Ignore named features
+            feature_name in self.FEATURE_IGNORE_LIST
+            or
+            # Ignore holiday features
+            re.match(r"is_", feature_name) is not None
+            or
+            # Ignore lag features
+            re.match(r"T-", feature_name) is not None
+            or
+            # Ignore infeed MFFBAS profiles
+            re.match(r"E\d.*_I", feature_name) is not None
+        )
+
+    def _remove_ignored_features(self, x: pd.DataFrame) -> pd.DataFrame:
+        """Remove ignored features from the input data.
+
+        Args:
+            x: Input data
+
+        Returns:
+            Data without ignored features
+
+        """
+        return x.drop(columns=[c for c in x.columns if self._is_feature_ignored(c)])
+
+    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
+        """Fits linear quantile model.
+
+        Args:
+            x: Feature matrix
+            y: Labels
+
+        Returns:
+            Fitted LinearQuantile model
+
+        """
+        if not isinstance(y, pd.Series):
+            y = pd.Series(np.asarray(y), name="load")
+
+        x = self._remove_ignored_features(x)
+
+        # Fix nan columns
+        x, y = self.imputer_.fit_transform(x, y)
+        if x.isna().any().any():
+            raise ValueError(
+                "There are nan values in the input data. Set "
+                "imputation_strategy to solve them."
+            )
+
+        # Apply feature scaling
+        x_scaled = self.x_scaler_.fit_transform(x)
+        y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]
+
+        # Add more focus on extreme / peak values
+        sample_weight = self._calculate_sample_weights(y.values.squeeze())
+
+        # Fit quantile regressors
+        for quantile in self.quantiles:
+            self.models_[quantile].fit(
+                X=x_scaled, y=y_scaled, sample_weight=sample_weight
+            )
+
+        self.is_fitted_ = True
+
+        self.feature_importances_ = self._get_feature_importance_from_linear()
+
+        return self
+
+    def _calculate_sample_weights(self, y: np.array):
+        """Calculate sample weights based on the y values of arbitrary scale.
+
+        The resulting weights are in the range [0, 1] and are used to put more emphasis
+        on certain samples. The sample weighting function does:
+
+        * Rescale data to a [-1, 1] range using quantile scaling. 90% of the data will
+          be within this range; the rest falls outside.
+        * Calculate the weight by taking the exponent of the scaled data.
+          * exponent=0: Results in uniform weights for all samples.
+          * exponent=1: Results in linearly increasing weights for samples that are
+            closer to the extremes.
+          * exponent>1: Results in exponentially increasing weights for samples that
+            are closer to the extremes.
+        * Clip the data to the [0, 1] range, with weight_floor as the minimum weight.
+          * The weight floor is used to make sure that all samples are considered.
+
+        """
+        return np.clip(
+            _weight_exp(
+                _scale_percentile(y, percentile=self.weight_scale_percentile),
+                exponent=self.weight_exponent,
+            ),
+            a_min=self.weight_floor,
+            a_max=1,
+        )
+
+    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
+        """Makes a prediction for a desired quantile.
+
+        Args:
+            x: Feature matrix
+            quantile: Quantile for which a prediction is desired.
+                Note that predictions are only available for quantiles for which a
+                model was trained, and that this is a quantile-model specific keyword.
+
+        Returns:
+            Prediction
+
+        Raises:
+            ValueError in case no model is trained for the requested quantile
+
+        """
+        check_is_fitted(self)
+
+        # Preprocess input data
+        x = self._remove_ignored_features(x)
+        x = self.imputer_.transform(x)
+        x_scaled = self.x_scaler_.transform(x)
+
+        # Make prediction
+        y_pred = self.models_[quantile].predict(X=x_scaled)
+
+        # Inverse scaling
+        y_pred = self.y_scaler_.inverse_transform(y_pred.reshape(-1, 1))[:, 0]
+
+        return y_pred
+
+    def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
+        check_is_fitted(self)
+        feature_importance_linear = np.abs(self.models_[quantile].coef_)
+        reg_feature_importances_dict = dict(
+            zip(self.imputer_.non_null_feature_names, feature_importance_linear)
+        )
+        return np.array(
+            [
+                reg_feature_importances_dict.get(c, 0)
+                for c in self.imputer_.non_null_feature_names
+            ]
+        )
+
+    @classmethod
+    def _get_param_names(cls):
+        return [
+            "quantiles",
+            "alpha",
+            "solver",
+        ]
+
+    def __sklearn_is_fitted__(self) -> bool:
+        return self.is_fitted_
+
+
+def _scale_percentile(x: np.ndarray, percentile: int = 95):
+    return np.abs(x / np.percentile(np.abs(x), percentile))
+
+
+def _weight_exp(x: np.ndarray, exponent: float = 1):
+    return np.abs(x) ** exponent
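
The hunk above is the entire new `LinearQuantileOpenstfRegressor`. A minimal usage sketch follows; the data and column names are synthetic and illustrative, not part of the package, and they deliberately avoid the ignored patterns (`is_*`, `T-*`, `E*_I`):

```python
import numpy as np
import pandas as pd

from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor

# Synthetic training data with plain feature names.
rng = np.random.default_rng(0)
x = pd.DataFrame(
    {
        "windspeed": rng.uniform(0, 20, 500),
        "temperature": rng.uniform(-5, 30, 500),
    }
)
y = pd.Series(
    3.0 * x["windspeed"] - 0.5 * x["temperature"] + rng.normal(0, 2, 500),
    name="load",
)

model = LinearQuantileOpenstfRegressor(quantiles=(0.1, 0.5, 0.9))
model.fit(x, y)

# One QuantileRegressor is fitted per requested quantile; query each separately.
p10 = model.predict(x, quantile=0.1)
p50 = model.predict(x)  # the default quantile is 0.5
p90 = model.predict(x, quantile=0.9)
```

Raising `weight_exponent` above 1 makes the fit concentrate on the extremes of the load curve, per the `_calculate_sample_weights` docstring above.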
openstef/model/regressors/xgb.py
@@ -1,6 +1,10 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
 #
 # SPDX-License-Identifier: MPL-2.0
+from typing import Optional
+
+import numpy as np
+from sklearn.base import RegressorMixin
 
 from xgboost import XGBRegressor
 
@@ -27,3 +31,22 @@ class XGBOpenstfRegressor(XGBRegressor, OpenstfRegressor):
             "gain_importance_name": "total_gain",
             "weight_importance_name": "weight",
         }
+
+    def fit(
+        self,
+        x: np.array,
+        y: np.array,
+        *,
+        early_stopping_rounds: Optional[int] = None,
+        callbacks: Optional[list] = None,
+        eval_metric: Optional[str] = None,
+        **kwargs
+    ):
+        if early_stopping_rounds is not None:
+            self.set_params(early_stopping_rounds=early_stopping_rounds)
+        if callbacks is not None:
+            self.set_params(callbacks=callbacks)
+        if eval_metric is not None:
+            self.set_params(eval_metric=eval_metric)
+
+        super().fit(x, y, **kwargs)
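
This override appears to exist for compatibility with newer XGBoost releases, where `early_stopping_rounds`, `callbacks`, and `eval_metric` moved from `fit()` to the constructor; rerouting them through `set_params` keeps fit-time call sites working. A sketch of the call pattern this enables (synthetic data):

```python
import numpy as np

from openstef.model.regressors.xgb import XGBOpenstfRegressor

x, y = np.random.rand(200, 4), np.random.rand(200)

model = XGBOpenstfRegressor()
# The fit-time keyword is forwarded to the estimator's parameters before
# training; eval_set and verbose pass through **kwargs to XGBRegressor.fit.
model.fit(x, y, early_stopping_rounds=5, eval_set=[(x, y)], verbose=False)
```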
openstef/model/regressors/xgb_multioutput_quantile.py (new file)
@@ -0,0 +1,261 @@
+# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
+#
+# SPDX-License-Identifier: MPL-2.0
+from functools import partial
+from typing import Dict, Optional, Sequence, Tuple, Union
+
+import numpy as np
+import pandas as pd
+import sklearn.base
+import xgboost as xgb
+from sklearn.compose import TransformedTargetRegressor
+from sklearn.preprocessing import StandardScaler
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
+from xgboost import Booster
+
+import openstef.metrics.metrics as metrics
+from openstef.model.regressors.regressor import OpenstfRegressor
+
+DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
+
+
+class XGBMultiOutputQuantileOpenstfRegressor(OpenstfRegressor):
+    r"""Model that provides multioutput quantile regression with XGBoost, by default using the arctan loss function.
+
+    Arctan loss:
+    Reference: https://github.com/LaurensSluyterman/XGBoost_quantile_regression/tree/master
+    The key idea is to use a smooth approximation of the pinball loss, the arctan
+    pinball loss, that has a relatively large second derivative.
+
+    The approximation is given by:
+    $$L^{(\text{arctan})}_{\tau, s}(u) = (\tau - 0.5 + \frac{\arctan (u/s)}{\pi})u + \frac{s}{\pi}$$  # noqa E501
+
+    Some important settings:
+
+    * The parameter s in the loss function determines the amount of smoothing. A
+      smaller value gives a closer approximation but also a much smaller second
+      derivative. A larger value gives more conservative quantiles: when tau
+      is larger than 0.5, the quantile becomes larger, and vice versa.
+      Values between 0.05 and 0.1 appear to work well. It may be a good idea to
+      optimize this parameter.
+    * Set min-child-weight to zero. The second derivatives can be a lot smaller
+      than 1 and this parameter may prevent any splits.
+    * Use a relatively small max-delta-step. We used a default of 0.5.
+      This prevents excessive steps that could happen due to the relatively
+      small second derivative.
+    * For the same reason, use a slightly lower learning rate of 0.05.
+
+    """
+
+    estimator_: TransformedTargetRegressor
+    quantile_indices_: Dict[float, int]
+
+    @staticmethod
+    def _get_importance_names():
+        return {
+            "gain_importance_name": "total_gain",
+            "weight_importance_name": "weight",
+        }
+
+    def __init__(
+        self,
+        quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
+        gamma: float = 0.0,
+        colsample_bytree: float = 1.0,
+        subsample: float = 1.0,
+        min_child_weight: int = 0,
+        max_depth: int = 6,
+        learning_rate: float = 0.22,
+        alpha: float = 0.0,
+        max_delta_step: float = 0.5,
+        arctan_smoothing: float = 0.055,
+        early_stopping_rounds: Optional[int] = None,
+    ):
+        """Initialize XGBMultiQuantileRegressor.
+
+        Model that provides quantile regression with XGBoost.
+        For each desired quantile an XGBoost model is trained;
+        these can later be used to predict quantiles.
+
+        Args:
+            quantiles: Tuple with desired quantiles, quantile 0.5 is required.
+                For example: (0.1, 0.5, 0.9)
+            gamma: Gamma.
+            colsample_bytree: Colsample by tree.
+            subsample: Subsample.
+            min_child_weight: Minimum child weight.
+            max_depth: Maximum depth.
+            learning_rate: Learning rate.
+            alpha: Alpha.
+            max_delta_step: Maximum delta step.
+            arctan_smoothing: Smoothing parameter of the arctan loss function.
+            early_stopping_rounds: Number of rounds to stop training if no improvement
+                is made.
+
+        Raises:
+            ValueError in case quantile 0.5 is not in the requested quantiles.
+
+        """
+        super().__init__()
+        if 0.5 not in quantiles:
+            raise ValueError(
+                "Cannot train quantile model as 0.5 is not in requested quantiles!"
+            )
+
+        self.quantiles = quantiles
+
+        # Set attributes for hyper parameters
+        self.subsample = subsample
+        self.min_child_weight = min_child_weight
+        self.max_depth = max_depth
+        self.gamma = gamma
+        self.alpha = alpha
+        self.max_delta_step = max_delta_step
+        self.colsample_bytree = colsample_bytree
+        self.learning_rate = learning_rate
+        self.early_stopping_rounds = early_stopping_rounds
+        self.arctan_smoothing = arctan_smoothing
+
+        # Get fitting parameters - only those accepted by xgboost's XGBRegressor
+        xgb_regressor_params = {
+            key: value
+            for key, value in self.get_params().items()
+            if key in xgb.XGBRegressor().get_params().keys()
+        }
+
+        # Define the model
+        objective = partial(
+            metrics.arctan_loss, taus=self.quantiles, s=arctan_smoothing
+        )
+        xgb_model: xgb.XGBRegressor = xgb.XGBRegressor(
+            objective=objective,
+            base_score=0,
+            multi_strategy="one_output_per_tree",
+            **xgb_regressor_params,
+        )
+        self.estimator_ = TransformedTargetRegressor(
+            regressor=xgb_model, transformer=StandardScaler()
+        )
+
+        # Set quantile indices to remap multioutput predictions
+        self.quantile_indices_ = {
+            quantile: i for i, quantile in enumerate(self.quantiles)
+        }
+
+    def fit(
+        self,
+        x: np.array,
+        y: np.array,
+        eval_set: Optional[Sequence[Tuple[np.array, np.array]]] = None,
+        verbose: Optional[Union[bool, int]] = 0,
+        **kwargs
+    ) -> OpenstfRegressor:
+        """Fits xgb quantile model.
+
+        Args:
+            x: Feature matrix.
+            y: Labels.
+            eval_set: Evaluation set to monitor training performance.
+            verbose: Verbosity level (disabled by default).
+
+        Returns:
+            Fitted XGBQuantile model.
+
+        """
+        if isinstance(y, pd.Series):
+            y = y.to_numpy()
+
+        if not isinstance(x, pd.DataFrame):
+            x = pd.DataFrame(np.asarray(x))
+
+        # Check/validate input
+        check_X_y(x, y, force_all_finite="allow-nan")
+
+        # Prepare inputs
+        y_multioutput = replicate_for_multioutput(y, len(self.quantiles))
+
+        # Define watchlist if eval_set is defined
+        eval_set_multioutput = []
+        if eval_set:
+            for x_eval, y_eval in eval_set:
+                if isinstance(y_eval, pd.Series):
+                    y_eval = y_eval.to_numpy()
+
+                y_eval_multioutput = replicate_for_multioutput(
+                    y=y_eval, num_quantiles=len(self.quantiles)
+                )
+                eval_set_multioutput.append((x_eval, y_eval_multioutput))
+
+            eval_set_multioutput.append((x, y_multioutput))
+
+        self.estimator_.fit(
+            X=x.copy(deep=True),
+            y=y_multioutput,
+            eval_set=eval_set_multioutput,
+            verbose=verbose,
+        )
+
+        # Update state of the estimator
+        self.feature_importances_ = self.estimator_.regressor_.feature_importances_
+        self.is_fitted_ = True
+
+        return self
+
+    def predict(self, x: np.array, quantile: float = 0.5) -> np.array:
+        """Makes a prediction for a desired quantile.
+
+        Args:
+            x: Feature matrix.
+            quantile: Quantile for which a prediction is desired.
+                Note that predictions are only available for quantiles for which a
+                model was trained, and that this is a quantile-model specific keyword.
+
+        Returns:
+            Prediction
+
+        Raises:
+            ValueError in case no model is trained for the requested quantile.
+
+        """
+        # Check if model is trained for this quantile
+        if quantile not in self.quantiles:
+            raise ValueError("No model trained for requested quantile!")
+
+        # Check/validate input
+        check_array(x, force_all_finite="allow-nan")
+        check_is_fitted(self)
+
+        # best_iteration is only available if early stopping was used during training
+        prediction: np.array
+        if hasattr(self.estimator_, "best_iteration"):
+            prediction = self.estimator_.predict(
+                X=x,
+                iteration_range=(0, self.estimator_.best_iteration + 1),
+            )
+        else:
+            prediction = self.estimator_.predict(X=x)
+
+        quantile_index = self.quantile_indices_[quantile]
+        return prediction[:, quantile_index]
+
+    @property
+    def feature_names(self):
+        return self.estimator_.feature_names_in_
+
+    @property
+    def can_predict_quantiles(self):
+        return True
+
+
+def replicate_for_multioutput(y: np.array, num_quantiles: int) -> np.array:
+    """Replicates a 1D array to a 2D array for multioutput regression.
+
+    Args:
+        y: 1D array.
+        num_quantiles: Number of columns in the output array.
+
+    Returns:
+        2D array with shape (len(y), num_quantiles)
+
+    """
+    return np.repeat(y[:, None], num_quantiles, axis=1)
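
For intuition, the arctan pinball loss from the docstring above can be written down directly. The sketch below (independent of the package code) checks numerically that it approaches the ordinary pinball loss as the smoothing parameter s shrinks:

```python
import numpy as np

def arctan_pinball(u: np.ndarray, tau: float, s: float) -> np.ndarray:
    # L_{tau,s}(u) = (tau - 0.5 + arctan(u / s) / pi) * u + s / pi
    return (tau - 0.5 + np.arctan(u / s) / np.pi) * u + s / np.pi

def pinball(u: np.ndarray, tau: float) -> np.ndarray:
    # Ordinary pinball loss on the residual u = y_true - y_pred.
    return np.maximum(tau * u, (tau - 1.0) * u)

u = np.linspace(-2.0, 2.0, 401)
for s in (0.1, 0.01, 0.001):
    gap = np.max(np.abs(arctan_pinball(u, tau=0.9, s=s) - pinball(u, tau=0.9)))
    print(f"s={s}: max deviation {gap:.4f}")  # shrinks as s -> 0
```

Unlike the plain pinball loss, the smooth version has a usable second derivative everywhere, which is what makes it workable as a custom XGBoost objective.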
openstef/model/regressors/xgb_quantile.py
@@ -52,6 +52,9 @@ class XGBQuantileOpenstfRegressor(OpenstfRegressor):
             alpha: Alpha
             max_delta_step: Maximum delta step
 
+        Raises:
+            ValueError in case quantile 0.5 is not in the requested quantiles
+
         """
         super().__init__()
         # Check if quantile 0.5 is present; this is required
openstef/model/serializer.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 import json
+import logging
 import os
 import shutil
 from datetime import datetime
@@ -20,10 +21,16 @@ from xgboost import XGBModel  # Temporary for backward compatibility
 from openstef.data_classes.model_specifications import ModelSpecificationDataClass
 from openstef.metrics.reporter import Report
 from openstef.model.regressors.regressor import OpenstfRegressor
+from openstef.settings import Settings
 
 
 class MLflowSerializer:
     def __init__(self, mlflow_tracking_uri: str):
+        structlog.configure(
+            wrapper_class=structlog.make_filtering_bound_logger(
+                logging.getLevelName(Settings.log_level)
+            )
+        )
        self.logger = structlog.get_logger(self.__class__.__name__)
        mlflow.set_tracking_uri(mlflow_tracking_uri)
        self.logger.debug(f"MLflow tracking uri at init= {mlflow_tracking_uri}")
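
The added `structlog.configure` block is a pattern that recurs in this release (it also appears in `openstef/monitoring/teams.py` below): the level name from the new `Settings` class is mapped to its numeric value and used to build a filtering bound logger, so debug lines like the one above are dropped unless that level is enabled. A standalone sketch of the mechanism, with `"INFO"` standing in for `Settings.log_level`:

```python
import logging

import structlog

structlog.configure(
    wrapper_class=structlog.make_filtering_bound_logger(
        logging.getLevelName("INFO")  # maps the level name to its numeric value (20)
    )
)
logger = structlog.get_logger("demo")

logger.debug("dropped: below the configured level")
logger.info("emitted")
```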
@@ -147,6 +154,9 @@ class MLflowSerializer:
         Args:
             experiment_name: Name of the experiment, often the id of the prediction job.
 
+        Raises:
+            LookupError: If model is not found in MLflow.
+
         """
         try:
             models_df = self._find_models(
openstef/model_selection/model_selection.py
@@ -106,7 +106,7 @@ def split_data_train_validation_test(
     validation_fraction: float = 0.15,
     back_test: bool = False,
     stratification_min_max: bool = True,
-) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Split input data into train, test and validation set.
 
    Function for splitting data with features in a train, test and
@@ -140,6 +140,9 @@
         - Validation data.
         - Test data.
 
+    Raises:
+        ValueError: When the test and validation fractions are too high.
+
    """
    test_fraction = test_fraction if back_test else 0
    train_fraction = 1 - (test_fraction + validation_fraction)
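
Note the signature change above: `split_data_train_validation_test` now returns four DataFrames instead of three, so call sites must unpack an extra value. A sketch of the new calling convention (`data_with_features` is a hypothetical input frame, and since the hunk does not name the fourth split, the variable names here are illustrative):

```python
from openstef.model_selection.model_selection import split_data_train_validation_test

train_data, validation_data, test_data, operational_score_data = (
    split_data_train_validation_test(
        data_with_features,    # pd.DataFrame with target and feature columns
        test_fraction=0.1,
        validation_fraction=0.15,
        back_test=True,        # test_fraction is only honored in backtests
    )
)
```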
openstef/monitoring/performance_meter.py
@@ -20,8 +20,7 @@ class PerformanceMeter:
 
        Args:
            level_label: The label of the new level. This could i.e. be 'task'
-           level_name: The name of the specified level. This could i.e. be
-               'tracy_todo'
+           level_name: The name of the specified level.
            **kwargs: Any other kwargs are appended to the logging.
 
        Returns:
 
openstef/monitoring/teams.py
@@ -1,6 +1,7 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
 #
 # SPDX-License-Identifier: MPL-2.0
+import logging
 from typing import Union
 
 import pandas as pd
@@ -8,6 +9,8 @@ import pymsteams
 import structlog
 from pymsteams import cardsection
 
+from openstef.settings import Settings
+
 
 def post_teams(
     msg: Union[str, dict],
@@ -38,6 +41,14 @@
    Note:
        This function is namespace-specific.
    """
+    if not Settings.post_teams_messages:
+        return
+
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
    logger = structlog.get_logger(__name__)
    # If no url is passed, give warning and don't send teams message
    if url is None:
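
The early return above means Teams notifications can now be switched off globally: when `Settings.post_teams_messages` is false, `post_teams` exits before building or sending anything, so no webhook URL is needed. A sketch of the effect (assuming the flag can be toggled in code for illustration; in a real deployment it would come from the environment-backed `Settings` introduced in `openstef/settings.py`):

```python
from openstef.monitoring.teams import post_teams
from openstef.settings import Settings

Settings.post_teams_messages = False  # hypothetical toggle; normally set via environment
post_teams("training finished")       # returns immediately, nothing is sent
```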