openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff compares publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (104)
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/model/regressors/xgb_multioutput_quantile.py (deleted)
@@ -1,261 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
- from functools import partial
- from typing import Dict, Optional, Sequence, Tuple, Union
-
- import numpy as np
- import pandas as pd
- import sklearn.base
- import xgboost as xgb
- from sklearn.compose import TransformedTargetRegressor
- from sklearn.preprocessing import StandardScaler
- from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
- from xgboost import Booster
-
- import openstef.metrics.metrics as metrics
- from openstef.model.regressors.regressor import OpenstfRegressor
-
- DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
-
-
- class XGBMultiOutputQuantileOpenstfRegressor(OpenstfRegressor):
-     r"""Model that provides multioutput quantile regression with XGBoost by default using the arctan loss function.
-
-     Arctan loss:
-         Reference: https://github.com/LaurensSluyterman/XGBoost_quantile_regression/tree/master
-         The key idea is to use a smooth approximation of the pinball loss, the arctan
-         pinball loss, that has a relatively large second derivative.
-
-         The approximation is given by:
-         $$L^{(\text{arctan})}_{\tau, s}(u) = (\tau - 0.5 + \frac{\arctan (u/s)}{\pi})u + \frac{s}{\pi}$$. # noqa E501
-
-     Some important settings:
-
-     * The parameter $s$ in the loss function determines the amount of smoothing. A
-         smaller value gives a closer approximation but also a much smaller second
-         derivative. A larger value gives more conservative quantiles: when $\tau$
-         is larger than 0.5, the quantile becomes larger, and vice versa.
-         Values between 0.05 and 0.1 appear to work well. It may be a good idea to
-         optimize this parameter.
-     * Set min-child-weight to zero. The second derivatives can be a lot smaller
-         than 1 and this parameter may prevent any splits.
-     * Use a relatively small max-delta-step. We used a default of 0.5.
-         This prevents excessive steps that could happen due to the relatively
-         small second derivative.
-     * For the same reason, use a slightly lower learning rate of 0.05.
-
-     """
-
-     estimator_: TransformedTargetRegressor
-     quantile_indices_: Dict[float, int]
-
-     @staticmethod
-     def _get_importance_names():
-         return {
-             "gain_importance_name": "total_gain",
-             "weight_importance_name": "weight",
-         }
-
-     def __init__(
-         self,
-         quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
-         gamma: float = 0.0,
-         colsample_bytree: float = 1.0,
-         subsample: float = 1.0,
-         min_child_weight: int = 0,
-         max_depth: int = 6,
-         learning_rate: float = 0.22,
-         alpha: float = 0.0,
-         max_delta_step: float = 0.5,
-         arctan_smoothing: float = 0.055,
-         early_stopping_rounds: Optional[int] = None,
-     ):
-         """Initialize XGBMultiQuantileRegressor.
-
-         Model that provides quantile regression with XGBoost.
-         For each desired quantile an XGBoost model is trained,
-         these can later be used to predict quantiles.
-
-         Args:
-             quantiles: Tuple with desired quantiles, quantile 0.5 is required.
-                 For example: (0.1, 0.5, 0.9)
-             gamma: Gamma.
-             colsample_bytree: Colsample by tree.
-             subsample: Subsample.
-             min_child_weight: Minimum child weight.
-             max_depth: Maximum depth.
-             learning_rate: Learning rate.
-             alpha: Alpha.
-             max_delta_step: Maximum delta step.
-             arctan_smoothing: Smoothing parameter of the arctan loss function.
-             early_stopping_rounds: Number of rounds to stop training if no improvement
-                 is made.
-
-         Raises:
-             ValueError in case quantile 0.5 is not in the requested quantiles.
-
-         """
-         super().__init__()
-         if 0.5 not in quantiles:
-             raise ValueError(
-                 "Cannot train quantile model as 0.5 is not in requested quantiles!"
-             )
-
-         self.quantiles = quantiles
-
-         # Set attributes for hyper parameters
-         self.subsample = subsample
-         self.min_child_weight = min_child_weight
-         self.max_depth = max_depth
-         self.gamma = gamma
-         self.alpha = alpha
-         self.max_delta_step = max_delta_step
-         self.colsample_bytree = colsample_bytree
-         self.learning_rate = learning_rate
-         self.early_stopping_rounds = early_stopping_rounds
-         self.arctan_smoothing = arctan_smoothing
-
-         # Get fitting parameters - only those accepted by xgboost's XGBRegressor
-         xgb_regressor_params = {
-             key: value
-             for key, value in self.get_params().items()
-             if key in xgb.XGBRegressor().get_params().keys()
-         }
-
-         # Define the model
-         objective = partial(
-             metrics.arctan_loss, taus=self.quantiles, s=arctan_smoothing
-         )
-         xgb_model: xgb.XGBRegressor = xgb.XGBRegressor(
-             objective=objective,
-             base_score=0,
-             multi_strategy="one_output_per_tree",
-             **xgb_regressor_params,
-         )
-         self.estimator_ = TransformedTargetRegressor(
-             regressor=xgb_model, transformer=StandardScaler()
-         )
-
-         # Set quantile indices to remap multioutput predictions
-         self.quantile_indices_ = {
-             quantile: i for i, quantile in enumerate(self.quantiles)
-         }
-
-     def fit(
-         self,
-         x: np.array,
-         y: np.array,
-         eval_set: Optional[Sequence[Tuple[np.array, np.array]]] = None,
-         verbose: Optional[Union[bool, int]] = 0,
-         **kwargs
-     ) -> OpenstfRegressor:
-         """Fits xgb quantile model.
-
-         Args:
-             x: Feature matrix.
-             y: Labels.
-             eval_set: Evaluation set to monitor training performance.
-             verbose: Verbosity level (disabled by default).
-
-         Returns:
-             Fitted XGBQuantile model.
-
-         """
-         if isinstance(y, pd.Series):
-             y = y.to_numpy()
-
-         if not isinstance(x, pd.DataFrame):
-             x = pd.DataFrame(np.asarray(x))
-
-         # Check/validate input
-         check_X_y(x, y, force_all_finite="allow-nan")
-
-         # Prepare inputs
-         y_multioutput = replicate_for_multioutput(y, len(self.quantiles))
-
-         # Define watchlist if eval_set is defined
-         eval_set_multioutput = []
-         if eval_set:
-             for x_eval, y_eval in eval_set:
-                 if isinstance(y_eval, pd.Series):
-                     y_eval = y_eval.to_numpy()
-
-                 y_eval_multioutput = replicate_for_multioutput(
-                     y=y_eval, num_quantiles=len(self.quantiles)
-                 )
-                 eval_set_multioutput.append((x_eval, y_eval_multioutput))
-
-             eval_set_multioutput.append((x, y_multioutput))
-
-         self.estimator_.fit(
-             X=x.copy(deep=True),
-             y=y_multioutput,
-             eval_set=eval_set_multioutput,
-             verbose=verbose,
-         )
-
-         # Update state of the estimator
-         self.feature_importances_ = self.estimator_.regressor_.feature_importances_
-         self.is_fitted_ = True
-
-         return self
-
-     def predict(self, x: np.array, quantile: float = 0.5) -> np.array:
-         """Makes a prediction for a desired quantile.
-
-         Args:
-             x: Feature matrix.
-             quantile: Quantile for which a prediction is desired;
-                 note that predictions are only available for quantiles for which a model was trained,
-                 and that this is a quantile-model specific keyword.
-
-         Returns:
-             Prediction
-
-         Raises:
-             ValueError in case no model is trained for the requested quantile.
-
-         """
-         # Check if model is trained for this quantile
-         if quantile not in self.quantiles:
-             raise ValueError("No model trained for requested quantile!")
-
-         # Check/validate input
-         check_array(x, force_all_finite="allow-nan")
-         check_is_fitted(self)
-
-         # best_iteration is only available if early stopping was used during training
-         prediction: np.array
-         if hasattr(self.estimator_, "best_iteration"):
-             prediction = self.estimator_.predict(
-                 X=x,
-                 iteration_range=(0, self.estimator_.best_iteration + 1),
-             )
-         else:
-             prediction = self.estimator_.predict(X=x)
-
-         quantile_index = self.quantile_indices_[quantile]
-         return prediction[:, quantile_index]
-
-     @property
-     def feature_names(self):
-         return self.estimator_.feature_names_in_
-
-     @property
-     def can_predict_quantiles(self):
-         return True
-
-
- def replicate_for_multioutput(y: np.array, num_quantiles: int) -> np.array:
-     """Replicates a 1D array to a 2D array for multioutput regression.
-
-     Args:
-         y: 1D array.
-         num_quantiles: Number of columns in the output array.
-
-     Returns:
-         2D array with shape (len(y), num_quantiles)
-
-     """
-     return np.repeat(y[:, None], num_quantiles, axis=1)
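
The module deleted above combined two tricks: it replicated the target into one output column per quantile (replicate_for_multioutput plus multi_strategy="one_output_per_tree") and trained all columns at once against the smoothed "arctan" pinball loss supplied by openstef.metrics.arctan_loss. As a rough sketch of what that loss computes, here is a single-quantile version in the gradient/Hessian form XGBoost's sklearn API expects from a custom objective; the function name and single-quantile signature are illustrative, not the removed openstef API:

```python
from functools import partial

import numpy as np
import xgboost as xgb


def arctan_pinball_grad_hess(y_true, y_pred, tau=0.5, s=0.055):
    """Gradient and Hessian of L(u) = (tau - 0.5 + arctan(u/s)/pi) * u + s/pi,
    with u = y_true - y_pred, differentiated w.r.t. the prediction y_pred."""
    z = (y_true - y_pred) / s
    grad = 0.5 - tau - np.arctan(z) / np.pi - z / (np.pi * (z**2 + 1))
    # d2L/du2 = 2*s**3 / (pi * (s**2 + u**2)**2), rewritten in terms of z
    hess = 2.0 / (np.pi * s * (z**2 + 1) ** 2)
    return grad, hess


# The smoothing keeps the Hessian usefully large, which is why the docstring
# recommends min_child_weight=0, a small max_delta_step, and a lower
# learning rate for this objective.
model = xgb.XGBRegressor(
    objective=partial(arctan_pinball_grad_hess, tau=0.9, s=0.055),
    min_child_weight=0,
    max_delta_step=0.5,
    learning_rate=0.05,
)

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = X.sum(axis=1) + rng.normal(scale=0.3, size=200)
model.fit(X, y)  # trains toward the 0.9 quantile of y given X
```

Binding tau and s with functools.partial mirrors how the removed class bound metrics.arctan_loss with taus and arctan_smoothing.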
openstef/model/regressors/xgb_quantile.py (deleted)
@@ -1,228 +0,0 @@
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
- #
- # SPDX-License-Identifier: MPL-2.0
- from functools import partial
-
- import numpy as np
- import xgboost as xgb
- from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
- from xgboost import Booster
-
- import openstef.metrics.metrics as metrics
- from openstef.model.regressors.regressor import OpenstfRegressor
-
- DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
-
-
- class XGBQuantileOpenstfRegressor(OpenstfRegressor):
-     @staticmethod
-     def _get_importance_names():
-         return {
-             "gain_importance_name": "total_gain",
-             "weight_importance_name": "weight",
-         }
-
-     def __init__(
-         self,
-         quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
-         gamma: float = 0.0,
-         colsample_bytree: float = 1.0,
-         subsample: float = 1.0,
-         min_child_weight: int = 1,
-         max_depth: int = 6,
-         learning_rate: float = 0.3,
-         alpha: float = 0.0,
-         max_delta_step: int = 0,
-     ):
-         """Initialize XGBQuantileRegressor.
-
-         Model that provides quantile regression with XGBoost.
-         For each desired quantile an XGBoost model is trained,
-         these can later be used to predict quantiles.
-
-         Args:
-             quantiles: Tuple with desired quantiles, quantile 0.5 is required.
-                 For example: (0.1, 0.5, 0.9)
-             gamma: Gamma
-             colsample_bytree: Colsample by tree
-             subsample: Subsample
-             min_child_weight: Minimum child weight
-             max_depth: Maximum depth
-             learning_rate: Learning rate
-             alpha: Alpha
-             max_delta_step: Maximum delta step
-
-         Raises:
-             ValueError in case quantile 0.5 is not in the requested quantiles
-
-         """
-         super().__init__()
-         # Check if quantile 0.5 is present; this is required
-         if 0.5 not in quantiles:
-             raise ValueError(
-                 "Cannot train quantile model as 0.5 is not in requested quantiles!"
-             )
-
-         self.quantiles = quantiles
-
-         # Set attributes for hyper parameters
-         self.subsample = subsample
-         self.min_child_weight = min_child_weight
-         self.max_depth = max_depth
-         self.gamma = gamma
-         self.alpha = alpha
-         self.max_delta_step = max_delta_step
-         self.colsample_bytree = colsample_bytree
-         self.learning_rate = learning_rate
-
-     def fit(self, x: np.array, y: np.array, **kwargs) -> OpenstfRegressor:
-         """Fits xgb quantile model.
-
-         Args:
-             x: Feature matrix
-             y: Labels
-
-         Returns:
-             Fitted XGBQuantile model
-
-         """
-         # TODO: specify these required kwargs in the function definition
-         early_stopping_rounds = kwargs.get("early_stopping_rounds", None)
-         eval_set = kwargs.get("eval_set", None)
-
-         # Check/validate input
-         check_X_y(x, y, force_all_finite="allow-nan")
-
-         # Convert x and y to dmatrix input
-         dtrain = xgb.DMatrix(x.copy(deep=True), label=y.copy(deep=True))
-
-         # Define watchlist if eval_set is defined
-         if eval_set:
-             dval = xgb.DMatrix(
-                 eval_set[1][0].copy(deep=True),
-                 label=eval_set[1][1].copy(deep=True),
-             )
-
-             # Define the data sets to be monitored during training; the last (validation)
-             # set will be used for early stopping
-             watchlist = [(dtrain, "train"), (dval, "validation")]
-         else:
-             watchlist = ()
-
-         # Get fitting parameters - only those accepted by xgboost's XGBRegressor
-         xgb_regressor_params = {
-             key: value
-             for key, value in self.get_params().items()
-             if key in xgb.XGBRegressor().get_params().keys()
-         }
-
-         quantile_models = {}
-
-         for quantile in self.quantiles:
-             # Define objective callback functions specifically for desired quantile
-             xgb_quantile_eval_this_quantile = partial(
-                 metrics.xgb_quantile_eval, quantile=quantile
-             )
-             xgb_quantile_obj_this_quantile = partial(
-                 metrics.xgb_quantile_obj, quantile=quantile
-             )
-
-             # Train quantile model
-             quantile_models[quantile] = xgb.train(
-                 params=xgb_regressor_params,
-                 dtrain=dtrain,
-                 evals=watchlist,
-                 # Can be large because we are early stopping anyway
-                 num_boost_round=100,
-                 obj=xgb_quantile_obj_this_quantile,
-                 feval=xgb_quantile_eval_this_quantile,
-                 verbose_eval=False,
-                 early_stopping_rounds=early_stopping_rounds,
-             )
-
-         # Set weights and features from the 0.5 (median) model
-         self.feature_importances_ = self.get_feature_importances_from_booster(
-             quantile_models[0.5]
-         )
-         self._Booster = quantile_models[0.5]  # Used for feature names later on
-         # Update state of the estimator
-         self.estimators_ = quantile_models
-         self.is_fitted_ = True
-
-         return self
-
-     def predict(self, x: np.array, quantile: float = 0.5) -> np.array:
-         """Makes a prediction for a desired quantile.
-
-         Args:
-             x: Feature matrix
-             quantile: Quantile for which a prediction is desired;
-                 note that predictions are only available for quantiles for which a model is trained,
-                 and that this is a quantile-model specific keyword
-
-         Returns:
-             Prediction
-
-         Raises:
-             ValueError in case no model is trained for the requested quantile
-
-         """
-         # Check if model is trained for this quantile
-         if quantile not in self.quantiles:
-             raise ValueError("No model trained for requested quantile!")
-
-         # Check/validate input
-         check_array(x, force_all_finite="allow-nan")
-         check_is_fitted(self)
-
-         # Convert array to dmatrix
-         dmatrix_input = xgb.DMatrix(x.copy(deep=True))
-
-         # best_iteration is only available if early stopping was used during training
-         if hasattr(self.estimators_[quantile], "best_iteration"):
-             return self.estimators_[quantile].predict(
-                 dmatrix_input,
-                 iteration_range=(0, self.estimators_[quantile].best_iteration + 1),
-             )
-
-         else:
-             return self.estimators_[quantile].predict(dmatrix_input)
-
-     @classmethod
-     def get_feature_importances_from_booster(cls, booster: Booster) -> np.ndarray:
-         """Gets feature importances from an XGB booster.
-
-         This is based on the feature_importances_ property defined in:
-         https://github.com/dmlc/xgboost/blob/master/python-package/xgboost/sklearn.py.
-
-         Args:
-             booster: Booster object;
-                 most of the time the median model (quantile=0.5) is preferred
-
-         Returns:
-             Ndarray with normalized feature importances.
-
-         """
-         # Get score
-         score = booster.get_score(importance_type="gain")
-
-         # Get feature names from booster
-         feature_names = booster.feature_names
-
-         # Get importance
-         feature_importance = [score.get(f, 0.0) for f in feature_names]
-         # Convert to array
-         features_importance_array = np.array(feature_importance, dtype=np.float32)
-
-         total = features_importance_array.sum()  # For normalizing
-         if total == 0:
-             return features_importance_array
-         return features_importance_array / total  # Normalize
-
-     @property
-     def feature_names(self):
-         return self._Booster.feature_names
-
-     @property
-     def can_predict_quantiles(self):
-         return True
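
For contrast with the multioutput variant above, the class deleted here trained a separate booster per quantile, binding openstef.metrics.xgb_quantile_obj and xgb_quantile_eval to each quantile with functools.partial. A minimal sketch of that one-booster-per-quantile pattern, assuming xgboost >= 2.0 and substituting the built-in quantile objective for the removed openstef callbacks (data and hyperparameters are illustrative):

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + rng.normal(scale=0.5, size=500)
dtrain = xgb.DMatrix(X, label=y)

# One booster per requested quantile, keyed by quantile like self.estimators_
quantiles = (0.1, 0.5, 0.9)
models = {
    q: xgb.train(
        params={
            "objective": "reg:quantileerror",  # built-in pinball loss
            "quantile_alpha": q,
            "learning_rate": 0.3,
            "max_depth": 6,
        },
        dtrain=dtrain,
        num_boost_round=100,
    )
    for q in quantiles
}

# predict(x, quantile=q) in the removed class dispatched to the matching booster
p90 = models[0.9].predict(xgb.DMatrix(X))
```

Keeping the median (0.5) model around for feature names and importances, as the removed class did, is what made 0.5 a required quantile.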