openstef-3.4.56-py3-none-any.whl → openstef-4.0.0a3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/model/objective.py
@@ -1,426 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-import copy
-from datetime import datetime, timezone
-from typing import Any, Callable, Optional
-
-import optuna
-import pandas as pd
-
-from openstef.enums import ModelType
-from openstef.metrics import metrics
-from openstef.metrics.reporter import Report, Reporter
-from openstef.model.regressors.regressor import OpenstfRegressor
-from openstef.model.standard_deviation_generator import StandardDeviationGenerator
-from openstef.model_selection.model_selection import split_data_train_validation_test
-
-EARLY_STOPPING_ROUNDS: int = 10
-TEST_FRACTION: float = 0.15
-VALIDATION_FRACTION: float = 0.15
-# See https://xgboost.readthedocs.io/en/latest/parameter.html for all possibilities
-EVAL_METRIC: str = "mae"
-
-# https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args
-
-
-class RegressorObjective:
-    """Regressor optuna objective function.
-
-    Use any of the derived classes for optimization using an optuna study.
-    The constructor is used to set the "input_data", specify the splitting function
-    and its arguments, and optionally add some configuration.
-    Next, the instance will be called by the optuna study during optimization.
-
-    Example usage:
-
-    .. code-block:: py
-
-        # initialize a (derived class) objective function
-        objective = XGBRegressorObjective(model, input_data)
-        # use the objective function
-        study.optimize(objective)
-
-    """
-
-    def __init__(
-        self,
-        model: OpenstfRegressor,
-        input_data: pd.DataFrame,
-        split_func: Optional[Callable] = None,
-        split_args: Optional[dict[str, Any]] = None,
-        test_fraction=TEST_FRACTION,
-        validation_fraction=VALIDATION_FRACTION,
-        eval_metric=EVAL_METRIC,
-        verbose=False,
-    ):
-        self.input_data = input_data
-        self.train_data = None
-        self.validation_data = None
-        self.test_data = None
-        self.model = model
-        self.start_time = datetime.now(timezone.utc)
-        self.test_fraction = test_fraction
-        self.validation_fraction = validation_fraction
-        self.eval_metric = eval_metric
-        self.eval_metric_function = metrics.get_eval_metric_function(eval_metric)
-        self.verbose = verbose
-        # Should be set on derived classes
-        self.model_type = None
-        self.track_trials = {}
-
-        # split function and arguments
-        self.split_func = split_func
-        self.split_args = split_args
-
-        # default behavior for splitting
-        if self.split_func is None:
-            self.split_func = split_data_train_validation_test
-            self.split_args = None
-
-    def __call__(
-        self,
-        trial: optuna.trial.FrozenTrial,
-    ) -> float:
-        """Optuna objective function.
-
-        Args: trial
-
-        Returns:
-            Mean absolute error for this trial.
-
-        """
-        # Perform data preprocessing
-        split_args = self.split_args
-        if split_args is None:
-            split_args = {
-                "stratification_min_max": True,
-                "back_test": True,
-            }
-        (
-            self.train_data,
-            self.validation_data,
-            self.test_data,
-            self.operational_score_data,
-        ) = self.split_func(
-            self.input_data,
-            test_fraction=self.test_fraction,
-            validation_fraction=self.validation_fraction,
-            **split_args,
-        )
-
-        # Test if first column is "load" and last column is "horizon"
-        if (
-            self.train_data.columns[0] != "load"
-            or self.train_data.columns[-1] != "horizon"
-        ):
-            raise RuntimeError(
-                "Column order in train input data not as expected, "
-                "could not train a model!"
-            )
-
-        # Split in x, y data (x are the features, y is the load)
-        train_x, train_y = self.train_data.iloc[:, 1:-1], self.train_data.iloc[:, 0]
-        valid_x, valid_y = (
-            self.validation_data.iloc[:, 1:-1],
-            self.validation_data.iloc[:, 0],
-        )
-        test_x, test_y = self.test_data.iloc[:, 1:-1], self.test_data.iloc[:, 0]
-
-        # Configure evals for early stopping
-        eval_set = [(train_x, train_y), (valid_x, valid_y)]
-
-        # get the parameters used in this trial
-        hyper_params = self.get_params(trial)
-
-        # insert parameters into model
-        self.model.set_params(**hyper_params)
-
-        # create the specific pruning callback
-        pruning_callback = self.get_pruning_callback(trial)
-        if pruning_callback is None:
-            callbacks = None
-        else:
-            callbacks = [pruning_callback]
-
-        # validation_0 and validation_1 are available
-        self.model.fit(
-            train_x,
-            train_y,
-            eval_set=eval_set,
-            early_stopping_rounds=EARLY_STOPPING_ROUNDS,
-            verbose=self.verbose,
-            eval_metric=self.eval_metric,
-            callbacks=callbacks,
-        )
-
-        self.model.feature_importance_dataframe = self.model.set_feature_importance()
-
-        # Do confidence interval determination
-        self.model = StandardDeviationGenerator(
-            self.validation_data
-        ).generate_standard_deviation_data(self.model)
-
-        forecast_y = self.model.predict(test_x)
-        score = self.eval_metric_function(test_y, forecast_y)
-
-        # Convert float32 to float because float32 is not JSON serializable
-        self.track_trials[f" trial: {trial.number}"] = {
-            "score": float(score),
-            "params": hyper_params,
-        }
-        trial.set_user_attr(key="model", value=copy.deepcopy(self.model))
-        return score
-
-    def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
-        """Get parameters for objective without model specific get_params function.
-
-        Args: trial
-
-        Returns:
-            Dictionary with hyperparameter name as key and hyperparameter value as value.
-
-        """
-        default_params = {
-            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.5),
-            "alpha": trial.suggest_float("alpha", 0, 1.0),
-            "lambda": trial.suggest_float("lambda", 1e-8, 1.0),
-            "subsample": trial.suggest_float("subsample", 0.4, 1.0),
-            "min_child_weight": trial.suggest_int("min_child_weight", 1, 16),
-            "max_depth": trial.suggest_int("max_depth", 3, 10),
-            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
-            "max_delta_step": trial.suggest_int("max_delta_step", 0, 10),
-        }
-
-        # Compare the list to the default parameter space
-        model_parameters = self.model.get_params()
-        keys = [x for x in model_parameters.keys() if x in default_params.keys()]
-        # create a dictionary with the matching parameters
-        params = {parameter: default_params[parameter] for parameter in keys}
-
-        return params
-
-    def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
-        return None
-
-    def get_trial_track(self) -> dict:
-        """Get a dictionary of all trials.
-
-        Returns:
-            Dict with all trials and their parameters
-
-        """
-        return self.track_trials
-
-    def create_report(self, model: OpenstfRegressor) -> Report:
-        """Generate a report from the data available inside the objective function.
-
-        Args:
-            model: OpenstfRegressor, model to create a report on
-
-        Returns:
-            Report about the model
-
-        """
-        # Report about the training process
-        reporter = Reporter(self.train_data, self.validation_data, self.test_data)
-        report = reporter.generate_report(model)
-
-        return report
-
-    @classmethod
-    def get_default_values(cls) -> dict:
-        return {
-            "learning_rate": 0.3,
-            "alpha": 0.0,
-            "lambda": 1.0,
-            "subsample": 1.0,
-            "min_child_weight": 1,
-            "max_depth": 6,
-            "colsample_bytree": 1,
-            "max_delta_step": 0,
-        }
-
-
-class XGBRegressorObjective(RegressorObjective):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.model_type = ModelType.XGB
-
-    # extend the parameters with the model specific ones per implementation
-    def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
-        """Get parameters for XGB Regressor Objective with objective specific parameters.
-
-        Args: trial
-
-        Returns:
-            Dictionary with hyperparameter name as key and hyperparameter value as value.
-
-        """
-        # Filtered default parameters
-        model_params = super().get_params(trial)
-
-        # XGB specific parameters
-        params = {
-            "gamma": trial.suggest_float("gamma", 0.0, 1.0),
-            "booster": trial.suggest_categorical("booster", ["gbtree", "dart"]),
-        }
-        return {**model_params, **params}
-
-    def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
-        return optuna.integration.XGBoostPruningCallback(
-            trial, observation_key=f"validation_1-{self.eval_metric}"
-        )
-
-    @classmethod
-    def get_default_values(cls) -> dict:
-        default_parameter_values = super().get_default_values()
-        default_parameter_values.update({"gamma": 0.0, "booster": "gbtree"})
-        return default_parameter_values
-
-
-class LGBRegressorObjective(RegressorObjective):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.model_type = ModelType.LGB
-
-    def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
-        """Get parameters for LGB Regressor Objective with objective specific parameters.
-
-        Args: trial
-
-        Returns:
-            Dictionary with hyperparameter name as key and hyperparameter value as value.
-
-        """
-        # Filtered default parameters
-        model_params = super().get_params(trial)
-
-        # LGB specific parameters
-        params = {
-            "num_leaves": trial.suggest_int("num_leaves", 16, 62),
-            "boosting_type": trial.suggest_categorical(
-                "boosting_type", ["gbdt", "dart", "rf"]
-            ),
-            "tree_learner": trial.suggest_categorical(
-                "tree_learner", ["serial", "feature", "data", "voting"]
-            ),
-            "n_estimators": trial.suggest_int("n_estimators", 50, 150),
-            "min_split_gain": trial.suggest_float("min_split_gain", 1e-8, 1),
-            "subsample_freq": trial.suggest_int("subsample_freq", 1, 10),
-        }
-        return {**model_params, **params}
-
-    def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
-        metric = self.eval_metric
-        if metric == "mae":
-            metric = "l1"
-        return optuna.integration.LightGBMPruningCallback(
-            trial, metric=metric, valid_name="valid_1"
-        )
-
-
-class XGBQuantileRegressorObjective(RegressorObjective):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.model_type = ModelType.XGB_QUANTILE
-
-    def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
-        """Get parameters for XGBQuantile Regressor Objective with objective specific parameters.
-
-        Args: trial
-
-        Returns:
-            Dictionary with hyperparameter name as key and hyperparameter value as value.
-
-        """
-        # Filtered default parameters
-        model_params = super().get_params(trial)
-
-        # XGB specific parameters
-        params = {
-            "gamma": trial.suggest_float("gamma", 1e-8, 1.0),
-        }
-        return {**model_params, **params}
-
-    def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
-        return optuna.integration.XGBoostPruningCallback(
-            trial, observation_key=f"validation_1-{self.eval_metric}"
-        )
-
-
-class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.model_type = ModelType.XGB_QUANTILE
-
-    def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
-        """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.
-
-        Args: trial
-
-        Returns:
-            Dictionary with hyperparameter name as key and hyperparameter value as value.
-
-        """
-        # Filtered default parameters
-        model_params = super().get_params(trial)
-
-        # XGB specific parameters
-        params = {
-            "gamma": trial.suggest_float("gamma", 1e-8, 1.0),
-            "arctan_smoothing": trial.suggest_float("arctan_smoothing", 0.025, 0.15),
-        }
-        return {**model_params, **params}
-
-    def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
-        return optuna.integration.XGBoostPruningCallback(
-            trial, observation_key=f"validation_1-{self.eval_metric}"
-        )
-
-
-class LinearRegressorObjective(RegressorObjective):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.model_type = ModelType.LINEAR
-
-    def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
-        """Get parameters for Linear Regressor Objective with objective specific parameters.

-        Args: trial
-
-        Returns:
-            Dictionary with hyperparameter name as key and hyperparameter value as value.
-
-        """
-        # Imputation strategy
-        params = {
-            "imputation_strategy": trial.suggest_categorical(
-                "imputation_strategy", ["mean", "median", "most_frequent"]
-            ),
-        }
-        return params
-
-
-class ARIMARegressorObjective(RegressorObjective):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.model_type = ModelType.ARIMA
-
-    def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
-        """Get parameters for ARIMA Regressor Objective with objective specific parameters.
-
-        Temporary: it seems strange to use optuna for ARIMA models,
-        since this is usually done via statistical analysis and heuristics.
-
-        Args: trial
-
-        Returns:
-            Dictionary with hyperparameter name as key and hyperparameter value as value.
-
-        """
-        # Trend
-        params = {
-            "trend": trial.suggest_categorical("trend", ["n", "c", "t", "ct"]),
-        }
-        return params
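
The hunk above removes the optuna glue for hyperparameter search: the study calls the objective once per trial; each call re-splits the input data, samples parameters via get_params, fits the model with early stopping and optional pruning, and returns the evaluation score while stashing a deep copy of the fitted model on the trial. A minimal sketch of that flow against the removed 3.4.x API (the ModelCreator call is an assumption based on the package's other removed modules; the objective/study wiring follows the code above):

.. code-block:: py

    import optuna

    from openstef.enums import ModelType
    from openstef.model.model_creator import ModelCreator  # removed module, assumed API
    from openstef.model.objective import XGBRegressorObjective

    # input_data: DataFrame with "load" as the first column, features in
    # between, and "horizon" as the last column (enforced in __call__).
    model = ModelCreator.create_model(ModelType.XGB)  # hypothetical construction
    objective = XGBRegressorObjective(model, input_data)

    # Each trial samples get_params(), fits with early stopping, and returns the score.
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=8)

    # Every trial stores its fitted model via trial.set_user_attr("model", ...).
    best_model = study.best_trial.user_attrs["model"]
    report = objective.create_report(best_model)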
openstef/model/objective_creator.py
@@ -1,65 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-
-from typing import Union
-
-from openstef.enums import ModelType
-from openstef.model.objective import (
-    ARIMARegressorObjective,
-    LGBRegressorObjective,
-    LinearRegressorObjective,
-    RegressorObjective,
-    XGBQuantileRegressorObjective,
-    XGBRegressorObjective,
-    XGBMultioutputQuantileRegressorObjective,
-)
-from openstef.model.regressors.custom_regressor import (
-    create_custom_objective,
-    is_custom_type,
-)
-
-
-class ObjectiveCreator:
-    OBJECTIVES = {
-        ModelType.XGB: XGBRegressorObjective,
-        ModelType.LGB: LGBRegressorObjective,
-        ModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
-        ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
-        ModelType.LINEAR: LinearRegressorObjective,
-        ModelType.LINEAR_QUANTILE: LinearRegressorObjective,
-        ModelType.GBLINEAR_QUANTILE: LinearRegressorObjective,
-        ModelType.ARIMA: ARIMARegressorObjective,
-    }
-
-    @staticmethod
-    def create_objective(model_type: Union[ModelType, str]) -> RegressorObjective:
-        """Create an objective function based on model type.
-
-        Args:
-            model_type: Model type to construct.
-
-        Raises:
-            NotImplementedError: When using an invalid model_type.
-
-        Returns:
-            Objective function
-
-        """
-        try:
-            # This will raise a ValueError when an invalid model_type str is used
-            # and nothing when a ModelType enum is used.
-            if is_custom_type(model_type):
-                objective = create_custom_objective(model_type)
-            else:
-                model_type = ModelType(model_type)
-                objective = ObjectiveCreator.OBJECTIVES[model_type]
-        except ValueError as e:
-            valid_types = [t.value for t in ModelType]
-            raise NotImplementedError(
-                f"No objective for '{model_type}', "
-                f"valid model_types are: {valid_types} "
-                "or import a custom model"
-            ) from e
-
-        return objective
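
For context, the removed creator was a thin lookup from ModelType to an objective class; callers instantiated the returned class themselves. A hedged usage sketch (model and input_data as in the previous sketch; only the lookup behaviour is taken from the code above):

.. code-block:: py

    from openstef.enums import ModelType
    from openstef.model.objective_creator import ObjectiveCreator

    # Returns the objective *class* for built-in model types.
    ObjectiveClass = ObjectiveCreator.create_objective(ModelType.XGB)
    objective = ObjectiveClass(model, input_data)

    # An unknown model_type raises NotImplementedError listing the valid values.
    try:
        ObjectiveCreator.create_objective("not-a-model")
    except NotImplementedError as err:
        print(err)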
openstef/model/regressors/__init__.py
@@ -1,3 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
openstef/model/regressors/arima.py
@@ -1,197 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-"""This module contains the SARIMAX regressor wrapper around the statsmodels implementation."""
-import numpy as np
-import pandas as pd
-import statsmodels.api as sm
-from sklearn.metrics import r2_score
-from sklearn.model_selection import TimeSeriesSplit
-
-from openstef.model.regressors.regressor import OpenstfRegressor
-
-
-class ARIMAOpenstfRegressor(OpenstfRegressor):
-    """Wrapper around the statsmodels implementation of the (S)ARIMA(X) model.
-
-    Fitting a statsmodels ARIMA produces a results object which is used to perform the various computations around forecasting.
-    (see https://www.statsmodels.org/dev/generated/statsmodels.tsa.arima.model.ARIMAResults.html)
-
-    To make a prediction, it needs to update the results object's historic data,
-    i.e. the past values of the target/endogenous data and the features/exogenous data,
-    applying the fitted parameters to these new data unrelated to the original training data.
-    This update can be performed by the method `update_historic_data`.
-
-    In the following code, we use the statsmodels and scikit-learn terminology interchangeably for the variables:
-    - the features 'x' are equivalent to the exogenous data: 'exog' for short.
-    - the target 'y' is equivalent to the endogenous data: 'endog' for short.
-    More information here https://www.statsmodels.org/stable/endog_exog.html.
-
-    """
-
-    def __init__(
-        self,
-        backtest_max_horizon=1440,
-        order=(0, 0, 0),
-        seasonal_order=(0, 0, 0, 0),
-        trend=None,
-    ):
-        self.backtest_max_horizon = backtest_max_horizon
-        self.order = order
-        self.seasonal_order = seasonal_order
-        self.trend = trend
-
-    def fit(self, x, y, **kwargs):
-        dates = x.index
-        self.model_ = sm.tsa.arima.ARIMA(
-            endog=y,
-            exog=x,
-            dates=dates,
-            order=self.order,
-            seasonal_order=self.seasonal_order,
-            trend=self.trend,
-        )
-        self.results_ = self.model_.fit()
-        self.feature_in_names_ = list(x.columns)
-        return self
-
-    def update_historic_data(self, x_past, y_past):
-        """Apply the fitted parameters to new data unrelated to the original training data. This is a side effect.
-
-        Creates a new results object using the current fitted parameters,
-        applied to a completely new dataset that is assumed to be unrelated to the model's original data.
-        The new results can then be used for analysis or forecasting.
-        It should be used before forecasting, to wedge the historic data just before the first forecast timestamp,
-        with:
-        - New observations from the modeled time-series process.
-        - New observations of exogenous regressors.
-
-        Parameters
-        ----------
-        x_past : pd.DataFrame
-            The exogenous (features) data.
-        y_past : pd.DataFrame
-            The endogenous (target) data.
-
-        """
-        self.results_ = self.results_.apply(
-            endog=y_past,
-            exog=x_past,
-        )
-
-    def predict_quantile(self, start, end, exog, quantile):
-        """Quantile prediction.
-
-        It relies on the confidence intervals of the fitted parameters.
-
-        Parameters
-        ----------
-        start : int, str, or datetime, optional
-            Zero-indexed observation number at which to start forecasting, i.e.,
-            the first forecast is start. Can also be a date string to parse or a datetime type.
-            Default is the zeroth observation.
-        end : int, str, or datetime, optional
-            Zero-indexed observation number at which to end forecasting, i.e.,
-            the last forecast is end. Can also be a date string to parse or a datetime type.
-            However, if the dates index does not have a fixed frequency,
-            end must be an integer index if you want out-of-sample prediction.
-            Default is the last observation in the sample.
-        exog : pd.DataFrame
-            Exogenous data (features).
-        quantile : float
-            The quantile for the confidence interval.
-
-        Returns
-        -------
-        pd.Series
-            The quantile prediction.
-
-        """
-        alpha = quantile
-        idx = 0
-        if quantile > 0.5:
-            alpha = 1 - quantile
-            idx = 1
-        return (
-            self.results_.get_prediction(start, end, exog=exog)
-            .conf_int(alpha=alpha)
-            .iloc[:, idx]
-        )
-
-    def predict(self, x, quantile: float = 0.5, **kwargs):
-        start = x.iloc[0].name
-        end = x.iloc[-1].name
-        predictions = self.results_.predict(start, end, exog=x).to_numpy()
-        if quantile != 0.5:
-            predictions = self.predict_quantile(start, end, exog=x, quantile=quantile)
-        return predictions
-
-    def set_feature_importance(self):
-        """Because the report needs 'weight' and 'gain' as importance metrics, we set the values to these names.
-
-        - 'weight' corresponds to the coefficient values
-        - 'gain' corresponds to the p-value of the nullity test of each coefficient
-
-        """
-        importances = pd.DataFrame(
-            {"weight": self.results_.params, "gain": self.results_.pvalues}
-        )
-        return importances
-
-    @property
-    def feature_names(self):
-        """The names of the features used to train the model."""
-        return self.feature_in_names_
-
-    @property
-    def can_predict_quantiles(self):
-        """Indicates whether this model can make quantile predictions."""
-        return True
-
-    def score(self, x, y):
-        """Compute the R2 score with a backtesting strategy.
-
-        The backtest is performed by the time series cross-validator of scikit-learn, which
-        returns the first k folds as the train set and the (k+1)th fold as the test set in the kth split.
-        (see https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html)
-
-        It needs to update the historic data with (x_past, y_past) for each split.
-
-        """
-        ys_true = []
-        ys_pred = []
-
-        # Build the cross-validator
-        freq = pd.infer_freq(x.index)
-        if not freq[0].isdigit():
-            freq = f"1{freq}"
-        max_horizon_delta = pd.Timedelta(self.backtest_max_horizon, "minutes")
-        freq_delta = pd.Timedelta(freq)
-        test_size = max_horizon_delta // freq_delta
-        n_splits = (x.shape[0] // test_size) - 1
-        time_series_cross_validator = TimeSeriesSplit(
-            n_splits=n_splits, test_size=test_size
-        )
-
-        # Backtesting
-        for apply_index, test_index in time_series_cross_validator.split(x):
-            # Update the historic data to the current split (i.e. the first k folds)
-            updated_results = self.results_.apply(
-                y.iloc[apply_index], x.iloc[apply_index]
-            )
-
-            # The (k+1)th fold is the test data
-            x_test, y_true_test = x.iloc[test_index], y.iloc[test_index]
-            start_test = x_test.iloc[0].name
-            end_test = x_test.iloc[-1].name
-
-            # Compute and gather the predictions
-            y_pred_test = updated_results.predict(
-                start=start_test, end=end_test, exog=x_test
-            )
-            ys_true.append(y_true_test)
-            ys_pred.append(y_pred_test)
-
-        ys_true = np.concatenate(ys_true)
-        ys_pred = np.concatenate(ys_pred)
-        return r2_score(ys_true, ys_pred)
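
One detail worth spelling out from the hunk above: statsmodels' conf_int(alpha=a) returns the lower and upper bounds of the (1 - a) confidence interval, so predict_quantile maps a requested quantile onto one side of such an interval, taking the lower column for quantile < 0.5 and the upper column (with alpha = 1 - quantile) otherwise. A self-contained sketch against the removed class (the synthetic series and column name are illustrative assumptions):

.. code-block:: py

    import numpy as np
    import pandas as pd

    from openstef.model.regressors.arima import ARIMAOpenstfRegressor  # removed in 4.0.0a3

    # Synthetic quarter-hourly load driven by one exogenous feature.
    index = pd.date_range("2023-01-01", periods=400, freq="15min")
    x = pd.DataFrame({"temperature": np.random.randn(400)}, index=index)
    y = pd.Series(2.0 * x["temperature"] + np.random.randn(400), index=index, name="load")

    model = ARIMAOpenstfRegressor(order=(1, 0, 0)).fit(x, y)

    # Median forecast plus the 10th/90th percentiles derived from the
    # parameters' confidence intervals.
    x_eval = x.iloc[-96:]
    median = model.predict(x_eval)
    p10 = model.predict(x_eval, quantile=0.1)
    p90 = model.predict(x_eval, quantile=0.9)

    # Backtested R2 over TimeSeriesSplit folds of backtest_max_horizon minutes each.
    print(model.score(x, y))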