openstef 3.4.9__py3-none-any.whl → 3.4.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. openstef/app_settings.py +19 -0
  2. openstef/data_classes/data_prep.py +1 -1
  3. openstef/data_classes/prediction_job.py +12 -8
  4. openstef/enums.py +3 -7
  5. openstef/exceptions.py +1 -1
  6. openstef/feature_engineering/apply_features.py +0 -6
  7. openstef/feature_engineering/data_preparation.py +12 -5
  8. openstef/feature_engineering/feature_applicator.py +1 -5
  9. openstef/feature_engineering/general.py +14 -0
  10. openstef/feature_engineering/lag_features.py +1 -1
  11. openstef/feature_engineering/missing_values_transformer.py +99 -0
  12. openstef/feature_engineering/weather_features.py +7 -0
  13. openstef/metrics/figure.py +3 -0
  14. openstef/metrics/metrics.py +58 -1
  15. openstef/metrics/reporter.py +7 -0
  16. openstef/model/confidence_interval_applicator.py +28 -3
  17. openstef/model/model_creator.py +36 -27
  18. openstef/model/objective.py +11 -28
  19. openstef/model/objective_creator.py +4 -3
  20. openstef/model/regressors/arima.py +1 -1
  21. openstef/model/regressors/dazls.py +35 -96
  22. openstef/model/regressors/flatliner.py +100 -0
  23. openstef/model/regressors/linear_quantile.py +247 -0
  24. openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  25. openstef/model/regressors/xgb_quantile.py +3 -0
  26. openstef/model/serializer.py +10 -0
  27. openstef/model/standard_deviation_generator.py +3 -2
  28. openstef/model_selection/model_selection.py +3 -0
  29. openstef/monitoring/performance_meter.py +1 -2
  30. openstef/monitoring/teams.py +11 -0
  31. openstef/pipeline/create_basecase_forecast.py +11 -1
  32. openstef/pipeline/create_component_forecast.py +11 -22
  33. openstef/pipeline/create_forecast.py +20 -1
  34. openstef/pipeline/optimize_hyperparameters.py +18 -16
  35. openstef/pipeline/train_create_forecast_backtest.py +11 -1
  36. openstef/pipeline/train_model.py +23 -7
  37. openstef/pipeline/utils.py +3 -0
  38. openstef/postprocessing/postprocessing.py +29 -0
  39. openstef/settings.py +15 -0
  40. openstef/tasks/calculate_kpi.py +20 -17
  41. openstef/tasks/create_basecase_forecast.py +13 -5
  42. openstef/tasks/create_components_forecast.py +20 -4
  43. openstef/tasks/create_forecast.py +5 -2
  44. openstef/tasks/split_forecast.py +7 -0
  45. openstef/tasks/train_model.py +7 -5
  46. openstef/tasks/utils/taskcontext.py +7 -0
  47. openstef/validation/validation.py +27 -2
  48. {openstef-3.4.9.dist-info → openstef-3.4.29.dist-info}/METADATA +34 -38
  49. openstef-3.4.29.dist-info/RECORD +91 -0
  50. {openstef-3.4.9.dist-info → openstef-3.4.29.dist-info}/WHEEL +1 -1
  51. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
  52. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license +0 -3
  53. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
  54. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license +0 -3
  55. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
  56. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license +0 -3
  57. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
  58. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
  59. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
  60. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
  61. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
  62. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
  63. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
  64. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
  65. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
  66. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
  67. openstef/feature_engineering/historic_features.py +0 -40
  68. openstef/model/regressors/proloaf.py +0 -281
  69. openstef/tasks/run_tracy.py +0 -145
  70. openstef-3.4.9.dist-info/RECORD +0 -104
  71. {openstef-3.4.9.dist-info → openstef-3.4.29.dist-info}/LICENSE +0 -0
  72. {openstef-3.4.9.dist-info → openstef-3.4.29.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  import copy
5
- from datetime import datetime
5
+ from datetime import datetime, timezone
6
6
  from typing import Any, Callable, Optional
7
7
 
8
8
  import optuna
@@ -59,7 +59,7 @@ class RegressorObjective:
59
59
  self.validation_data = None
60
60
  self.test_data = None
61
61
  self.model = model
62
- self.start_time = datetime.utcnow()
62
+ self.start_time = datetime.now(timezone.utc)
63
63
  self.test_fraction = test_fraction
64
64
  self.validation_fraction = validation_fraction
65
65
  self.eval_metric = eval_metric
@@ -94,7 +94,7 @@ class RegressorObjective:
94
94
  split_args = self.split_args
95
95
  if split_args is None:
96
96
  split_args = {
97
- "stratification_min_max": self.model_type != MLModelType.ProLoaf,
97
+ "stratification_min_max": True,
98
98
  "back_test": True,
99
99
  }
100
100
  (
@@ -349,13 +349,13 @@ class XGBQuantileRegressorObjective(RegressorObjective):
349
349
  )
350
350
 
351
351
 
352
- class ProLoafRegressorObjective(RegressorObjective):
352
+ class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
353
353
  def __init__(self, *args, **kwargs):
354
354
  super().__init__(*args, **kwargs)
355
- self.model_type = MLModelType.ProLoaf
355
+ self.model_type = MLModelType.XGB_QUANTILE
356
356
 
357
357
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
358
- """Get parameters for ProLoaf Regressor Objective with objective specific parameters.
358
+ """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.
359
359
 
360
360
  Args: trial
361
361
 
@@ -366,33 +366,16 @@ class ProLoafRegressorObjective(RegressorObjective):
366
366
  # Filtered default parameters
367
367
  model_params = super().get_params(trial)
368
368
 
369
- # ProLoaf specific parameters
369
+ # XGB specific parameters
370
370
  params = {
371
- # TODO: look into optimizing this pipeline for proloaf
372
- # "relu_leak": trial.suggest_float("relu_leak", 0.1, 1.0),
373
- # "core_layers": trial.suggest_int("core_layers", 1, 3),
374
- # "rel_linear_hidden_size": trial.suggest_float(
375
- # "rel_linear_hidden_size", 0.1, 1
376
- # ),
377
- # "rel_core_hidden_size": trial.suggest_float("rel_core_hidden_size", 0.1, 1),
378
- # "dropout_fc": trial.suggest_float("dropout_fc", 0.1, 0.9),
379
- # "dropout_core": trial.suggest_float("dropout_core", 0.1, 0.9),
380
- # "early_stopping_patience": trial.suggest_int(
381
- # "early_stopping_patience", 5, 10
382
- # ),
383
- # "early_stopping_margin": trial.suggest_float(
384
- # "early_stopping_margin", 0.1, 0.9
385
- # ),
386
- "max_epochs": trial.suggest_int(
387
- "max_epochs", 1, 1
388
- ), # TODO: change after having availability to gpu resource
389
- "batch_size": trial.suggest_int("batch_size", 1, 24),
371
+ "gamma": trial.suggest_float("gamma", 1e-8, 1.0),
372
+ "arctan_smoothing": trial.suggest_float("arctan_smoothing", 0.025, 0.15),
390
373
  }
391
374
  return {**model_params, **params}
392
375
 
393
376
  def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
394
- return optuna.integration.PyTorchLightningPruningCallback(
395
- trial, monitor="val_loss"
377
+ return optuna.integration.XGBoostPruningCallback(
378
+ trial, observation_key=f"validation_1-{self.eval_metric}"
396
379
  )
397
380
 
398
381
 
@@ -6,13 +6,13 @@ from typing import Union
6
6
 
7
7
  from openstef.enums import MLModelType
8
8
  from openstef.model.objective import (
9
+ ARIMARegressorObjective,
9
10
  LGBRegressorObjective,
10
11
  LinearRegressorObjective,
11
- ProLoafRegressorObjective,
12
12
  RegressorObjective,
13
13
  XGBQuantileRegressorObjective,
14
14
  XGBRegressorObjective,
15
- ARIMARegressorObjective,
15
+ XGBMultioutputQuantileRegressorObjective,
16
16
  )
17
17
  from openstef.model.regressors.custom_regressor import (
18
18
  create_custom_objective,
@@ -25,8 +25,9 @@ class ObjectiveCreator:
25
25
  MLModelType.XGB: XGBRegressorObjective,
26
26
  MLModelType.LGB: LGBRegressorObjective,
27
27
  MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
28
- MLModelType.ProLoaf: ProLoafRegressorObjective,
28
+ MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
29
29
  MLModelType.LINEAR: LinearRegressorObjective,
30
+ MLModelType.LINEAR_QUANTILE: LinearRegressorObjective,
30
31
  MLModelType.ARIMA: ARIMARegressorObjective,
31
32
  }
32
33
 
@@ -5,9 +5,9 @@
5
5
  import numpy as np
6
6
  import pandas as pd
7
7
  import statsmodels.api as sm
8
-
9
8
  from sklearn.metrics import r2_score
10
9
  from sklearn.model_selection import TimeSeriesSplit
10
+
11
11
  from openstef.model.regressors.regressor import OpenstfRegressor
12
12
 
13
13
 
@@ -4,65 +4,41 @@
4
4
  """This module defines the DAZL model."""
5
5
  import numpy as np
6
6
  from sklearn.base import BaseEstimator
7
+ from sklearn.compose import TransformedTargetRegressor
8
+ from sklearn.linear_model import LinearRegression
7
9
  from sklearn.metrics import mean_squared_error, r2_score
8
- from sklearn.neighbors import KNeighborsRegressor
10
+ from sklearn.pipeline import Pipeline
9
11
  from sklearn.preprocessing import MinMaxScaler
10
- from sklearn.utils import shuffle
11
12
 
12
13
 
13
14
  class Dazls(BaseEstimator):
14
15
  """DAZLS model.
15
16
 
16
- The model carries out wind and solar power prediction for unseen target substations using training data from
17
- other substations with known components.
18
-
19
- Any data-driven model can be plugged and used as the base for the domain and the adaptation model.
20
-
21
- For a full reference, see:
22
- Teng, S.Y., van Nooten, C. C., van Doorn, J.M., Ottenbros, A., Huijbregts, M., Jansen, J.J.
23
- Improving Near Real-Time Predictions of Renewable Electricity Production at Substation Level (Submitted)
17
+ The model carries out wind and solar power prediction for unseen target substations using training data from other
18
+ substations with known components.
24
19
 
25
20
  """
26
21
 
22
+ model_: Pipeline
23
+
27
24
  def __init__(self):
28
25
  """Initialize DAZL model."""
29
26
  self.__name__ = "DAZLS"
30
- self.domain_model_scaler = MinMaxScaler(clip=True)
31
- self.adaptation_model_scaler = MinMaxScaler(clip=True)
32
- self.target_scaler = MinMaxScaler(clip=True)
33
- self.domain_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")
34
- self.adaptation_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")
27
+
28
+ regressor = TransformedTargetRegressor(
29
+ regressor=LinearRegression(),
30
+ transformer=MinMaxScaler(clip=True),
31
+ )
32
+
33
+ self.model_ = Pipeline(
34
+ [("scaler", MinMaxScaler(clip=True)), ("regressor", regressor)]
35
+ )
35
36
 
36
37
  # The input columns for the domain and adaptation models (with description)
37
- self.domain_model_input_columns = [
38
+ self.baseline_input_columns = [
38
39
  "radiation", # Weather parameter
39
40
  "windspeed_100m", # Weather parameter
40
- "total_substation", # Substation's measured total load
41
- "lat", # Latitude
42
- "lon", # Longitude
43
- "solar_on", # Solar installed on substation: yes=1, no=0
44
- "wind_on", # Wind installed on substation: yes=1, no=0
45
- "hour", # Hour of the day
46
- "minute", # Minute of the hour
47
- "var0", # Variance of the total load
48
- "var1", # Variance of the total pv load (only available for calibration substations)
49
- "var2", # Variance of the total wind load (only available for calibration substations)
50
- "sem0", # Standard Error of the Mean of the total load
51
- "sem1", # Standard Error of the Mean of the total PV load (only available for calibration substations)
52
- ]
53
- self.adaptation_model_input_columns = [
54
- "total_substation",
55
- "lat",
56
- "lon",
57
- "solar_on",
58
- "wind_on",
59
- "hour",
60
- "minute",
61
- "var0",
62
- "var1",
63
- "var2",
64
- "sem0",
65
- "sem1",
41
+ "total_load",
66
42
  ]
67
43
  self.target_columns = ["total_wind_part", "total_solar_part"]
68
44
 
@@ -78,30 +54,12 @@ class Dazls(BaseEstimator):
78
54
  target: the expected output (y_train)
79
55
 
80
56
  """
81
- x, x2, y = (
82
- features.loc[:, self.domain_model_input_columns],
83
- features.loc[:, self.adaptation_model_input_columns],
57
+ x, y = (
58
+ features.loc[:, self.baseline_input_columns],
84
59
  target.loc[:, self.target_columns],
85
60
  )
86
- domain_model_input, adaptation_model_input, y_train = shuffle(
87
- x, x2, y, random_state=999
88
- ) # just shuffling
89
-
90
- self.domain_model_scaler.fit(domain_model_input)
91
- self.adaptation_model_scaler.fit(adaptation_model_input)
92
- self.target_scaler.fit(y_train)
93
- domain_model_input = self.domain_model_scaler.transform(domain_model_input)
94
- adaptation_model_input = self.adaptation_model_scaler.transform(
95
- adaptation_model_input
96
- )
97
- y_train = self.target_scaler.transform(y_train)
98
61
 
99
- self.domain_model.fit(domain_model_input, y_train)
100
- domain_model_pred = self.domain_model.predict(domain_model_input)
101
- adaptation_model_input = np.concatenate(
102
- (adaptation_model_input, domain_model_pred), axis=1
103
- )
104
- self.adaptation_model.fit(adaptation_model_input, y_train)
62
+ self.model_.fit(x, y)
105
63
 
106
64
  def predict(self, x: np.array):
107
65
  """Make a prediction.
@@ -109,37 +67,21 @@ class Dazls(BaseEstimator):
109
67
  For the prediction we use the test data x. We use domain_model_input_columns and
110
68
  adaptation_model_input_columns to separate x in test data for domain model and adaptation model respectively.
111
69
 
70
+ There is an option available to return the domain model and adaptation model predictions separately to more
71
+ easily investigate the effectiveness of the models.
72
+
112
73
  Args:
113
74
  x: domain_model_test_data, adaptation_model_test_data
75
+ return_sub_preds : a flag value indicating to return the predictions of the domain model and adaptation
76
+ model separately. (Default: False.)
77
+
78
+ Returns:
114
79
  prediction: The output prediction after both models.
115
80
 
116
81
  """
117
- domain_model_test_data, adaptation_model_test_data = (
118
- x.loc[:, self.domain_model_input_columns],
119
- x.loc[:, self.adaptation_model_input_columns],
120
- )
121
- # Rescale test data for both models (if required)
122
- domain_model_test_data_scaled = self.domain_model_scaler.transform(
123
- domain_model_test_data
124
- )
125
- adaptation_model_test_data_scaled = self.adaptation_model_scaler.transform(
126
- adaptation_model_test_data
127
- )
128
- # Use the scaled data to make domain_model_prediction
129
- domain_model_test_data_pred = self.domain_model.predict(
130
- domain_model_test_data_scaled
131
- )
132
- # Use the domain_model_prediction to make adaptation_model_prediction
133
- adaptation_model_test_data_pred = self.adaptation_model.predict(
134
- np.concatenate(
135
- [adaptation_model_test_data_scaled, domain_model_test_data_pred], axis=1
136
- )
137
- )
138
- # Rescale adaptation_model_prediction (if required)
139
- prediction = self.target_scaler.inverse_transform(
140
- adaptation_model_test_data_pred
141
- )
142
- return prediction
82
+ model_test_data = x.loc[:, self.baseline_input_columns]
83
+
84
+ return self.model_.predict(model_test_data)
143
85
 
144
86
  def score(self, truth, prediction):
145
87
  """Evaluation of the prediction's output.
@@ -165,13 +107,10 @@ class Dazls(BaseEstimator):
165
107
  """
166
108
  summary_str = (
167
109
  f"{self.__name__} model summary:\n\n"
168
- f"Domain Model: {self.domain_model} \n"
169
- f"\tInput columns: {self.domain_model_input_columns} \n"
170
- f"\tScaler: {self.domain_model_scaler} \n\n"
171
- f"Adaptation Model: {self.adaptation_model} \n"
172
- f"\tInput columns: {self.adaptation_model_input_columns} \n"
173
- f"\tScaler: {self.adaptation_model_scaler} \n\n"
174
- f"Target columns: {self.target_columns}"
110
+ f"Model: {self.model_} \n"
111
+ f"\tInput columns: {self.baseline_input_columns} \n"
112
+ f"\tScaler: {self.model_['scaler']} \n\n"
113
+ f"\tRegressor: {self.model_['regressor']} \n\n"
175
114
  )
176
115
 
177
116
  return summary_str
@@ -0,0 +1,100 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ import re
5
+ from typing import Dict, Union, Set, Optional, List
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from sklearn.base import RegressorMixin
10
+ from sklearn.linear_model import QuantileRegressor
11
+ from sklearn.preprocessing import MinMaxScaler
12
+ from sklearn.utils.validation import check_is_fitted
13
+
14
+ from openstef.feature_engineering.missing_values_transformer import (
15
+ MissingValuesTransformer,
16
+ )
17
+ from openstef.model.regressors.regressor import OpenstfRegressor
18
+
19
+
20
+ class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
21
+ feature_names_: List[str] = []
22
+
23
+ def __init__(self, quantiles=None):
24
+ """Initialize FlatlinerRegressor.
25
+
26
+ The model always predicts 0.0, regardless of the input features. The model is
27
+ meant to be used for flatliner locations that still expect a prediction while
28
+ preserving the prediction interface.
29
+ """
30
+ super().__init__()
31
+ self.quantiles = quantiles
32
+
33
+ @property
34
+ def feature_names(self) -> list:
35
+ """The names of the features used to train the model."""
36
+ check_is_fitted(self)
37
+ return self.feature_names_
38
+
39
+ @staticmethod
40
+ def _get_importance_names():
41
+ return {
42
+ "gain_importance_name": "total_gain",
43
+ "weight_importance_name": "weight",
44
+ }
45
+
46
+ @property
47
+ def can_predict_quantiles(self) -> bool:
48
+ """Attribute that indicates if the model predict particular quantiles."""
49
+ return True
50
+
51
+ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
52
+ """Fits flatliner model.
53
+
54
+ Args:
55
+ x: Feature matrix
56
+ y: Labels
57
+
58
+ Returns:
59
+ Fitted LinearQuantile model
60
+
61
+ """
62
+ self.feature_names_ = list(x.columns)
63
+ self.feature_importances_ = np.ones(len(self.feature_names_)) / (
64
+ len(self.feature_names_) or 1.0
65
+ )
66
+
67
+ return self
68
+
69
+ def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
70
+ """Makes a prediction for a desired quantile.
71
+
72
+ Args:
73
+ x: Feature matrix
74
+ quantile: Quantile for which a prediciton is desired,
75
+ note that only quantile are available for which a model is trained,
76
+ and that this is a quantile-model specific keyword
77
+
78
+ Returns:
79
+ Prediction
80
+
81
+ Raises:
82
+ ValueError in case no model is trained for the requested quantile
83
+
84
+ """
85
+ check_is_fitted(self)
86
+
87
+ return np.zeros(x.shape[0])
88
+
89
+ def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
90
+ check_is_fitted(self)
91
+ return np.array([0.0 for _ in self.feature_names_])
92
+
93
+ @classmethod
94
+ def _get_param_names(cls):
95
+ return [
96
+ "quantiles",
97
+ ]
98
+
99
+ def __sklearn_is_fitted__(self) -> bool:
100
+ return True
@@ -0,0 +1,247 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ import re
5
+ from typing import Dict, Union, Set, Optional
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from sklearn.base import RegressorMixin
10
+ from sklearn.linear_model import QuantileRegressor
11
+ from sklearn.preprocessing import MinMaxScaler
12
+ from sklearn.utils.validation import check_is_fitted
13
+
14
+ from openstef.feature_engineering.missing_values_transformer import (
15
+ MissingValuesTransformer,
16
+ )
17
+ from openstef.model.regressors.regressor import OpenstfRegressor
18
+
19
+ DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
20
+
21
+
22
+ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
23
+ quantiles: tuple[float, ...]
24
+ alpha: float
25
+ solver: str
26
+
27
+ imputer_: MissingValuesTransformer
28
+ x_scaler_: MinMaxScaler
29
+ y_scaler_: MinMaxScaler
30
+ models_: Dict[float, QuantileRegressor]
31
+
32
+ is_fitted_: bool = False
33
+
34
+ FEATURE_IGNORE_LIST: Set[str] = {
35
+ "IsWeekendDay",
36
+ "IsWeekDay",
37
+ "IsSunday",
38
+ "Month",
39
+ "Quarter",
40
+ }
41
+
42
+ def __init__(
43
+ self,
44
+ quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
45
+ alpha: float = 0.0,
46
+ solver: str = "highs",
47
+ missing_values: Union[int, float, str, None] = np.nan,
48
+ imputation_strategy: Optional[str] = "mean",
49
+ fill_value: Union[str, int, float] = None,
50
+ ):
51
+ """Initialize LinearQuantileOpenstfRegressor.
52
+
53
+ Model that provides quantile regression with SKLearn QuantileRegressor.
54
+ For each desired quantile an QuantileRegressor model is trained,
55
+ these can later be used to predict quantiles.
56
+
57
+ This model is sensitive to feature quality and therefore has logic to remove
58
+ some custom features produced by OpenSTEF. The features that are removed are:
59
+ - Holiday features (is_christmas, is_*)
60
+ - Lagged features (T-1d, T-*)
61
+ - Point in time features (IsWeekendDay, IsWeekDay, IsSunday, Month, Quarter)
62
+ - Infeed MFFBAS profiles (E*_I)
63
+
64
+ Args:
65
+ quantiles: Tuple with desired quantiles, quantile 0.5 is required.
66
+ For example: (0.1, 0.5, 0.9)
67
+ alpha: Regularization constant for L1 regularization
68
+ solver: Solver to use for optimization
69
+ missing_values: Value to be considered as missing value
70
+ imputation_strategy: Imputation strategy
71
+ fill_value: Fill value
72
+
73
+ """
74
+ super().__init__()
75
+
76
+ # Check if quantile 0.5 is present. This is required.
77
+ if 0.5 not in quantiles:
78
+ raise ValueError(
79
+ "Cannot train quantile model as 0.5 is not in requested quantiles!"
80
+ )
81
+
82
+ self.quantiles = quantiles
83
+ self.alpha = alpha
84
+ self.solver = solver
85
+ self.imputer_ = MissingValuesTransformer(
86
+ missing_values=missing_values,
87
+ imputation_strategy=imputation_strategy,
88
+ fill_value=fill_value,
89
+ )
90
+ self.x_scaler_ = MinMaxScaler(feature_range=(-1, 1))
91
+ self.y_scaler_ = MinMaxScaler(feature_range=(-1, 1))
92
+ self.models_ = {
93
+ quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
94
+ for quantile in quantiles
95
+ }
96
+
97
+ @property
98
+ def feature_names(self) -> list:
99
+ """The names of the features used to train the model."""
100
+ check_is_fitted(self)
101
+ return self.imputer_.non_null_feature_names
102
+
103
+ @staticmethod
104
+ def _get_importance_names():
105
+ return {
106
+ "gain_importance_name": "total_gain",
107
+ "weight_importance_name": "weight",
108
+ }
109
+
110
+ @property
111
+ def can_predict_quantiles(self) -> bool:
112
+ """Attribute that indicates if the model predict particular quantiles."""
113
+ return True
114
+
115
+ def _is_feature_ignored(self, feature_name: str) -> bool:
116
+ """Check if a feature is ignored by the model.
117
+
118
+ Args:
119
+ feature_name: Feature name
120
+
121
+ Returns:
122
+ True if the feature is ignored, False otherwise
123
+
124
+ """
125
+ return (
126
+ # Ignore named features
127
+ feature_name in self.FEATURE_IGNORE_LIST
128
+ or
129
+ # Ignore holiday features
130
+ re.match(r"is_", feature_name) is not None
131
+ or
132
+ # Ignore lag features
133
+ re.match(r"T-", feature_name) is not None
134
+ or
135
+ # Ignore infeed MFFBAS profiles
136
+ re.match(r"E\d.*_I", feature_name) is not None
137
+ )
138
+
139
+ def _remove_ignored_features(self, x: pd.DataFrame) -> pd.DataFrame:
140
+ """Remove ignored features from the input data.
141
+
142
+ Args:
143
+ x: Input data
144
+
145
+ Returns:
146
+ Data without ignored features
147
+
148
+ """
149
+ return x.drop(columns=[c for c in x.columns if self._is_feature_ignored(c)])
150
+
151
+ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
152
+ """Fits linear quantile model.
153
+
154
+ Args:
155
+ x: Feature matrix
156
+ y: Labels
157
+
158
+ Returns:
159
+ Fitted LinearQuantile model
160
+
161
+ """
162
+ if not isinstance(y, pd.Series):
163
+ y = pd.Series(np.asarray(y), name="load")
164
+
165
+ x = self._remove_ignored_features(x)
166
+
167
+ # Fix nan columns
168
+ x = self.imputer_.fit_transform(x)
169
+ if x.isna().any().any():
170
+ raise ValueError(
171
+ "There are nan values in the input data. Set "
172
+ "imputation_strategy to solve them."
173
+ )
174
+
175
+ # Apply feature scaling
176
+ x_scaled = self.x_scaler_.fit_transform(x)
177
+ y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]
178
+
179
+ # Add more focus on extreme / peak values
180
+ sample_weight = np.abs(y_scaled)
181
+
182
+ # Fit quantile regressors
183
+ for quantile in self.quantiles:
184
+ self.models_[quantile].fit(
185
+ X=x_scaled, y=y_scaled, sample_weight=sample_weight
186
+ )
187
+
188
+ self.is_fitted_ = True
189
+
190
+ self.feature_importances_ = self._get_feature_importance_from_linear()
191
+
192
+ return self
193
+
194
+ def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
195
+ """Makes a prediction for a desired quantile.
196
+
197
+ Args:
198
+ x: Feature matrix
199
+ quantile: Quantile for which a prediciton is desired,
200
+ note that only quantile are available for which a model is trained,
201
+ and that this is a quantile-model specific keyword
202
+
203
+ Returns:
204
+ Prediction
205
+
206
+ Raises:
207
+ ValueError in case no model is trained for the requested quantile
208
+
209
+ """
210
+ check_is_fitted(self)
211
+
212
+ # Preprocess input data
213
+ x = self._remove_ignored_features(x)
214
+ x = self.imputer_.transform(x)
215
+ x_scaled = self.x_scaler_.transform(x)
216
+
217
+ # Make prediction
218
+ y_pred = self.models_[quantile].predict(X=x_scaled)
219
+
220
+ # Inverse scaling
221
+ y_pred = self.y_scaler_.inverse_transform(y_pred.reshape(-1, 1))[:, 0]
222
+
223
+ return y_pred
224
+
225
+ def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
226
+ check_is_fitted(self)
227
+ feature_importance_linear = np.abs(self.models_[quantile].coef_)
228
+ reg_feature_importances_dict = dict(
229
+ zip(self.imputer_.non_null_feature_names, feature_importance_linear)
230
+ )
231
+ return np.array(
232
+ [
233
+ reg_feature_importances_dict.get(c, 0)
234
+ for c in self.imputer_.in_feature_names
235
+ ]
236
+ )
237
+
238
+ @classmethod
239
+ def _get_param_names(cls):
240
+ return [
241
+ "quantiles",
242
+ "alpha",
243
+ "solver",
244
+ ]
245
+
246
+ def __sklearn_is_fitted__(self) -> bool:
247
+ return self.is_fitted_