openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. openstef/app_settings.py +19 -0
  2. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  3. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
  4. openstef/data/dutch_holidays.csv +1759 -0
  5. openstef/data_classes/data_prep.py +1 -1
  6. openstef/data_classes/prediction_job.py +15 -9
  7. openstef/enums.py +108 -9
  8. openstef/exceptions.py +1 -1
  9. openstef/feature_engineering/apply_features.py +25 -6
  10. openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
  11. openstef/feature_engineering/cyclic_features.py +102 -0
  12. openstef/feature_engineering/data_preparation.py +12 -5
  13. openstef/feature_engineering/feature_applicator.py +1 -5
  14. openstef/feature_engineering/general.py +14 -0
  15. openstef/feature_engineering/holiday_features.py +35 -26
  16. openstef/feature_engineering/missing_values_transformer.py +141 -0
  17. openstef/feature_engineering/weather_features.py +7 -0
  18. openstef/metrics/figure.py +3 -0
  19. openstef/metrics/metrics.py +58 -1
  20. openstef/metrics/reporter.py +7 -0
  21. openstef/model/confidence_interval_applicator.py +28 -3
  22. openstef/model/model_creator.py +54 -41
  23. openstef/model/objective.py +17 -34
  24. openstef/model/objective_creator.py +13 -12
  25. openstef/model/regressors/arima.py +1 -1
  26. openstef/model/regressors/dazls.py +35 -96
  27. openstef/model/regressors/flatliner.py +95 -0
  28. openstef/model/regressors/linear_quantile.py +296 -0
  29. openstef/model/regressors/xgb.py +23 -0
  30. openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  31. openstef/model/regressors/xgb_quantile.py +3 -0
  32. openstef/model/serializer.py +10 -0
  33. openstef/model_selection/model_selection.py +4 -1
  34. openstef/monitoring/performance_meter.py +1 -2
  35. openstef/monitoring/teams.py +11 -0
  36. openstef/pipeline/create_basecase_forecast.py +11 -1
  37. openstef/pipeline/create_component_forecast.py +24 -28
  38. openstef/pipeline/create_forecast.py +20 -1
  39. openstef/pipeline/optimize_hyperparameters.py +18 -16
  40. openstef/pipeline/train_create_forecast_backtest.py +11 -1
  41. openstef/pipeline/train_model.py +31 -12
  42. openstef/pipeline/utils.py +3 -0
  43. openstef/postprocessing/postprocessing.py +29 -0
  44. openstef/settings.py +15 -0
  45. openstef/tasks/calculate_kpi.py +23 -20
  46. openstef/tasks/create_basecase_forecast.py +15 -7
  47. openstef/tasks/create_components_forecast.py +24 -8
  48. openstef/tasks/create_forecast.py +9 -6
  49. openstef/tasks/create_solar_forecast.py +4 -4
  50. openstef/tasks/optimize_hyperparameters.py +2 -2
  51. openstef/tasks/split_forecast.py +9 -2
  52. openstef/tasks/train_model.py +9 -7
  53. openstef/tasks/utils/taskcontext.py +7 -0
  54. openstef/validation/validation.py +28 -3
  55. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
  56. openstef-3.4.44.dist-info/RECORD +97 -0
  57. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
  58. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
  59. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
  60. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
  61. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
  62. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
  63. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
  64. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
  65. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
  66. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
  67. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
  68. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
  69. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
  70. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
  71. openstef/data/dutch_holidays_2020-2022.csv +0 -831
  72. openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
  73. openstef/feature_engineering/historic_features.py +0 -40
  74. openstef/model/regressors/proloaf.py +0 -281
  75. openstef/tasks/run_tracy.py +0 -145
  76. openstef-3.4.10.dist-info/RECORD +0 -104
  77. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
  78. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
  79. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
  80. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
  81. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
@@ -2,13 +2,13 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  import copy
5
- from datetime import datetime
5
+ from datetime import datetime, timezone
6
6
  from typing import Any, Callable, Optional
7
7
 
8
8
  import optuna
9
9
  import pandas as pd
10
10
 
11
- from openstef.enums import MLModelType
11
+ from openstef.enums import ModelType
12
12
  from openstef.metrics import metrics
13
13
  from openstef.metrics.reporter import Report, Reporter
14
14
  from openstef.model.regressors.regressor import OpenstfRegressor
@@ -59,7 +59,7 @@ class RegressorObjective:
59
59
  self.validation_data = None
60
60
  self.test_data = None
61
61
  self.model = model
62
- self.start_time = datetime.utcnow()
62
+ self.start_time = datetime.now(timezone.utc)
63
63
  self.test_fraction = test_fraction
64
64
  self.validation_fraction = validation_fraction
65
65
  self.eval_metric = eval_metric
@@ -94,7 +94,7 @@ class RegressorObjective:
94
94
  split_args = self.split_args
95
95
  if split_args is None:
96
96
  split_args = {
97
- "stratification_min_max": self.model_type != MLModelType.ProLoaf,
97
+ "stratification_min_max": True,
98
98
  "back_test": True,
99
99
  }
100
100
  (
@@ -245,7 +245,7 @@ class RegressorObjective:
245
245
  class XGBRegressorObjective(RegressorObjective):
246
246
  def __init__(self, *args, **kwargs):
247
247
  super().__init__(*args, **kwargs)
248
- self.model_type = MLModelType.XGB
248
+ self.model_type = ModelType.XGB
249
249
 
250
250
  # extend the parameters with the model specific ones per implementation
251
251
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
@@ -282,7 +282,7 @@ class XGBRegressorObjective(RegressorObjective):
282
282
  class LGBRegressorObjective(RegressorObjective):
283
283
  def __init__(self, *args, **kwargs):
284
284
  super().__init__(*args, **kwargs)
285
- self.model_type = MLModelType.LGB
285
+ self.model_type = ModelType.LGB
286
286
 
287
287
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
288
288
  """Get parameters for LGB Regressor Objective with objective specific parameters.
@@ -323,7 +323,7 @@ class LGBRegressorObjective(RegressorObjective):
323
323
  class XGBQuantileRegressorObjective(RegressorObjective):
324
324
  def __init__(self, *args, **kwargs):
325
325
  super().__init__(*args, **kwargs)
326
- self.model_type = MLModelType.XGB_QUANTILE
326
+ self.model_type = ModelType.XGB_QUANTILE
327
327
 
328
328
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
329
329
  """Get parameters for XGBQuantile Regressor Objective with objective specific parameters.
@@ -349,13 +349,13 @@ class XGBQuantileRegressorObjective(RegressorObjective):
349
349
  )
350
350
 
351
351
 
352
- class ProLoafRegressorObjective(RegressorObjective):
352
+ class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
353
353
  def __init__(self, *args, **kwargs):
354
354
  super().__init__(*args, **kwargs)
355
- self.model_type = MLModelType.ProLoaf
355
+ self.model_type = ModelType.XGB_QUANTILE
356
356
 
357
357
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
358
- """Get parameters for ProLoaf Regressor Objective with objective specific parameters.
358
+ """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.
359
359
 
360
360
  Args: trial
361
361
 
@@ -366,40 +366,23 @@ class ProLoafRegressorObjective(RegressorObjective):
366
366
  # Filtered default parameters
367
367
  model_params = super().get_params(trial)
368
368
 
369
- # ProLoaf specific parameters
369
+ # XGB specific parameters
370
370
  params = {
371
- # TODO: look into optimizing this pipeline for proloaf
372
- # "relu_leak": trial.suggest_float("relu_leak", 0.1, 1.0),
373
- # "core_layers": trial.suggest_int("core_layers", 1, 3),
374
- # "rel_linear_hidden_size": trial.suggest_float(
375
- # "rel_linear_hidden_size", 0.1, 1
376
- # ),
377
- # "rel_core_hidden_size": trial.suggest_float("rel_core_hidden_size", 0.1, 1),
378
- # "dropout_fc": trial.suggest_float("dropout_fc", 0.1, 0.9),
379
- # "dropout_core": trial.suggest_float("dropout_core", 0.1, 0.9),
380
- # "early_stopping_patience": trial.suggest_int(
381
- # "early_stopping_patience", 5, 10
382
- # ),
383
- # "early_stopping_margin": trial.suggest_float(
384
- # "early_stopping_margin", 0.1, 0.9
385
- # ),
386
- "max_epochs": trial.suggest_int(
387
- "max_epochs", 1, 1
388
- ), # TODO: change after having availability to gpu resource
389
- "batch_size": trial.suggest_int("batch_size", 1, 24),
371
+ "gamma": trial.suggest_float("gamma", 1e-8, 1.0),
372
+ "arctan_smoothing": trial.suggest_float("arctan_smoothing", 0.025, 0.15),
390
373
  }
391
374
  return {**model_params, **params}
392
375
 
393
376
  def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
394
- return optuna.integration.PyTorchLightningPruningCallback(
395
- trial, monitor="val_loss"
377
+ return optuna.integration.XGBoostPruningCallback(
378
+ trial, observation_key=f"validation_1-{self.eval_metric}"
396
379
  )
397
380
 
398
381
 
399
382
  class LinearRegressorObjective(RegressorObjective):
400
383
  def __init__(self, *args, **kwargs):
401
384
  super().__init__(*args, **kwargs)
402
- self.model_type = MLModelType.LINEAR
385
+ self.model_type = ModelType.LINEAR
403
386
 
404
387
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
405
388
  """Get parameters for Linear Regressor Objective with objective specific parameters.
@@ -422,7 +405,7 @@ class LinearRegressorObjective(RegressorObjective):
422
405
  class ARIMARegressorObjective(RegressorObjective):
423
406
  def __init__(self, *args, **kwargs):
424
407
  super().__init__(*args, **kwargs)
425
- self.model_type = MLModelType.ARIMA
408
+ self.model_type = ModelType.ARIMA
426
409
 
427
410
  def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
428
411
  """Get parameters for ARIMA Regressor Objective with objective specific parameters.
@@ -4,15 +4,15 @@
4
4
 
5
5
  from typing import Union
6
6
 
7
- from openstef.enums import MLModelType
7
+ from openstef.enums import ModelType
8
8
  from openstef.model.objective import (
9
+ ARIMARegressorObjective,
9
10
  LGBRegressorObjective,
10
11
  LinearRegressorObjective,
11
- ProLoafRegressorObjective,
12
12
  RegressorObjective,
13
13
  XGBQuantileRegressorObjective,
14
14
  XGBRegressorObjective,
15
- ARIMARegressorObjective,
15
+ XGBMultioutputQuantileRegressorObjective,
16
16
  )
17
17
  from openstef.model.regressors.custom_regressor import (
18
18
  create_custom_objective,
@@ -22,16 +22,17 @@ from openstef.model.regressors.custom_regressor import (
22
22
 
23
23
  class ObjectiveCreator:
24
24
  OBJECTIVES = {
25
- MLModelType.XGB: XGBRegressorObjective,
26
- MLModelType.LGB: LGBRegressorObjective,
27
- MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
28
- MLModelType.ProLoaf: ProLoafRegressorObjective,
29
- MLModelType.LINEAR: LinearRegressorObjective,
30
- MLModelType.ARIMA: ARIMARegressorObjective,
25
+ ModelType.XGB: XGBRegressorObjective,
26
+ ModelType.LGB: LGBRegressorObjective,
27
+ ModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
28
+ ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
29
+ ModelType.LINEAR: LinearRegressorObjective,
30
+ ModelType.LINEAR_QUANTILE: LinearRegressorObjective,
31
+ ModelType.ARIMA: ARIMARegressorObjective,
31
32
  }
32
33
 
33
34
  @staticmethod
34
- def create_objective(model_type: Union[MLModelType, str]) -> RegressorObjective:
35
+ def create_objective(model_type: Union[ModelType, str]) -> RegressorObjective:
35
36
  """Create an objective function based on model type.
36
37
 
37
38
  Args:
@@ -50,10 +51,10 @@ class ObjectiveCreator:
50
51
  if is_custom_type(model_type):
51
52
  objective = create_custom_objective(model_type)
52
53
  else:
53
- model_type = MLModelType(model_type)
54
+ model_type = ModelType(model_type)
54
55
  objective = ObjectiveCreator.OBJECTIVES[model_type]
55
56
  except ValueError as e:
56
- valid_types = [t.value for t in MLModelType]
57
+ valid_types = [t.value for t in ModelType]
57
58
  raise NotImplementedError(
58
59
  f"No objective for '{model_type}', "
59
60
  f"valid model_types are: {valid_types}"
@@ -5,9 +5,9 @@
5
5
  import numpy as np
6
6
  import pandas as pd
7
7
  import statsmodels.api as sm
8
-
9
8
  from sklearn.metrics import r2_score
10
9
  from sklearn.model_selection import TimeSeriesSplit
10
+
11
11
  from openstef.model.regressors.regressor import OpenstfRegressor
12
12
 
13
13
 
@@ -4,65 +4,41 @@
4
4
  """This module defines the DAZL model."""
5
5
  import numpy as np
6
6
  from sklearn.base import BaseEstimator
7
+ from sklearn.compose import TransformedTargetRegressor
8
+ from sklearn.linear_model import LinearRegression
7
9
  from sklearn.metrics import mean_squared_error, r2_score
8
- from sklearn.neighbors import KNeighborsRegressor
10
+ from sklearn.pipeline import Pipeline
9
11
  from sklearn.preprocessing import MinMaxScaler
10
- from sklearn.utils import shuffle
11
12
 
12
13
 
13
14
  class Dazls(BaseEstimator):
14
15
  """DAZLS model.
15
16
 
16
- The model carries out wind and solar power prediction for unseen target substations using training data from
17
- other substations with known components.
18
-
19
- Any data-driven model can be plugged and used as the base for the domain and the adaptation model.
20
-
21
- For a full reference, see:
22
- Teng, S.Y., van Nooten, C. C., van Doorn, J.M., Ottenbros, A., Huijbregts, M., Jansen, J.J.
23
- Improving Near Real-Time Predictions of Renewable Electricity Production at Substation Level (Submitted)
17
+ The model carries out wind and solar power prediction for unseen target substations using training data from other
18
+ substations with known components.
24
19
 
25
20
  """
26
21
 
22
+ model_: Pipeline
23
+
27
24
  def __init__(self):
28
25
  """Initialize DAZL model."""
29
26
  self.__name__ = "DAZLS"
30
- self.domain_model_scaler = MinMaxScaler(clip=True)
31
- self.adaptation_model_scaler = MinMaxScaler(clip=True)
32
- self.target_scaler = MinMaxScaler(clip=True)
33
- self.domain_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")
34
- self.adaptation_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")
27
+
28
+ regressor = TransformedTargetRegressor(
29
+ regressor=LinearRegression(),
30
+ transformer=MinMaxScaler(clip=True),
31
+ )
32
+
33
+ self.model_ = Pipeline(
34
+ [("scaler", MinMaxScaler(clip=True)), ("regressor", regressor)]
35
+ )
35
36
 
36
37
  # The input columns for the domain and adaptation models (with description)
37
- self.domain_model_input_columns = [
38
+ self.baseline_input_columns = [
38
39
  "radiation", # Weather parameter
39
40
  "windspeed_100m", # Weather parameter
40
- "total_substation", # Substation's measured total load
41
- "lat", # Latitude
42
- "lon", # Longitude
43
- "solar_on", # Solar installed on substation: yes=1, no=0
44
- "wind_on", # Wind installed on substation: yes=1, no=0
45
- "hour", # Hour of the day
46
- "minute", # Minute of the hour
47
- "var0", # Variance of the total load
48
- "var1", # Variance of the total pv load (only available for calibration substations)
49
- "var2", # Variance of the total wind load (only available for calibration substations)
50
- "sem0", # Standard Error of the Mean of the total load
51
- "sem1", # Standard Error of the Mean of the total PV load (only available for calibration substations)
52
- ]
53
- self.adaptation_model_input_columns = [
54
- "total_substation",
55
- "lat",
56
- "lon",
57
- "solar_on",
58
- "wind_on",
59
- "hour",
60
- "minute",
61
- "var0",
62
- "var1",
63
- "var2",
64
- "sem0",
65
- "sem1",
41
+ "total_load",
66
42
  ]
67
43
  self.target_columns = ["total_wind_part", "total_solar_part"]
68
44
 
@@ -78,30 +54,12 @@ class Dazls(BaseEstimator):
78
54
  target: the expected output (y_train)
79
55
 
80
56
  """
81
- x, x2, y = (
82
- features.loc[:, self.domain_model_input_columns],
83
- features.loc[:, self.adaptation_model_input_columns],
57
+ x, y = (
58
+ features.loc[:, self.baseline_input_columns],
84
59
  target.loc[:, self.target_columns],
85
60
  )
86
- domain_model_input, adaptation_model_input, y_train = shuffle(
87
- x, x2, y, random_state=999
88
- ) # just shuffling
89
-
90
- self.domain_model_scaler.fit(domain_model_input)
91
- self.adaptation_model_scaler.fit(adaptation_model_input)
92
- self.target_scaler.fit(y_train)
93
- domain_model_input = self.domain_model_scaler.transform(domain_model_input)
94
- adaptation_model_input = self.adaptation_model_scaler.transform(
95
- adaptation_model_input
96
- )
97
- y_train = self.target_scaler.transform(y_train)
98
61
 
99
- self.domain_model.fit(domain_model_input, y_train)
100
- domain_model_pred = self.domain_model.predict(domain_model_input)
101
- adaptation_model_input = np.concatenate(
102
- (adaptation_model_input, domain_model_pred), axis=1
103
- )
104
- self.adaptation_model.fit(adaptation_model_input, y_train)
62
+ self.model_.fit(x, y)
105
63
 
106
64
  def predict(self, x: np.array):
107
65
  """Make a prediction.
@@ -109,37 +67,21 @@ class Dazls(BaseEstimator):
109
67
  For the prediction we use the test data x. We use domain_model_input_columns and
110
68
  adaptation_model_input_columns to separate x in test data for domain model and adaptation model respectively.
111
69
 
70
+ There is an option available to return the domain model and adaptation model predictions separately to more
71
+ easily investigate the effectiveness of the models.
72
+
112
73
  Args:
113
74
  x: domain_model_test_data, adaptation_model_test_data
75
+ return_sub_preds : a flag value indicating to return the predictions of the domain model and adaptation
76
+ model separately. (Default: False.)
77
+
78
+ Returns:
114
79
  prediction: The output prediction after both models.
115
80
 
116
81
  """
117
- domain_model_test_data, adaptation_model_test_data = (
118
- x.loc[:, self.domain_model_input_columns],
119
- x.loc[:, self.adaptation_model_input_columns],
120
- )
121
- # Rescale test data for both models (if required)
122
- domain_model_test_data_scaled = self.domain_model_scaler.transform(
123
- domain_model_test_data
124
- )
125
- adaptation_model_test_data_scaled = self.adaptation_model_scaler.transform(
126
- adaptation_model_test_data
127
- )
128
- # Use the scaled data to make domain_model_prediction
129
- domain_model_test_data_pred = self.domain_model.predict(
130
- domain_model_test_data_scaled
131
- )
132
- # Use the domain_model_prediction to make adaptation_model_prediction
133
- adaptation_model_test_data_pred = self.adaptation_model.predict(
134
- np.concatenate(
135
- [adaptation_model_test_data_scaled, domain_model_test_data_pred], axis=1
136
- )
137
- )
138
- # Rescale adaptation_model_prediction (if required)
139
- prediction = self.target_scaler.inverse_transform(
140
- adaptation_model_test_data_pred
141
- )
142
- return prediction
82
+ model_test_data = x.loc[:, self.baseline_input_columns]
83
+
84
+ return self.model_.predict(model_test_data)
143
85
 
144
86
  def score(self, truth, prediction):
145
87
  """Evaluation of the prediction's output.
@@ -165,13 +107,10 @@ class Dazls(BaseEstimator):
165
107
  """
166
108
  summary_str = (
167
109
  f"{self.__name__} model summary:\n\n"
168
- f"Domain Model: {self.domain_model} \n"
169
- f"\tInput columns: {self.domain_model_input_columns} \n"
170
- f"\tScaler: {self.domain_model_scaler} \n\n"
171
- f"Adaptation Model: {self.adaptation_model} \n"
172
- f"\tInput columns: {self.adaptation_model_input_columns} \n"
173
- f"\tScaler: {self.adaptation_model_scaler} \n\n"
174
- f"Target columns: {self.target_columns}"
110
+ f"Model: {self.model_} \n"
111
+ f"\tInput columns: {self.baseline_input_columns} \n"
112
+ f"\tScaler: {self.model_['scaler']} \n\n"
113
+ f"\tRegressor: {self.model_['regressor']} \n\n"
175
114
  )
176
115
 
177
116
  return summary_str
@@ -0,0 +1,95 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ import re
5
+ from typing import List
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from sklearn.base import RegressorMixin
10
+ from sklearn.utils.validation import check_is_fitted
11
+
12
+ from openstef.model.regressors.regressor import OpenstfRegressor
13
+
14
+
15
+ class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
16
+ feature_names_: List[str] = []
17
+
18
+ def __init__(self, quantiles=None):
19
+ """Initialize FlatlinerRegressor.
20
+
21
+ The model always predicts 0.0, regardless of the input features. The model is meant to be used for flatliner
22
+ locations that still expect a prediction while preserving the prediction interface.
23
+
24
+ """
25
+ super().__init__()
26
+ self.quantiles = quantiles
27
+
28
+ @property
29
+ def feature_names(self) -> list:
30
+ """The names of the features used to train the model."""
31
+ check_is_fitted(self)
32
+ return self.feature_names_
33
+
34
+ @staticmethod
35
+ def _get_importance_names():
36
+ return {
37
+ "gain_importance_name": "total_gain",
38
+ "weight_importance_name": "weight",
39
+ }
40
+
41
+ @property
42
+ def can_predict_quantiles(self) -> bool:
43
+ """Attribute that indicates if the model predict particular quantiles."""
44
+ return True
45
+
46
+ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
47
+ """Fits flatliner model.
48
+
49
+ Args:
50
+ x: Feature matrix
51
+ y: Labels
52
+
53
+ Returns:
54
+ Fitted FlatlinerRegressor model
55
+
56
+ """
57
+ self.feature_names_ = list(x.columns)
58
+ self.feature_importances_ = np.ones(len(self.feature_names_)) / (
59
+ len(self.feature_names_) or 1.0
60
+ )
61
+
62
+ return self
63
+
64
+ def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
65
+ """Makes a prediction for a desired quantile.
66
+
67
+ Args:
68
+ x: Feature matrix
69
+ quantile: Quantile for which a prediction is desired,
70
+ note that only quantiles for which a model is trained are available,
71
+ and that this is a quantile-model specific keyword
72
+
73
+ Returns:
74
+ Prediction
75
+
76
+ Raises:
77
+ ValueError in case no model is trained for the requested quantile
78
+
79
+ """
80
+ check_is_fitted(self)
81
+
82
+ return np.zeros(x.shape[0])
83
+
84
+ def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
85
+ check_is_fitted(self)
86
+ return np.array([0.0 for _ in self.feature_names_])
87
+
88
+ @classmethod
89
+ def _get_param_names(cls):
90
+ return [
91
+ "quantiles",
92
+ ]
93
+
94
+ def __sklearn_is_fitted__(self) -> bool:
95
+ return True