openstef-3.4.56-py3-none-any.whl → openstef-4.0.0a3-py3-none-any.whl

This diff shows the contents of publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions exactly as they appear in their public registries.
Files changed (104)
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. openstef-3.4.56.dist-info/LICENSE → openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/model/regressors/custom_regressor.py
@@ -1,64 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-"""This module defines the custom regressor."""
-import inspect
-from abc import abstractmethod
-from importlib import import_module
-from typing import Type
-
-import pandas as pd
-
-from openstef.model.objective import (
-    EVAL_METRIC,
-    TEST_FRACTION,
-    VALIDATION_FRACTION,
-    RegressorObjective,
-)
-from openstef.model.regressors.regressor import OpenstfRegressor
-
-
-class CustomOpenstfRegressor(OpenstfRegressor):
-    """A custom regressor allows to load any custom model that is not included with openSTEF."""
-
-    @staticmethod
-    @abstractmethod
-    def valid_kwargs() -> list[str]:
-        ...
-
-    @classmethod
-    @abstractmethod
-    def objective(self) -> Type[RegressorObjective]:
-        ...
-
-
-def load_custom_model(custom_model_path) -> CustomOpenstfRegressor:
-    """Load the external custom model."""
-    path_elements = custom_model_path.split(".")
-    module_path = ".".join(path_elements[:-1])
-    module = import_module(module_path)
-    model_name = path_elements[-1]
-    model_class = getattr(module, model_name)
-
-    if (
-        not inspect.isclass(model_class)
-        or inspect.isabstract(model_class)
-        or not issubclass(model_class, CustomOpenstfRegressor)
-    ):
-        raise ValueError(
-            f"The path {custom_model_path!r} does not correspond to a concrete"
-            " CustomOpenstfRegressor subclass"
-        )
-
-    return model_class
-
-
-def is_custom_type(model_type):
-    return isinstance(model_type, str) and "." in model_type
-
-
-def create_custom_objective(
-    custom_model_path,
-):
-    model_class = load_custom_model(custom_model_path)
-    return model_class.objective()
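
Editor's note: a minimal sketch of how this removed plugin hook was consumed. The dotted path `my_package.MyRegressor` is hypothetical and stands in for any importable `CustomOpenstfRegressor` subclass; in openstef 3.x the caller resolved such a path to the class and instantiated it itself.

```python
# Hypothetical usage sketch; "my_package.MyRegressor" does not exist and
# stands in for any importable CustomOpenstfRegressor subclass.
from openstef.model.regressors.custom_regressor import (
    is_custom_type,
    load_custom_model,
)

model_type = "my_package.MyRegressor"

if is_custom_type(model_type):  # heuristic: any string containing a "."
    # Note: despite its return annotation, load_custom_model returns the
    # class itself, not an instance; instantiation is left to the caller.
    model_class = load_custom_model(model_type)
    model = model_class()
```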
openstef/model/regressors/dazls.py
@@ -1,116 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-"""This module defines the DAZL model."""
-import numpy as np
-from sklearn.base import BaseEstimator
-from sklearn.compose import TransformedTargetRegressor
-from sklearn.linear_model import LinearRegression
-from sklearn.metrics import mean_squared_error, r2_score
-from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import MinMaxScaler
-
-
-class Dazls(BaseEstimator):
-    """DAZLS model.
-
-    The model carries out wind and solar power prediction for unseen target substations using training data from other
-    substations with known components.
-
-    """
-
-    model_: Pipeline
-
-    def __init__(self):
-        """Initialize DAZL model."""
-        self.__name__ = "DAZLS"
-
-        regressor = TransformedTargetRegressor(
-            regressor=LinearRegression(),
-            transformer=MinMaxScaler(clip=True),
-        )
-
-        self.model_ = Pipeline(
-            [("scaler", MinMaxScaler(clip=True)), ("regressor", regressor)]
-        )
-
-        # The input columns for the domain and adaptation models (with description)
-        self.baseline_input_columns = [
-            "radiation",  # Weather parameter
-            "windspeed_100m",  # Weather parameter
-            "total_load",
-        ]
-        self.target_columns = ["total_wind_part", "total_solar_part"]
-
-    def fit(self, features, target):
-        """Fit the model.
-
-        In this function we scale the input of the domain and adaptation models of the DAZLS MODEL. Then we fit the
-        two models. We separate the features into domain_model_input, adaptation_model_input and target, and we use them
-        for the fitting and the training of the models.
-
-        Args:
-            features: inputs for domain and adaptation model (domain_model_input, adaptation_model_input)
-            target: the expected output (y_train)
-
-        """
-        x, y = (
-            features.loc[:, self.baseline_input_columns],
-            target.loc[:, self.target_columns],
-        )
-
-        self.model_.fit(x, y)
-
-    def predict(self, x: np.array):
-        """Make a prediction.
-
-        For the prediction we use the test data x. We use domain_model_input_columns and
-        adaptation_model_input_columns to separate x in test data for domain model and adaptation model respectively.
-
-        There is an option available to return the domain model and adaptation model predictions separately to more
-        easily investigate the effectiveness of the models.
-
-        Args:
-            x: domain_model_test_data, adaptation_model_test_data
-            return_sub_preds : a flag value indicating to return the predictions of the domain model and adaptation
-                model separately. (Default: False.)
-
-        Returns:
-            prediction: The output prediction after both models.
-
-        """
-        model_test_data = x.loc[:, self.baseline_input_columns]
-
-        return self.model_.predict(model_test_data)
-
-    def score(self, truth, prediction):
-        """Evaluation of the prediction's output.
-
-        Args:
-            truth: real values
-            prediction: predicted values
-
-        Returns:
-            RMSE and R2 scores
-
-        """
-        rmse = (mean_squared_error(truth, prediction)) ** 0.5
-        r2_score_value = r2_score(truth, prediction)
-        return rmse, r2_score_value
-
-    def __str__(self):
-        """String method of the DAZLs model, provides a summary of the model for easy inspection.
-
-        Returns:
-            Summary represented by a string
-
-        """
-        summary_str = (
-            f"{self.__name__} model summary:\n\n"
-            f"Model: {self.model_} \n"
-            f"\tInput columns: {self.baseline_input_columns} \n"
-            f"\tScaler: {self.model_['scaler']} \n\n"
-            f"\tRegressor: {self.model_['regressor']} \n\n"
-        )
-
-        return summary_str
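
Editor's note: an illustrative sketch of how the removed Dazls estimator was used, on synthetic data; the column names are taken from `baseline_input_columns` and `target_columns` above, the data values are made up.

```python
# Illustrative only: fit and score Dazls on random substation-like data.
import numpy as np
import pandas as pd

from openstef.model.regressors.dazls import Dazls

rng = np.random.default_rng(seed=0)
n = 200

features = pd.DataFrame(
    {
        "radiation": rng.uniform(0, 1000, n),     # weather parameter
        "windspeed_100m": rng.uniform(0, 25, n),  # weather parameter
        "total_load": rng.uniform(-50, 50, n),    # measured substation load
    }
)
target = pd.DataFrame(
    {
        "total_wind_part": rng.uniform(0, 30, n),
        "total_solar_part": rng.uniform(0, 20, n),
    }
)

model = Dazls()
model.fit(features, target)            # one scaler + linear-regression pipeline
prediction = model.predict(features)   # shape (n, 2): wind part, solar part
rmse, r2 = model.score(target, prediction)
print(f"RMSE={rmse:.3f}, R2={r2:.3f}")
```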
openstef/model/regressors/flatliner.py
@@ -1,95 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-import re
-from typing import List
-
-import numpy as np
-import pandas as pd
-from sklearn.base import RegressorMixin
-from sklearn.utils.validation import check_is_fitted
-
-from openstef.model.regressors.regressor import OpenstfRegressor
-
-
-class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
-    feature_names_: List[str] = []
-
-    def __init__(self, quantiles=None):
-        """Initialize FlatlinerRegressor.
-
-        The model always predicts 0.0, regardless of the input features. The model is meant to be used for flatliner
-        locations that still expect a prediction while preserving the prediction interface.
-
-        """
-        super().__init__()
-        self.quantiles = quantiles
-
-    @property
-    def feature_names(self) -> list:
-        """The names of the features used to train the model."""
-        check_is_fitted(self)
-        return self.feature_names_
-
-    @staticmethod
-    def _get_importance_names():
-        return {
-            "gain_importance_name": "total_gain",
-            "weight_importance_name": "weight",
-        }
-
-    @property
-    def can_predict_quantiles(self) -> bool:
-        """Attribute that indicates if the model predict particular quantiles."""
-        return True
-
-    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
-        """Fits flatliner model.
-
-        Args:
-            x: Feature matrix
-            y: Labels
-
-        Returns:
-            Fitted LinearQuantile model
-
-        """
-        self.feature_names_ = list(x.columns)
-        self.feature_importances_ = np.ones(len(self.feature_names_)) / (
-            len(self.feature_names_) or 1.0
-        )
-
-        return self
-
-    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
-        """Makes a prediction for a desired quantile.
-
-        Args:
-            x: Feature matrix
-            quantile: Quantile for which a prediciton is desired,
-                note that only quantile are available for which a model is trained,
-                and that this is a quantile-model specific keyword
-
-        Returns:
-            Prediction
-
-        Raises:
-            ValueError in case no model is trained for the requested quantile
-
-        """
-        check_is_fitted(self)
-
-        return np.zeros(x.shape[0])
-
-    def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
-        check_is_fitted(self)
-        return np.array([0.0 for _ in self.feature_names_])
-
-    @classmethod
-    def _get_param_names(cls):
-        return [
-            "quantiles",
-        ]
-
-    def __sklearn_is_fitted__(self) -> bool:
-        return True
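
Editor's note: a short sketch of the removed flatliner behaviour; the model keeps the quantile-prediction interface but returns zeros for every row and every requested quantile.

```python
# Illustrative only: FlatlinerRegressor predicts 0.0 regardless of input.
import pandas as pd

from openstef.model.regressors.flatliner import FlatlinerRegressor

x = pd.DataFrame({"T-1d": [1.0, 2.0, 3.0], "radiation": [100.0, 200.0, 300.0]})
y = pd.Series([0.0, 0.0, 0.0], name="load")

model = FlatlinerRegressor(quantiles=(0.1, 0.5, 0.9))
model.fit(x, y)

assert (model.predict(x) == 0.0).all()                # median forecast: all zeros
assert (model.predict(x, quantile=0.9) == 0.0).all()  # any quantile: also zeros
```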
openstef/model/regressors/gblinear_quantile.py
@@ -1,334 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2025 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-import math
-import re
-from typing import Union, Optional, List
-
-import numpy as np
-import pandas as pd
-from sklearn.model_selection import train_test_split
-import xgboost as xgb
-from sklearn.preprocessing import StandardScaler
-from sklearn.utils.validation import check_is_fitted
-
-from openstef.feature_engineering.missing_values_transformer import (
-    MissingValuesTransformer,
-)
-from openstef.model.metamodels.feature_clipper import FeatureClipper
-from openstef.model.regressors.regressor import OpenstfRegressor
-
-DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
-
-
-class GBLinearQuantileOpenstfRegressor(OpenstfRegressor):
-    is_fitted_: bool = False
-
-    TO_KEEP_FEATURES: List[str] = [
-        "T-7d",
-        "T-1d",
-    ]
-    TO_IGNORE_FEATURES: List[str] = [
-        "Month",
-        "Quarter",
-    ]
-
-    def __init__(
-        self,
-        quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
-        missing_values: Union[int, float, str, None] = np.nan,
-        imputation_strategy: Optional[str] = "mean",
-        fill_value: Union[str, int, float] = None,
-        weight_scale_percentile: int = 95,
-        weight_exponent: float = 1,
-        weight_floor: float = 0.1,
-        validation_fraction: float = 0.2,
-        no_fill_future_values_features: List[str] = None,
-        clipped_features: List[str] = None,
-        learning_rate: float = 0.15,
-        num_boost_round: int = 500,
-        early_stopping_rounds: int = 10,
-        reg_alpha: float = 0.0001,
-        reg_lambda: float = 0.1,
-        updater: str = "shotgun",
-        feature_selector: str = "shuffle",
-        top_k: int = 0,
-    ):
-        super().__init__()
-
-        # Check if quantile 0.5 is present. This is required.
-        if 0.5 not in quantiles:
-            raise ValueError(
-                "Cannot train quantile model as 0.5 is not in requested quantiles!"
-            )
-
-        if clipped_features is None:
-            clipped_features = ["APX"]
-
-        self.quantiles = quantiles
-        self.weight_scale_percentile = weight_scale_percentile
-        self.weight_exponent = weight_exponent
-        self.weight_floor = weight_floor
-        self.imputer_ = MissingValuesTransformer(
-            missing_values=missing_values,
-            imputation_strategy=imputation_strategy,
-            fill_value=fill_value,
-            no_fill_future_values_features=no_fill_future_values_features,
-        )
-        self.x_scaler_ = StandardScaler()
-        self.y_scaler_ = StandardScaler()
-        self.validation_fraction = validation_fraction
-        self.model_: xgb.Booster = None
-        self.feature_clipper_ = FeatureClipper(columns=clipped_features)
-
-        self.learning_rate = learning_rate
-        self.num_boost_round = num_boost_round
-        self.early_stopping_rounds = early_stopping_rounds
-        self.reg_alpha = reg_alpha
-        self.reg_labmda = reg_lambda
-        self.updater = updater
-        self.feature_selector = feature_selector
-        self.top_k = top_k
-
-    @property
-    def feature_names(self) -> list:
-        """The names of the features used to train the model."""
-        check_is_fitted(self)
-        return self.imputer_.non_null_feature_names
-
-    @staticmethod
-    def _get_importance_names():
-        return {
-            "gain_importance_name": "total_gain",
-            "weight_importance_name": "weight",
-        }
-
-    @property
-    def can_predict_quantiles(self) -> bool:
-        """Attribute that indicates if the model predict particular quantiles."""
-        return True
-
-    def _is_feature_ignored(self, feature_name: str) -> bool:
-        """Check if a feature is ignored by the model.
-
-        Args:
-            feature_name: Feature name
-
-        Returns:
-            True if the feature is ignored, False otherwise
-
-        """
-
-        if feature_name in self.TO_KEEP_FEATURES:
-            return False
-
-        return (
-            # Ignore named features
-            feature_name in self.TO_IGNORE_FEATURES
-            or
-            # Ignore holiday features
-            re.match(r"is_", feature_name) is not None
-            or
-            # Ignore lag features
-            re.match(r"T-", feature_name) is not None
-            or
-            # Ignore infeed MFFBAS profiles
-            re.match(r"E\d.*_I", feature_name) is not None
-        )
-
-    def _remove_ignored_features(self, x: pd.DataFrame) -> pd.DataFrame:
-        """Remove ignored features from the input data.
-
-        Args:
-            x: Input data
-
-        Returns:
-            Data without ignored features
-
-        """
-        return x.drop(columns=[c for c in x.columns if self._is_feature_ignored(c)])
-
-    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> OpenstfRegressor:
-        if not isinstance(y, pd.Series):
-            y = pd.Series(np.asarray(y), name="load")
-
-        x = self._remove_ignored_features(x)
-        self.feature_clipper_.fit(x)
-
-        # Fix nan columns
-        x, y = self.imputer_.fit_transform(x, y)
-        if x.isna().any().any():
-            raise ValueError(
-                "There are nan values in the input data. Set "
-                "imputation_strategy to solve them."
-            )
-
-        # Apply feature scaling
-        x_scaled = self.x_scaler_.fit_transform(x)
-        y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]
-
-        # Add more focus on extreme / peak values
-        sample_weight = self._calculate_sample_weights(y.values.squeeze())
-
-        # Split the data into training and validation sets
-        x_train, x_val, y_train, y_val, weight_train, weight_val = train_test_split(
-            x_scaled,
-            y_scaled,
-            sample_weight,
-            test_size=self.validation_fraction,
-            random_state=42,
-        )
-
-        # Preserve feature names
-        x_train = pd.DataFrame(x_train, columns=x.columns)
-        x_val = pd.DataFrame(x_val, columns=x.columns)
-
-        dtrain = xgb.DMatrix(x_train, label=y_train, weight=weight_train)
-        dval = xgb.DMatrix(x_val, label=y_val, weight=weight_val)
-
-        xgb_params = {
-            # Use the quantile objective function.
-            "objective": "reg:quantileerror",  # This is pinball loss
-            "booster": "gblinear",
-            "updater": self.updater,
-            "alpha": self.reg_alpha,
-            "lambda": self.reg_labmda,
-            "feature_selector": self.feature_selector,
-            "quantile_alpha": np.array(self.quantiles),
-            "learning_rate": self.learning_rate,
-        }
-
-        if self.top_k > 0:
-            xgb_params["top_k"] = self.top_k
-
-        self.model_ = xgb.train(
-            params=xgb_params,
-            dtrain=dtrain,
-            num_boost_round=self.num_boost_round,
-            early_stopping_rounds=self.early_stopping_rounds,
-            evals=[(dtrain, "train"), (dval, "val")],
-        )
-
-        self._Booster = self.model_
-
-        self.is_fitted_ = True
-
-        self.feature_importances_ = self._get_feature_importances_from_booster(
-            self.model_
-        )
-
-        return self
-
-    def _calculate_sample_weights(self, y: np.array):
-        """Calculate sample weights based on the y values of arbitrary scale.
-
-        The resulting weights are in the range [0,1] and are used to put more emphasis
-        on certain samples. The sample weighting function does:
-
-        * Rescale data to a [-1, 1] range using quantile scaling. 90% of the data will
-          be within this range. Rest is outside.
-        * Calculate the weight by taking the exponent of scaled data.
-            * exponent=0: Results in uniform weights for all samples.
-            * exponent=1: Results in linearly increasing weights for samples that are
-              closer to the extremes.
-            * exponent>1: Results in exponentially increasing weights for samples that are
-              closer to the extremes.
-        * Clip the data to [0, 1] range with weight_floor as the minimum weight.
-            * Weight floor is used to make sure that all the samples are considered.
-
-        """
-        return np.clip(
-            _weight_exp(
-                _scale_percentile(y, percentile=self.weight_scale_percentile),
-                exponent=self.weight_exponent,
-            ),
-            a_min=self.weight_floor,
-            a_max=1,
-        )
-
-    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
-        check_is_fitted(self)
-
-        # Preprocess input data
-        x = self._remove_ignored_features(x)
-        x = self.feature_clipper_.transform(x)
-        x = self.imputer_.transform(x)
-        x_scaled = self.x_scaler_.transform(x)
-
-        # Preserve feature names
-        x_scaled = pd.DataFrame(x_scaled, columns=x.columns)
-
-        d_x_scaled = xgb.DMatrix(x_scaled)
-
-        # Make prediction
-        y_pred = self.model_.predict(d_x_scaled)
-
-        # When multiple quantiles are trained,
-        # we need to select the requested quantile
-        if len(self.quantiles) > 1:
-            # Get index of the quantile value in the quantiles list
-            quantile_index = self.quantiles.index(quantile)
-
-            # Get the quantile prediction
-            y_pred = y_pred[:, quantile_index]
-
-        # Inverse scaling
-        y_pred = self.y_scaler_.inverse_transform(y_pred.reshape(-1, 1))[:, 0]
-
-        return y_pred
-
-    @classmethod
-    def _get_feature_importances_from_booster(cls, booster: xgb.Booster) -> np.ndarray:
-        """Gets feature importances from a XGB booster.
-
-        This is based on the feature_importance_ property defined in:
-        https://github.com/dmlc/xgboost/blob/master/python-package/xgboost/sklearn.py.
-
-        Args:
-            booster: Booster object,
-                most of the times the median model (quantile=0.5) is preferred
-
-        Returns:
-            Ndarray with normalized feature importances.
-
-        """
-        # Get score
-        score = booster.get_score(importance_type="weight")
-
-        if type(next(iter(score.values()))) is list:
-            num_quantiles = len(next(iter(score.values())))
-
-            # Select middle quantile, assuming odd number of quantiles
-            quantile_index = num_quantiles // 2
-
-            score = {f: score[f][quantile_index] for f in score}
-
-        # Get feature names from booster
-        feature_names = booster.feature_names
-
-        # Get importance
-        feature_importance = [score.get(f, 0.0) for f in feature_names]
-        # Convert to array
-        features_importance_array = np.array(feature_importance, dtype=np.float32)
-
-        total = features_importance_array.sum()  # For normalizing
-        if total == 0:
-            return features_importance_array
-        return features_importance_array / total  # Normalize
-
-    @classmethod
-    def _get_param_names(cls):
-        return [
-            "quantiles",
-        ]
-
-    def __sklearn_is_fitted__(self) -> bool:
-        return self.is_fitted_
-
-
-def _scale_percentile(x: np.ndarray, percentile: int = 95):
-    return np.abs(x / np.percentile(np.abs(x), percentile))
-
-
-def _weight_exp(x: np.ndarray, exponent: float = 1):
-    return np.abs(x) ** exponent
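
Editor's note: the percentile-scale / exponent / clip sample weighting in `_calculate_sample_weights` is the most model-specific piece of the removed module. A standalone sketch of the same arithmetic (with constants mirroring the defaults above; the data values are made up) shows how extreme load values get weights near 1 while typical values are clipped up to the floor.

```python
# Illustrative only: reproduce the percentile-scale / exponent / clip weighting.
import numpy as np


def scale_percentile(x: np.ndarray, percentile: int = 95) -> np.ndarray:
    # Rescale so roughly 95% of |x| falls within [0, 1]; the rest exceeds 1.
    return np.abs(x / np.percentile(np.abs(x), percentile))


def weight_exp(x: np.ndarray, exponent: float = 1.0) -> np.ndarray:
    # exponent=0 gives uniform weights; 1 linear; >1 emphasizes extremes.
    return np.abs(x) ** exponent


y = np.array([0.5, 1.0, 5.0, 10.0, -12.0])  # load values of arbitrary scale
weights = np.clip(
    weight_exp(scale_percentile(y, percentile=95), exponent=1.0),
    a_min=0.1,  # weight floor: every sample keeps some influence
    a_max=1.0,
)
print(weights)  # extremes near 1.0; small values clipped up to the 0.1 floor
```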
openstef/model/regressors/lgbm.py
@@ -1,29 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-
-from lightgbm import LGBMRegressor
-
-from openstef.model.regressors.regressor import OpenstfRegressor
-
-
-class LGBMOpenstfRegressor(LGBMRegressor, OpenstfRegressor):
-    """LGBM Regressor which implements the Openstf regressor API."""
-
-    gain_importance_name = "gain"
-    weight_importance_name = "split"
-
-    @property
-    def feature_names(self):
-        return self._Booster.feature_name()
-
-    @property
-    def can_predict_quantiles(self):
-        return False
-
-    @staticmethod
-    def _get_importance_names():
-        return {
-            "gain_importance_name": "gain",
-            "weight_importance_name": "split",
-        }
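
Editor's note: this removed adapter added only interface metadata on top of LightGBM, so usage was identical to a plain LGBMRegressor; a minimal sketch with made-up data (the constructor arguments are ordinary LightGBM parameters).

```python
# Illustrative only: the adapter trains like LightGBM but exposes the
# OpenSTEF regressor interface (feature_names, can_predict_quantiles).
import numpy as np
import pandas as pd

from openstef.model.regressors.lgbm import LGBMOpenstfRegressor

x = pd.DataFrame(
    {
        "T-1d": np.arange(100, dtype=float),      # lag feature
        "radiation": np.linspace(0.0, 1.0, 100),  # weather feature
    }
)
y = pd.Series(np.arange(100, dtype=float), name="load")

model = LGBMOpenstfRegressor(n_estimators=10, min_child_samples=5)
model.fit(x, y)

print(model.feature_names)          # names from the fitted LightGBM booster
print(model.can_predict_quantiles)  # False: point forecasts only
```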