openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- openstef-4.0.0a3.dist-info/METADATA +177 -0
- openstef-4.0.0a3.dist-info/RECORD +4 -0
- {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
- openstef/__init__.py +0 -14
- openstef/__main__.py +0 -3
- openstef/app_settings.py +0 -19
- openstef/data/NL_terrestrial_radiation.csv +0 -25585
- openstef/data/NL_terrestrial_radiation.csv.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
- openstef/data/dutch_holidays.csv +0 -1759
- openstef/data/dutch_holidays.csv.license +0 -3
- openstef/data/pv_single_coefs.csv +0 -601
- openstef/data/pv_single_coefs.csv.license +0 -3
- openstef/data_classes/__init__.py +0 -3
- openstef/data_classes/data_prep.py +0 -99
- openstef/data_classes/model_specifications.py +0 -30
- openstef/data_classes/prediction_job.py +0 -135
- openstef/data_classes/split_function.py +0 -97
- openstef/enums.py +0 -140
- openstef/exceptions.py +0 -74
- openstef/feature_engineering/__init__.py +0 -3
- openstef/feature_engineering/apply_features.py +0 -138
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
- openstef/feature_engineering/cyclic_features.py +0 -161
- openstef/feature_engineering/data_preparation.py +0 -152
- openstef/feature_engineering/feature_adder.py +0 -206
- openstef/feature_engineering/feature_applicator.py +0 -202
- openstef/feature_engineering/general.py +0 -141
- openstef/feature_engineering/holiday_features.py +0 -231
- openstef/feature_engineering/lag_features.py +0 -165
- openstef/feature_engineering/missing_values_transformer.py +0 -141
- openstef/feature_engineering/rolling_features.py +0 -58
- openstef/feature_engineering/weather_features.py +0 -492
- openstef/metrics/__init__.py +0 -3
- openstef/metrics/figure.py +0 -303
- openstef/metrics/metrics.py +0 -486
- openstef/metrics/reporter.py +0 -222
- openstef/model/__init__.py +0 -3
- openstef/model/basecase.py +0 -82
- openstef/model/confidence_interval_applicator.py +0 -242
- openstef/model/fallback.py +0 -77
- openstef/model/metamodels/__init__.py +0 -3
- openstef/model/metamodels/feature_clipper.py +0 -90
- openstef/model/metamodels/grouped_regressor.py +0 -222
- openstef/model/metamodels/missing_values_handler.py +0 -138
- openstef/model/model_creator.py +0 -214
- openstef/model/objective.py +0 -426
- openstef/model/objective_creator.py +0 -65
- openstef/model/regressors/__init__.py +0 -3
- openstef/model/regressors/arima.py +0 -197
- openstef/model/regressors/custom_regressor.py +0 -64
- openstef/model/regressors/dazls.py +0 -116
- openstef/model/regressors/flatliner.py +0 -95
- openstef/model/regressors/gblinear_quantile.py +0 -334
- openstef/model/regressors/lgbm.py +0 -29
- openstef/model/regressors/linear.py +0 -90
- openstef/model/regressors/linear_quantile.py +0 -305
- openstef/model/regressors/regressor.py +0 -114
- openstef/model/regressors/xgb.py +0 -52
- openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
- openstef/model/regressors/xgb_quantile.py +0 -228
- openstef/model/serializer.py +0 -431
- openstef/model/standard_deviation_generator.py +0 -81
- openstef/model_selection/__init__.py +0 -3
- openstef/model_selection/model_selection.py +0 -311
- openstef/monitoring/__init__.py +0 -3
- openstef/monitoring/performance_meter.py +0 -92
- openstef/monitoring/teams.py +0 -203
- openstef/pipeline/__init__.py +0 -3
- openstef/pipeline/create_basecase_forecast.py +0 -133
- openstef/pipeline/create_component_forecast.py +0 -168
- openstef/pipeline/create_forecast.py +0 -171
- openstef/pipeline/optimize_hyperparameters.py +0 -317
- openstef/pipeline/train_create_forecast_backtest.py +0 -163
- openstef/pipeline/train_model.py +0 -561
- openstef/pipeline/utils.py +0 -52
- openstef/postprocessing/__init__.py +0 -3
- openstef/postprocessing/postprocessing.py +0 -275
- openstef/preprocessing/__init__.py +0 -3
- openstef/preprocessing/preprocessing.py +0 -42
- openstef/settings.py +0 -15
- openstef/tasks/__init__.py +0 -3
- openstef/tasks/calculate_kpi.py +0 -324
- openstef/tasks/create_basecase_forecast.py +0 -118
- openstef/tasks/create_components_forecast.py +0 -162
- openstef/tasks/create_forecast.py +0 -145
- openstef/tasks/create_solar_forecast.py +0 -420
- openstef/tasks/create_wind_forecast.py +0 -80
- openstef/tasks/optimize_hyperparameters.py +0 -135
- openstef/tasks/split_forecast.py +0 -273
- openstef/tasks/train_model.py +0 -224
- openstef/tasks/utils/__init__.py +0 -3
- openstef/tasks/utils/dependencies.py +0 -107
- openstef/tasks/utils/predictionjobloop.py +0 -243
- openstef/tasks/utils/taskcontext.py +0 -160
- openstef/validation/__init__.py +0 -3
- openstef/validation/validation.py +0 -322
- openstef-3.4.56.dist-info/METADATA +0 -154
- openstef-3.4.56.dist-info/RECORD +0 -102
- openstef-3.4.56.dist-info/top_level.txt +0 -1
- /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/model/regressors/linear.py
DELETED
@@ -1,90 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-"""This module contains the linear regressor."""
-import numpy as np
-from sklearn.linear_model import LinearRegression
-from sklearn.utils.validation import check_is_fitted
-
-from openstef.model.metamodels.missing_values_handler import MissingValuesHandler
-from openstef.model.regressors.regressor import OpenstfRegressor
-
-
-class LinearRegressor(MissingValuesHandler):
-    """Linear Regressor wrapped in the metamodel `MissingValuesHandler`.
-
-    This regressor can handle missing values via an imputation strategy.
-
-    Args:
-        missing_values : int, float, str, np.nan or None, default=np.nan
-            The placeholder for the missing values. All occurrences of
-            `missing_values` will be imputed. For pandas' dataframes with
-            nullable integer dtypes with missing values, `missing_values`
-            should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.
-
-        imputation_strategy : str, default=None
-            The imputation strategy.
-            - If None, no imputation is performed.
-            - If "mean", then replace missing values using the mean along
-              each column. Can only be used with numeric data.
-            - If "median", then replace missing values using the median along
-              each column. Can only be used with numeric data.
-            - If "most_frequent", then replace missing values using the most frequent
-              value along each column. Can be used with strings or numeric data.
-              If there is more than one such value, only the smallest is returned.
-            - If "constant", then replace missing values with fill_value. Can be
-              used with strings or numeric data.
-
-        fill_value : str or numerical value, default=None
-            When strategy == "constant", fill_value is used to replace all
-            occurrences of missing_values.
-            If left to the default, fill_value will be 0 when imputing numerical
-            data and "missing_value" for strings or object data types.
-
-    """
-
-    def __init__(self, missing_values=np.nan, imputation_strategy=None, fill_value=0):
-        """Initialize Linear regressor."""
-        super().__init__(
-            LinearRegression(),
-            missing_values=missing_values,
-            imputation_strategy=imputation_strategy,
-            fill_value=fill_value,
-        )
-
-
-class LinearOpenstfRegressor(LinearRegressor, OpenstfRegressor):
-    """Linear Regressor which implements the Openstf regressor API."""
-
-    @staticmethod
-    def _get_importance_names():
-        return {
-            "gain_importance_name": "total_gain",
-            "weight_importance_name": "weight",
-        }
-
-    def fit(self, x, y, **kwargs):
-        """Fit model."""
-        super().fit(x, y)
-        self.feature_importances_ = self._get_feature_importance_from_linear()
-        return self
-
-    def _get_feature_importance_from_linear(self):
-        check_is_fitted(self)
-        feature_importance_linear = np.abs(self.regressor_.coef_)
-        reg_feature_importances_dict = dict(
-            zip(self.non_null_columns_, feature_importance_linear)
-        )
-        return np.array(
-            [reg_feature_importances_dict.get(c, 0) for c in self.feature_in_names_]
-        )
-
-    @property
-    def feature_names(self):
-        """The names of the features used to train the model."""
-        return self.feature_in_names_
-
-    @property
-    def can_predict_quantiles(self):
-        """Indicates whether this model can make quantile predictions."""
-        return False
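
For context, a minimal usage sketch of the removed LinearOpenstfRegressor. The toy data below is invented for illustration; fit(), feature_names, feature_importances_, and can_predict_quantiles are all visible in the source above, and the pre-4.0 import path is assumed.

import numpy as np
import pandas as pd

from openstef.model.regressors.linear import LinearOpenstfRegressor  # pre-4.0 import path

# Toy feature matrix with a missing value for the imputer to fill.
x = pd.DataFrame(
    {
        "windspeed": [4.0, 5.0, np.nan, 6.0],
        "radiation": [100.0, 150.0, 120.0, 90.0],
    }
)
y = pd.Series([10.0, 12.0, 11.0, 13.0], name="load")

model = LinearOpenstfRegressor(imputation_strategy="mean")
model.fit(x, y)

print(model.feature_names)          # features seen during fit
print(model.feature_importances_)   # |coef_| per feature, set in fit()
print(model.can_predict_quantiles)  # False: plain least squares, no quantiles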
openstef/model/regressors/linear_quantile.py
DELETED
@@ -1,305 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-import re
-from typing import Dict, Union, Set, Optional, List
-
-import numpy as np
-import pandas as pd
-from sklearn.base import RegressorMixin
-from sklearn.linear_model import QuantileRegressor
-from sklearn.preprocessing import StandardScaler
-from sklearn.utils.validation import check_is_fitted
-
-from openstef.feature_engineering.missing_values_transformer import (
-    MissingValuesTransformer,
-)
-from openstef.model.metamodels.feature_clipper import FeatureClipper
-from openstef.model.regressors.regressor import OpenstfRegressor
-
-DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
-
-
-class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
-    quantiles: tuple[float, ...]
-    alpha: float
-    solver: str
-
-    imputer_: MissingValuesTransformer
-    x_scaler_: StandardScaler
-    y_scaler_: StandardScaler
-    models_: Dict[float, QuantileRegressor]
-    feature_clipper_: FeatureClipper
-
-    is_fitted_: bool = False
-
-    FEATURE_IGNORE_LIST: Set[str] = {
-        "IsWeekendDay",
-        "IsWeekDay",
-        "IsSunday",
-        "Month",
-        "Quarter",
-    }
-
-    def __init__(
-        self,
-        quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
-        alpha: float = 0.0,
-        solver: str = "highs",
-        missing_values: Union[int, float, str, None] = np.nan,
-        imputation_strategy: Optional[str] = "mean",
-        fill_value: Union[str, int, float] = None,
-        weight_scale_percentile: int = 95,
-        weight_exponent: float = 1,
-        weight_floor: float = 0.1,
-        no_fill_future_values_features: List[str] = None,
-        clipped_features: List[str] = None,
-    ):
-        """Initialize LinearQuantileOpenstfRegressor.
-
-        Model that provides quantile regression with the scikit-learn QuantileRegressor.
-        For each desired quantile a QuantileRegressor model is trained,
-        which can later be used to predict quantiles.
-
-        This model is sensitive to feature quality and therefore has logic to remove
-        some custom features produced by OpenSTEF. The features that are removed are:
-        - Holiday features (is_christmas, is_*)
-        - Lagged features (T-1d, T-*)
-        - Point in time features (IsWeekendDay, IsWeekDay, IsSunday, Month, Quarter)
-        - Infeed MFFBAS profiles (E*_I)
-
-        Args:
-            quantiles: Tuple with desired quantiles, quantile 0.5 is required.
-                For example: (0.1, 0.5, 0.9)
-            alpha: Regularization constant for L1 regularization
-            solver: Solver to use for optimization
-            missing_values: Value to be considered as missing value
-            imputation_strategy: Imputation strategy
-            fill_value: Fill value
-            weight_scale_percentile: Percentile used in scaling of the samples
-            weight_exponent: Exponent used in sample weighing
-            weight_floor: Minimum weight for samples
-            no_fill_future_values_features: The features for which it does not make sense
-                to fill future values. Rows that contain trailing null values for these
-                features will be removed from the data.
-
-        """
-        super().__init__()
-
-        # Check if quantile 0.5 is present. This is required.
-        if 0.5 not in quantiles:
-            raise ValueError(
-                "Cannot train quantile model as 0.5 is not in requested quantiles!"
-            )
-
-        if clipped_features is None:
-            clipped_features = ["day_ahead_electricity_price"]
-
-        self.quantiles = quantiles
-        self.alpha = alpha
-        self.solver = solver
-        self.weight_scale_percentile = weight_scale_percentile
-        self.weight_exponent = weight_exponent
-        self.weight_floor = weight_floor
-        self.imputer_ = MissingValuesTransformer(
-            missing_values=missing_values,
-            imputation_strategy=imputation_strategy,
-            fill_value=fill_value,
-            no_fill_future_values_features=no_fill_future_values_features,
-        )
-        self.x_scaler_ = StandardScaler()
-        self.y_scaler_ = StandardScaler()
-        self.feature_clipper_ = FeatureClipper(columns=clipped_features)
-        self.models_ = {
-            quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
-            for quantile in quantiles
-        }
-
-    @property
-    def feature_names(self) -> list:
-        """The names of the features used to train the model."""
-        check_is_fitted(self)
-        return self.imputer_.non_null_feature_names
-
-    @staticmethod
-    def _get_importance_names():
-        return {
-            "gain_importance_name": "total_gain",
-            "weight_importance_name": "weight",
-        }
-
-    @property
-    def can_predict_quantiles(self) -> bool:
-        """Attribute that indicates if the model predicts particular quantiles."""
-        return True
-
-    def _is_feature_ignored(self, feature_name: str) -> bool:
-        """Check if a feature is ignored by the model.
-
-        Args:
-            feature_name: Feature name
-
-        Returns:
-            True if the feature is ignored, False otherwise
-
-        """
-        return (
-            # Ignore named features
-            feature_name in self.FEATURE_IGNORE_LIST
-            or
-            # Ignore holiday features
-            re.match(r"is_", feature_name) is not None
-            or
-            # Ignore lag features
-            re.match(r"T-", feature_name) is not None
-            or
-            # Ignore infeed MFFBAS profiles
-            re.match(r"E\d.*_I", feature_name) is not None
-        )
-
-    def _remove_ignored_features(self, x: pd.DataFrame) -> pd.DataFrame:
-        """Remove ignored features from the input data.
-
-        Args:
-            x: Input data
-
-        Returns:
-            Data without ignored features
-
-        """
-        return x.drop(columns=[c for c in x.columns if self._is_feature_ignored(c)])
-
-    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
-        """Fits linear quantile model.
-
-        Args:
-            x: Feature matrix
-            y: Labels
-
-        Returns:
-            Fitted LinearQuantile model
-
-        """
-        if not isinstance(y, pd.Series):
-            y = pd.Series(np.asarray(y), name="load")
-
-        x = self._remove_ignored_features(x)
-        self.feature_clipper_.fit(x)
-
-        # Fix nan columns
-        x, y = self.imputer_.fit_transform(x, y)
-        if x.isna().any().any():
-            raise ValueError(
-                "There are nan values in the input data. Set "
-                "imputation_strategy to solve them."
-            )
-
-        # Apply feature scaling
-        x_scaled = self.x_scaler_.fit_transform(x)
-        y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]
-
-        # Add more focus on extreme / peak values
-        sample_weight = self._calculate_sample_weights(y.values.squeeze())
-
-        # Fit quantile regressors
-        for quantile in self.quantiles:
-            self.models_[quantile].fit(
-                X=x_scaled, y=y_scaled, sample_weight=sample_weight
-            )
-
-        self.is_fitted_ = True
-
-        self.feature_importances_ = self._get_feature_importance_from_linear()
-
-        return self
-
-    def _calculate_sample_weights(self, y: np.array):
-        """Calculate sample weights based on the y values of arbitrary scale.
-
-        The resulting weights are in the range [0,1] and are used to put more emphasis
-        on certain samples. The sample weighting function does:
-
-        * Rescale data to a [-1, 1] range using quantile scaling. 90% of the data will
-          be within this range. The rest is outside.
-        * Calculate the weight by taking the exponent of the scaled data.
-          * exponent=0: Results in uniform weights for all samples.
-          * exponent=1: Results in linearly increasing weights for samples that are
-            closer to the extremes.
-          * exponent>1: Results in exponentially increasing weights for samples that are
-            closer to the extremes.
-        * Clip the data to the [0, 1] range with weight_floor as the minimum weight.
-          * The weight floor is used to make sure that all the samples are considered.
-
-        """
-        return np.clip(
-            _weight_exp(
-                _scale_percentile(y, percentile=self.weight_scale_percentile),
-                exponent=self.weight_exponent,
-            ),
-            a_min=self.weight_floor,
-            a_max=1,
-        )
-
-    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
-        """Makes a prediction for a desired quantile.
-
-        Args:
-            x: Feature matrix
-            quantile: Quantile for which a prediction is desired,
-                note that only quantiles are available for which a model is trained,
-                and that this is a quantile-model specific keyword
-
-        Returns:
-            Prediction
-
-        Raises:
-            ValueError in case no model is trained for the requested quantile
-
-        """
-        check_is_fitted(self)
-
-        # Preprocess input data
-        x = self._remove_ignored_features(x)
-        x = self.feature_clipper_.transform(x)
-        x = self.imputer_.transform(x)
-        x_scaled = self.x_scaler_.transform(x)
-
-        # Make prediction
-        y_pred = self.models_[quantile].predict(X=x_scaled)
-
-        # Inverse scaling
-        y_pred = self.y_scaler_.inverse_transform(y_pred.reshape(-1, 1))[:, 0]
-
-        return y_pred
-
-    def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
-        check_is_fitted(self)
-        feature_importance_linear = np.abs(self.models_[quantile].coef_)
-        reg_feature_importances_dict = dict(
-            zip(self.imputer_.non_null_feature_names, feature_importance_linear)
-        )
-        return np.array(
-            [
-                reg_feature_importances_dict.get(c, 0)
-                for c in self.imputer_.non_null_feature_names
-            ]
-        )
-
-    @classmethod
-    def _get_param_names(cls):
-        return [
-            "quantiles",
-            "alpha",
-            "solver",
-        ]
-
-    def __sklearn_is_fitted__(self) -> bool:
-        return self.is_fitted_
-
-
-def _scale_percentile(x: np.ndarray, percentile: int = 95):
-    return np.abs(x / np.percentile(np.abs(x), percentile))
-
-
-def _weight_exp(x: np.ndarray, exponent: float = 1):
-    return np.abs(x) ** exponent
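
The sample-weighting scheme documented in _calculate_sample_weights above is easy to reproduce standalone. Below is a numpy-only sketch using the constructor defaults (percentile 95, exponent 1, floor 0.1); the example target values are invented:

import numpy as np

def scale_percentile(x, percentile=95):
    # Values at the chosen percentile of |x| map to 1; larger values exceed 1.
    return np.abs(x / np.percentile(np.abs(x), percentile))

def weight_exp(x, exponent=1):
    # exponent=0: uniform weights; exponent=1: linear; >1: emphasise extremes.
    return np.abs(x) ** exponent

y = np.array([-10.0, -2.0, 0.0, 1.0, 3.0, 12.0])  # arbitrary-scale target values
weights = np.clip(weight_exp(scale_percentile(y)), a_min=0.1, a_max=1)
print(weights)  # the largest peak is clipped to 1; near-zero samples get the 0.1 floor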
openstef/model/regressors/regressor.py
DELETED
@@ -1,114 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-from abc import abstractmethod
-from typing import Union
-
-import numpy as np
-import pandas as pd
-from sklearn.base import BaseEstimator, RegressorMixin
-
-
-class OpenstfRegressor(BaseEstimator):
-    """This class defines the interface to which all ML models within OpenSTEF should adhere.
-
-    Required methods are indicated by abstractmethods, for which concrete implementations of ML models should have a
-    definition. Common functionality which is required for the automated pipelines in OpenSTEF is defined in this class.
-
-    """
-
-    def __init__(self):
-        self.feature_importance_dataframe = None
-        self.feature_importances_ = None
-
-    def score(self, X, y):
-        """Makes the `score` method from RegressorMixin available."""
-        return RegressorMixin.score(self, X, y)
-
-    ## Define abstract methods required to be implemented by concrete models
-    @property
-    @abstractmethod
-    def feature_names(self) -> list:
-        """Retrieve the model input feature names.
-
-        Returns:
-            The list of feature names
-
-        """
-
-    @property
-    @abstractmethod
-    def can_predict_quantiles(self) -> bool:
-        """Attribute that indicates if the model predicts particular quantiles.
-
-        e.g. XGBQuantileOpenstfRegressor
-
-        """
-
-    @abstractmethod
-    def predict(self, x: pd.DataFrame, **kwargs) -> np.array:
-        """Makes a prediction. Only available after the model has been trained.
-
-        Args:
-            x: Feature matrix
-            kwargs: model-specific keywords
-
-        Returns:
-            Prediction
-
-        """
-
-    @abstractmethod
-    def fit(self, x: np.array, y: np.array, **kwargs) -> RegressorMixin:
-        """Fits the regressor.
-
-        Args:
-            x: Feature matrix
-            y: Labels
-            kwargs: model-specific keywords
-
-        Returns:
-            Fitted model
-
-        """
-
-    def set_feature_importance(self) -> Union[pd.DataFrame, None]:
-        """Get feature importance.
-
-        Returns:
-            DataFrame with feature importance.
-
-        """
-        # returns a dict if we can get feature importance else returns None
-        importance_names = self._get_importance_names()
-        # if the model doesn't support feature importance return None
-        if importance_names is None:
-            return None
-
-        gain = self._fraction_importance(importance_names["gain_importance_name"])
-        weight_importance = self._fraction_importance(
-            importance_names["weight_importance_name"]
-        )
-
-        feature_importance = pd.DataFrame(
-            {"gain": gain, "weight": weight_importance}, index=self.feature_names
-        )
-
-        feature_importance.sort_values(by="gain", ascending=False, inplace=True)
-        return feature_importance
-
-    def _fraction_importance(self, importance: str) -> np.ndarray:
-        self.importance_type = importance
-        feature_importance = self.feature_importances_
-        feature_importance = feature_importance / sum(feature_importance)
-        return feature_importance
-
-    @staticmethod
-    def _get_importance_names() -> Union[dict, None]:
-        """Get importance names if applicable.
-
-        Returns:
-            A dict or None, return None if the model can't get feature importance
-
-        """
-        return None
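
As an interface reference, here is a hypothetical minimal concrete model built on this base class. The median predictor is invented for illustration; it implements exactly the abstract members defined above:

import numpy as np
import pandas as pd
from sklearn.base import RegressorMixin

from openstef.model.regressors.regressor import OpenstfRegressor  # pre-4.0 import path

class MedianDummyRegressor(OpenstfRegressor):
    """Hypothetical model that always predicts the median training label."""

    def fit(self, x: pd.DataFrame, y, **kwargs) -> RegressorMixin:
        self.median_ = float(np.median(y))
        self.feature_names_ = list(x.columns)
        return self

    def predict(self, x: pd.DataFrame, **kwargs) -> np.array:
        return np.full(len(x), self.median_)

    @property
    def feature_names(self) -> list:
        return self.feature_names_

    @property
    def can_predict_quantiles(self) -> bool:
        return False

Because _get_importance_names() defaults to None, set_feature_importance() returns None for a model like this one.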
openstef/model/regressors/xgb.py
DELETED
@@ -1,52 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
-#
-# SPDX-License-Identifier: MPL-2.0
-from typing import Optional
-
-import numpy as np
-from sklearn.base import RegressorMixin
-
-from xgboost import XGBRegressor
-
-from openstef.model.regressors.regressor import OpenstfRegressor
-
-
-class XGBOpenstfRegressor(XGBRegressor, OpenstfRegressor):
-    """XGB Regressor which implements the Openstf regressor API."""
-
-    gain_importance_name = "total_gain"
-    weight_importance_name = "weight"
-
-    @property
-    def feature_names(self):
-        return self._Booster.feature_names
-
-    @property
-    def can_predict_quantiles(self):
-        return False
-
-    @staticmethod
-    def _get_importance_names():
-        return {
-            "gain_importance_name": "total_gain",
-            "weight_importance_name": "weight",
-        }
-
-    def fit(
-        self,
-        x: np.array,
-        y: np.array,
-        *,
-        early_stopping_rounds: Optional[int] = None,
-        callbacks: Optional[list] = None,
-        eval_metric: Optional[str] = None,
-        **kwargs
-    ):
-        if early_stopping_rounds is not None:
-            self.set_params(early_stopping_rounds=early_stopping_rounds)
-        if callbacks is not None:
-            self.set_params(callbacks=callbacks)
-        if eval_metric is not None:
-            self.set_params(eval_metric=eval_metric)
-
-        super().fit(x, y, **kwargs)
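
A usage sketch of the fit() override above, which moves the fit-time arguments onto the model via set_params() before delegating to XGBRegressor.fit(). The data and parameter values are illustrative, and an xgboost version (>=1.6) that accepts these as model parameters is assumed:

import numpy as np

from openstef.model.regressors.xgb import XGBOpenstfRegressor  # pre-4.0 import path

# Illustrative random data with a train/validation split for early stopping.
rng = np.random.default_rng(0)
x, y = rng.normal(size=(200, 3)), rng.normal(size=200)
x_train, x_val, y_train, y_val = x[:150], x[150:], y[:150], y[150:]

model = XGBOpenstfRegressor(n_estimators=100)
model.fit(
    x_train,
    y_train,
    early_stopping_rounds=5,    # forwarded via set_params(), see fit() above
    eval_metric="rmse",         # likewise set as a model parameter
    eval_set=[(x_val, y_val)],  # passed through **kwargs to XGBRegressor.fit
)
print(model.can_predict_quantiles)  # False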