openstef 3.4.46__py3-none-any.whl → 3.4.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/feature_engineering/apply_features.py +5 -3
- openstef/feature_engineering/holiday_features.py +5 -3
- openstef/model/metamodels/feature_clipper.py +90 -0
- openstef/model/regressors/linear_quantile.py +9 -0
- {openstef-3.4.46.dist-info → openstef-3.4.49.dist-info}/METADATA +14 -3
- {openstef-3.4.46.dist-info → openstef-3.4.49.dist-info}/RECORD +9 -8
- {openstef-3.4.46.dist-info → openstef-3.4.49.dist-info}/WHEEL +1 -1
- {openstef-3.4.46.dist-info → openstef-3.4.49.dist-info}/LICENSE +0 -0
- {openstef-3.4.46.dist-info → openstef-3.4.49.dist-info}/top_level.txt +0 -0
@@ -40,13 +40,14 @@ def apply_features(
|
|
40
40
|
pj: PredictionJobDataClass = None,
|
41
41
|
feature_names: list[str] = None,
|
42
42
|
horizon: float = 24.0,
|
43
|
+
years: list[int] | None = None,
|
43
44
|
) -> pd.DataFrame:
|
44
45
|
"""Applies the feature functions defined in ``feature_functions.py`` and returns the complete dataframe.
|
45
46
|
|
46
47
|
Features requiring more recent label-data are omitted.
|
47
48
|
|
48
49
|
.. note::
|
49
|
-
For the time
|
50
|
+
For the time derived features only the ones in the features list will be added. But for the weather features all will be added at present.
|
50
51
|
These unrequested additional features have to be filtered out later.
|
51
52
|
|
52
53
|
Args:
|
@@ -56,8 +57,9 @@ def apply_features(
|
|
56
57
|
columns=[label, predictor_1,..., predictor_n]
|
57
58
|
)
|
58
59
|
pj (PredictionJobDataClass): Prediction job.
|
59
|
-
feature_names (list[str]): list of
|
60
|
+
feature_names (list[str]): list of requested features
|
60
61
|
horizon (float): Forecast horizon limit in hours.
|
62
|
+
years (list[int] | None): years for which to create holiday features.
|
61
63
|
|
62
64
|
Returns:
|
63
65
|
pd.DataFrame(index = datetime, columns = [label, predictor_1,..., predictor_n, feature_1, ..., feature_m])
|
@@ -100,7 +102,7 @@ def apply_features(
|
|
100
102
|
|
101
103
|
# Get holiday feature functions
|
102
104
|
feature_functions.update(
|
103
|
-
generate_holiday_feature_functions(country_code=country_code)
|
105
|
+
generate_holiday_feature_functions(country_code=country_code, years=years)
|
104
106
|
)
|
105
107
|
|
106
108
|
# Add the features to the dataframe using previously defined feature functions
|
@@ -15,7 +15,7 @@ HOLIDAY_CSV_PATH: str = PROJECT_ROOT / "openstef" / "data" / "dutch_holidays.csv
|
|
15
15
|
|
16
16
|
def generate_holiday_feature_functions(
|
17
17
|
country_code: str = "NL",
|
18
|
-
years: list = None,
|
18
|
+
years: list[int] | None = None,
|
19
19
|
path_to_school_holidays_csv: str = HOLIDAY_CSV_PATH,
|
20
20
|
) -> dict:
|
21
21
|
"""Generates functions for creating holiday feature.
|
@@ -48,9 +48,11 @@ def generate_holiday_feature_functions(
|
|
48
48
|
The 'Brugdagen' are updated untill dec 2020. (Generated using agenda)
|
49
49
|
|
50
50
|
Args:
|
51
|
-
|
52
|
-
years: years for which to create holiday features.
|
51
|
+
country_code: Country for which to create holiday features.
|
52
|
+
years: years for which to create holiday features. If None,
|
53
|
+
the last 4 years, the current and next year are used.
|
53
54
|
path_to_school_holidays_csv: Filepath to csv with school holidays.
|
55
|
+
|
54
56
|
NOTE: Dutch holidays csv file is only until January 2026.
|
55
57
|
|
56
58
|
Returns:
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
5
|
+
import pandas as pd
|
6
|
+
from typing import List, Dict, Tuple, Optional
|
7
|
+
|
8
|
+
|
9
|
+
class FeatureClipper(BaseEstimator, TransformerMixin):
|
10
|
+
"""
|
11
|
+
A transformer that clips the values of specified columns to the minimum and
|
12
|
+
maximum values observed during training. This prevents the model from
|
13
|
+
extrapolating beyond these values during prediction.
|
14
|
+
"""
|
15
|
+
|
16
|
+
def __init__(self, columns: List[str]):
|
17
|
+
"""
|
18
|
+
Initialize the FeatureClipper.
|
19
|
+
|
20
|
+
Parameters:
|
21
|
+
----------
|
22
|
+
columns : List[str]
|
23
|
+
List of column names to be clipped.
|
24
|
+
"""
|
25
|
+
self.columns: List[str] = columns
|
26
|
+
self.feature_ranges: Dict[str, Tuple[float, float]] = {}
|
27
|
+
|
28
|
+
def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> "FeatureClipper":
|
29
|
+
"""
|
30
|
+
Fits the transformer on the training data by calculating the min and max
|
31
|
+
values for the specified columns.
|
32
|
+
|
33
|
+
Parameters:
|
34
|
+
----------
|
35
|
+
X : pd.DataFrame
|
36
|
+
The input DataFrame containing training data.
|
37
|
+
|
38
|
+
y : Optional[pd.Series]
|
39
|
+
Ignored. This parameter exists for compatibility with scikit-learn's pipeline.
|
40
|
+
|
41
|
+
Returns:
|
42
|
+
-------
|
43
|
+
self : FeatureClipper
|
44
|
+
Fitted transformer.
|
45
|
+
|
46
|
+
Raises:
|
47
|
+
------
|
48
|
+
ValueError:
|
49
|
+
If the input is not a pandas DataFrame.
|
50
|
+
"""
|
51
|
+
if not isinstance(X, pd.DataFrame):
|
52
|
+
raise ValueError("Input must be a pandas DataFrame")
|
53
|
+
|
54
|
+
for col in self.columns:
|
55
|
+
if col in X.columns:
|
56
|
+
self.feature_ranges[col] = (X[col].min(), X[col].max())
|
57
|
+
|
58
|
+
return self
|
59
|
+
|
60
|
+
def transform(self, X: pd.DataFrame) -> pd.DataFrame:
|
61
|
+
"""
|
62
|
+
Transforms new data by clipping the specified columns' values to be within
|
63
|
+
the min and max range observed during fitting.
|
64
|
+
|
65
|
+
Parameters:
|
66
|
+
----------
|
67
|
+
X : pd.DataFrame
|
68
|
+
The input DataFrame containing new data to be transformed.
|
69
|
+
|
70
|
+
Returns:
|
71
|
+
-------
|
72
|
+
X_ : pd.DataFrame
|
73
|
+
A copy of the input DataFrame with clipped values in the specified columns.
|
74
|
+
|
75
|
+
Raises:
|
76
|
+
------
|
77
|
+
ValueError:
|
78
|
+
If the input is not a pandas DataFrame.
|
79
|
+
"""
|
80
|
+
if not isinstance(X, pd.DataFrame):
|
81
|
+
raise ValueError("Input must be a pandas DataFrame")
|
82
|
+
|
83
|
+
X_copy = X.copy()
|
84
|
+
|
85
|
+
for col in self.columns:
|
86
|
+
if col in X_copy.columns and col in self.feature_ranges:
|
87
|
+
min_val, max_val = self.feature_ranges[col]
|
88
|
+
X_copy[col] = X_copy[col].clip(lower=min_val, upper=max_val)
|
89
|
+
|
90
|
+
return X_copy
|
@@ -14,6 +14,7 @@ from sklearn.utils.validation import check_is_fitted
|
|
14
14
|
from openstef.feature_engineering.missing_values_transformer import (
|
15
15
|
MissingValuesTransformer,
|
16
16
|
)
|
17
|
+
from openstef.model.metamodels.feature_clipper import FeatureClipper
|
17
18
|
from openstef.model.regressors.regressor import OpenstfRegressor
|
18
19
|
|
19
20
|
DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
|
@@ -28,6 +29,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
28
29
|
x_scaler_: StandardScaler
|
29
30
|
y_scaler_: StandardScaler
|
30
31
|
models_: Dict[float, QuantileRegressor]
|
32
|
+
feature_clipper_: FeatureClipper
|
31
33
|
|
32
34
|
is_fitted_: bool = False
|
33
35
|
|
@@ -51,6 +53,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
51
53
|
weight_exponent: float = 1,
|
52
54
|
weight_floor: float = 0.1,
|
53
55
|
no_fill_future_values_features: List[str] = None,
|
56
|
+
clipped_features: List[str] = None,
|
54
57
|
):
|
55
58
|
"""Initialize LinearQuantileOpenstfRegressor.
|
56
59
|
|
@@ -89,6 +92,9 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
89
92
|
"Cannot train quantile model as 0.5 is not in requested quantiles!"
|
90
93
|
)
|
91
94
|
|
95
|
+
if clipped_features is None:
|
96
|
+
clipped_features = ["APX"]
|
97
|
+
|
92
98
|
self.quantiles = quantiles
|
93
99
|
self.alpha = alpha
|
94
100
|
self.solver = solver
|
@@ -103,6 +109,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
103
109
|
)
|
104
110
|
self.x_scaler_ = StandardScaler()
|
105
111
|
self.y_scaler_ = StandardScaler()
|
112
|
+
self.feature_clipper_ = FeatureClipper(columns=clipped_features)
|
106
113
|
self.models_ = {
|
107
114
|
quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
|
108
115
|
for quantile in quantiles
|
@@ -177,6 +184,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
177
184
|
y = pd.Series(np.asarray(y), name="load")
|
178
185
|
|
179
186
|
x = self._remove_ignored_features(x)
|
187
|
+
self.feature_clipper_.fit(x)
|
180
188
|
|
181
189
|
# Fix nan columns
|
182
190
|
x, y = self.imputer_.fit_transform(x, y)
|
@@ -252,6 +260,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
252
260
|
|
253
261
|
# Preprocess input data
|
254
262
|
x = self._remove_ignored_features(x)
|
263
|
+
x = self.feature_clipper_.transform(x)
|
255
264
|
x = self.imputer_.transform(x)
|
256
265
|
x_scaled = self.x_scaler_.transform(x)
|
257
266
|
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.1
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: openstef
|
3
|
-
Version: 3.4.46
|
3
|
+
Version: 3.4.49
|
4
4
|
Summary: Open short term energy forecaster
|
5
5
|
Home-page: https://github.com/OpenSTEF/openstef
|
6
6
|
Author: Alliander N.V
|
@@ -29,11 +29,22 @@ Requires-Dist: pvlib==0.10.5
|
|
29
29
|
Requires-Dist: pydantic~=2.4
|
30
30
|
Requires-Dist: pydantic-settings~=2.3
|
31
31
|
Requires-Dist: pymsteams~=0.2.2
|
32
|
-
Requires-Dist: scikit-learn
|
32
|
+
Requires-Dist: scikit-learn<1.6,>=1.3
|
33
33
|
Requires-Dist: scipy~=1.10
|
34
34
|
Requires-Dist: statsmodels<1.0.0,>=0.13.5
|
35
35
|
Requires-Dist: structlog<25,>=23.1
|
36
36
|
Requires-Dist: xgboost~=2.0
|
37
|
+
Dynamic: author
|
38
|
+
Dynamic: author-email
|
39
|
+
Dynamic: classifier
|
40
|
+
Dynamic: description
|
41
|
+
Dynamic: description-content-type
|
42
|
+
Dynamic: home-page
|
43
|
+
Dynamic: keywords
|
44
|
+
Dynamic: license
|
45
|
+
Dynamic: requires-dist
|
46
|
+
Dynamic: requires-python
|
47
|
+
Dynamic: summary
|
37
48
|
|
38
49
|
<!--
|
39
50
|
SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com>
|
@@ -20,14 +20,14 @@ openstef/data_classes/model_specifications.py,sha256=Uod1W3QzhRqVLb6zvXwxh9wRL3E
|
|
20
20
|
openstef/data_classes/prediction_job.py,sha256=_o5_9HYv6ERTIWlcMpUE-mWwe7dRpaiP83dgNpqpa5Y,5657
|
21
21
|
openstef/data_classes/split_function.py,sha256=ljQIQQu1t1Y_CVWGAy25jrM6wG9odIVVQVimrT1n-1s,3358
|
22
22
|
openstef/feature_engineering/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
|
23
|
-
openstef/feature_engineering/apply_features.py,sha256=
|
23
|
+
openstef/feature_engineering/apply_features.py,sha256=EIxP9fvmnAjFRehQpVGBhZHt35GACL-HhDm65_ktHc0,5121
|
24
24
|
openstef/feature_engineering/bidding_zone_to_country_mapping.py,sha256=u9aabjFDImydkO6_cXiaQxBT4gb5zy0gGTg2EoIUO_Y,2106
|
25
25
|
openstef/feature_engineering/cyclic_features.py,sha256=0Z3wZeF_qrkmEcOq91gtdSMZucAq99kUoBuFDV0SHqk,5962
|
26
26
|
openstef/feature_engineering/data_preparation.py,sha256=htca9LBO3ZN1D-iX4vXf0UN1fw_rRO7y6N3AuYVMpfk,5628
|
27
27
|
openstef/feature_engineering/feature_adder.py,sha256=aSqDl_gUrB3H2TD3cNvU5JniY_KOb4u4a2A6J7zB2BQ,6835
|
28
28
|
openstef/feature_engineering/feature_applicator.py,sha256=DR7jayrEMlra4BFL1Ps5WV2fxbkQ6VaOTa5RIKM-YNk,7447
|
29
29
|
openstef/feature_engineering/general.py,sha256=tgU4_1stag9jJmaQAfWCMhfBscznVuQvW5hPK_z9_9g,4438
|
30
|
-
openstef/feature_engineering/holiday_features.py,sha256=
|
30
|
+
openstef/feature_engineering/holiday_features.py,sha256=CbolIP5bfiQkqDct-9TbD828-lhC48bfeNQ2-VFnsJA,8274
|
31
31
|
openstef/feature_engineering/lag_features.py,sha256=Dr6qS8UhdgEHPZZSe-w6ibtjl_lcbcQohhqdZN9fqEU,5652
|
32
32
|
openstef/feature_engineering/missing_values_transformer.py,sha256=o_zCVEOCPn2tWzvlY44XZuDysV0TuxqeVYhilYU54YY,5010
|
33
33
|
openstef/feature_engineering/weather_features.py,sha256=Lr9DItyHvJ2CpWQ1r6A83tJKtR2k_Wwn32FdFTGblO0,15750
|
@@ -45,6 +45,7 @@ openstef/model/objective_creator.py,sha256=cIO-uiCEYHjqYrgZizeFEjjgLHLLwab8le9O8
|
|
45
45
|
openstef/model/serializer.py,sha256=IUiiAWvoGVoWzmS-akI6LC7jHRY5Ln_vOCBZy1LnESY,17238
|
46
46
|
openstef/model/standard_deviation_generator.py,sha256=Od9bzXi2TLb1v8Nz-VhBMZHSopWH6ssaDe8gYLlqO1I,2911
|
47
47
|
openstef/model/metamodels/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
|
48
|
+
openstef/model/metamodels/feature_clipper.py,sha256=wDsf4k_2YuC6rrFlwE12LpgOdOwHHWuET2ZrJ_gr4yo,2861
|
48
49
|
openstef/model/metamodels/grouped_regressor.py,sha256=yMN_a6TnQSyFaqlB_6Nifq-ydpb5hs6w_b97IaBbHj4,8337
|
49
50
|
openstef/model/metamodels/missing_values_handler.py,sha256=veyvYZHhKvlYZxaUpxRQ7XoE033_3Lcg9LrbuKchlOk,5241
|
50
51
|
openstef/model/regressors/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
|
@@ -54,7 +55,7 @@ openstef/model/regressors/dazls.py,sha256=Xt89yFHjkwpIUTkkhPmPZ74F8_tht_XV88INuP
|
|
54
55
|
openstef/model/regressors/flatliner.py,sha256=T9u-ukhqFcatQmlgUtBL_G-1b_wQzgdVRq0ac64GnjQ,2789
|
55
56
|
openstef/model/regressors/lgbm.py,sha256=zCdn1euEdSFxYJzH8XqQFFnb6R4JVUnmineKjX_Gy-g,800
|
56
57
|
openstef/model/regressors/linear.py,sha256=uOvZMLGZH_9nXfmS5honCMfyVeyGXP1Cza9A_BdXlVw,3665
|
57
|
-
openstef/model/regressors/linear_quantile.py,sha256=
|
58
|
+
openstef/model/regressors/linear_quantile.py,sha256=VAyIhp7GPayqbk8Vj_ONqPLNYuaOvxkFKDrRxx6yGY0,10510
|
58
59
|
openstef/model/regressors/regressor.py,sha256=uJcx59AyCPE9f_yPcAQ59h2ZS7eNsDpIHJrladKvHIw,3461
|
59
60
|
openstef/model/regressors/xgb.py,sha256=SH-UiYJtMbfmRBK6738dU0ZRfYfzNynnikwbxINCE7Q,1467
|
60
61
|
openstef/model/regressors/xgb_multioutput_quantile.py,sha256=xWzA7tymC_o-F1OS3I7vUKf9zP6RR1ZglEeY4NAgjU0,9146
|
@@ -92,8 +93,8 @@ openstef/tasks/utils/predictionjobloop.py,sha256=Ysy3zF5lzPMz_asYDKeF5m0qgVT3tCt
|
|
92
93
|
openstef/tasks/utils/taskcontext.py,sha256=L9K14ycwgVxbIVUjH2DIn_QWbnu-OfxcGtQ1K9T6sus,5630
|
93
94
|
openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
|
94
95
|
openstef/validation/validation.py,sha256=HVgreHvcZvPazfwC3NNE8_3lsMsZEd_42osCAg1_6W4,11128
|
95
|
-
openstef-3.4.46.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
|
96
|
-
openstef-3.4.
|
97
|
-
openstef-3.4.
|
98
|
-
openstef-3.4.46.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
|
99
|
-
openstef-3.4.46.dist-info/RECORD,,
|
96
|
+
openstef-3.4.49.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
|
97
|
+
openstef-3.4.49.dist-info/METADATA,sha256=kQ4x33ZVT37sVcGoODmV3hnT3rpKMmM9JAyG88q63go,8305
|
98
|
+
openstef-3.4.49.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
99
|
+
openstef-3.4.49.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
|
100
|
+
openstef-3.4.49.dist-info/RECORD,,
|
File without changes
|
File without changes
|