openstef 3.4.45__py3-none-any.whl → 3.4.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/data/NL_terrestrial_radiation.csv +25585 -0
- openstef/data/NL_terrestrial_radiation.csv.license +3 -0
- openstef/feature_engineering/apply_features.py +4 -0
- openstef/feature_engineering/cyclic_features.py +60 -1
- openstef/model/metamodels/feature_clipper.py +90 -0
- openstef/model/regressors/linear_quantile.py +9 -0
- {openstef-3.4.45.dist-info → openstef-3.4.48.dist-info}/METADATA +2 -2
- {openstef-3.4.45.dist-info → openstef-3.4.48.dist-info}/RECORD +11 -8
- {openstef-3.4.45.dist-info → openstef-3.4.48.dist-info}/LICENSE +0 -0
- {openstef-3.4.45.dist-info → openstef-3.4.48.dist-info}/WHEEL +0 -0
- {openstef-3.4.45.dist-info → openstef-3.4.48.dist-info}/top_level.txt +0 -0
@@ -31,6 +31,7 @@ from openstef.feature_engineering.weather_features import (
|
|
31
31
|
from openstef.feature_engineering.cyclic_features import (
|
32
32
|
add_seasonal_cyclic_features,
|
33
33
|
add_time_cyclic_features,
|
34
|
+
add_daylight_terrestrial_feature,
|
34
35
|
)
|
35
36
|
|
36
37
|
|
@@ -124,5 +125,8 @@ def apply_features(
|
|
124
125
|
# Adds polar time features (sine and cosine) to capture periodic patterns based on the timestamp index.
|
125
126
|
data = add_time_cyclic_features(data)
|
126
127
|
|
128
|
+
# Adds daylight terrestrial feature
|
129
|
+
data = add_daylight_terrestrial_feature(data)
|
130
|
+
|
127
131
|
# Return dataframe including all requested features
|
128
132
|
return data
|
@@ -13,6 +13,8 @@ import structlog
|
|
13
13
|
import logging
|
14
14
|
|
15
15
|
from openstef.settings import Settings
|
16
|
+
from openstef import PROJECT_ROOT
|
17
|
+
|
16
18
|
|
17
19
|
structlog.configure(
|
18
20
|
wrapper_class=structlog.make_filtering_bound_logger(
|
@@ -21,10 +23,65 @@ structlog.configure(
|
|
21
23
|
)
|
22
24
|
logger = structlog.get_logger(__name__)
|
23
25
|
|
24
|
-
|
26
|
+
TERRESTRIAL_RADIATION_CSV_PATH: str = (
|
27
|
+
PROJECT_ROOT / "openstef" / "data" / "NL_terrestrial_radiation.csv"
|
28
|
+
)
|
25
29
|
NUM_SECONDS_IN_A_DAY = 24 * 60 * 60
|
26
30
|
|
27
31
|
|
32
|
+
def add_daylight_terrestrial_feature(
    data: pd.DataFrame,
    path_to_terrestrial_radiation_csv: str = TERRESTRIAL_RADIATION_CSV_PATH,
) -> pd.DataFrame:
    """Add a normalised terrestrial radiation ("daylight") feature to the input dataset.

    The reference terrestrial radiation series is loaded from CSV, shifted so that its
    first year coincides with the first year of ``data``, resampled to a 15-minute grid
    (gaps are interpolated), z-score normalised and finally left-joined onto ``data`` by
    index as the column ``daylight_continuous``.

    Args:
        data (pd.DataFrame):
            The input dataset; it must be indexed by a ``pd.DatetimeIndex``.
        path_to_terrestrial_radiation_csv (str):
            Path to the CSV file containing terrestrial radiation data. The first column
            of the file is used as the (time-based) index. Any path-like object accepted
            by ``pd.read_csv`` works.

    Returns:
        pd.DataFrame:
            A new DataFrame equal to ``data`` plus the ``daylight_continuous`` column.
            The input DataFrame itself is not modified. Timestamps that have no
            counterpart in the shifted reference series get NaN.

    Raises:
        ValueError: If the index of ``data`` is not a ``pd.DatetimeIndex``.

    Notes:
        - The function assumes the input data and the terrestrial radiation data share
          the same time zone and frequency alignment.
        - The terrestrial radiation values are normalized using z-score normalization.

    """
    # Fail early with a clear message instead of an AttributeError on ``.year`` below.
    if not isinstance(data.index, pd.DatetimeIndex):
        raise ValueError("Index should be a pandas DatetimeIndex")

    # An empty index has no minimum timestamp (NaT), so the year alignment below cannot
    # be computed. There is nothing to join in that case; just add the (empty) column.
    if len(data.index) == 0:
        return data.assign(daylight_continuous=float("nan"))

    # Load the terrestrial radiation data
    terrestrial_radiation = pd.read_csv(path_to_terrestrial_radiation_csv, index_col=0)
    terrestrial_radiation.index = pd.to_datetime(terrestrial_radiation.index)

    # Align the index with the input data's year
    # NOTE(review): only the first year of ``data`` is used for the alignment. Input that
    # extends beyond the period covered by the shifted reference series will get NaN for
    # the uncovered timestamps - confirm the CSV covers the longest expected horizon.
    year_diff = data.index.min().year - terrestrial_radiation.index.min().year
    terrestrial_radiation.index += pd.DateOffset(years=year_diff)

    # Resample to 15-minute intervals, and interpolate missing values
    terrestrial_radiation = terrestrial_radiation.resample("15min").mean().interpolate()

    # Normalize the terrestrial radiation values using z-score normalization
    column_mean = terrestrial_radiation.mean(axis=0)
    column_std = terrestrial_radiation.std(axis=0)
    terrestrial_radiation = (terrestrial_radiation - column_mean) / column_std
    terrestrial_radiation.columns = ["daylight_continuous"]

    # ``merge`` returns a new DataFrame, so the caller's frame is left untouched without
    # an explicit copy beforehand.
    # NOTE(review): the join is on exact index equality; a tz-aware ``data`` index joined
    # with a tz-naive reference index (or vice versa) will not line up - verify the CSV.
    return data.merge(
        terrestrial_radiation, left_index=True, right_index=True, how="left"
    )
|
83
|
+
|
84
|
+
|
28
85
|
def add_time_cyclic_features(
|
29
86
|
data: pd.DataFrame,
|
30
87
|
) -> pd.DataFrame:
|
@@ -35,6 +92,7 @@ def add_time_cyclic_features(
|
|
35
92
|
|
36
93
|
Returns:
|
37
94
|
DataFrame that is the same as input dataframe with extra columns for the added time of the day features.
|
95
|
+
|
38
96
|
"""
|
39
97
|
# Ensure the index is a DatetimeIndex
|
40
98
|
if not isinstance(data.index, pd.DatetimeIndex):
|
@@ -71,6 +129,7 @@ def add_seasonal_cyclic_features(
|
|
71
129
|
>>> data = pd.DataFrame(index=pd.date_range(start='2023-01-01', periods=365, freq='D'))
|
72
130
|
>>> data_with_features = add_cyclical_features(data)
|
73
131
|
>>> print(data_with_features.head())
|
132
|
+
|
74
133
|
"""
|
75
134
|
# Ensure the index is a DatetimeIndex
|
76
135
|
if not isinstance(data.index, pd.DatetimeIndex):
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
5
|
+
import pandas as pd
|
6
|
+
from typing import List, Dict, Tuple, Optional
|
7
|
+
|
8
|
+
|
9
|
+
class FeatureClipper(BaseEstimator, TransformerMixin):
    """
    A transformer that clips the values of specified columns to the minimum and
    maximum values observed during training. This prevents the model from
    extrapolating beyond these values during prediction.
    """

    def __init__(self, columns: List[str]):
        """
        Initialize the FeatureClipper.

        Parameters:
        ----------
        columns : List[str]
            List of column names to be clipped.
        """
        self.columns: List[str] = columns
        # Maps column name -> (min, max) observed in the most recent call to ``fit``.
        # Empty until ``fit`` has been called.
        self.feature_ranges: Dict[str, Tuple[float, float]] = {}

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> "FeatureClipper":
        """
        Fits the transformer on the training data by calculating the min and max
        values for the specified columns.

        Columns listed in ``columns`` that are absent from ``X`` are skipped. Every
        call recomputes the ranges from scratch, so ranges learned in an earlier fit
        are never carried over.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame containing training data.

        y : Optional[pd.Series]
            Ignored. This parameter exists for compatibility with scikit-learn's pipeline.

        Returns:
        -------
        self : FeatureClipper
            Fitted transformer.

        Raises:
        ------
        ValueError:
            If the input is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        # Build a fresh mapping instead of updating the old one: otherwise a refit on
        # data lacking a previously seen column would keep that column's stale range
        # and ``transform`` would continue to clip with it.
        self.feature_ranges = {
            col: (X[col].min(), X[col].max())
            for col in self.columns
            if col in X.columns
        }

        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Transforms new data by clipping the specified columns' values to be within
        the min and max range observed during fitting.

        Columns that were not seen during fitting, or that are missing from ``X``,
        are passed through unchanged.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame containing new data to be transformed.

        Returns:
        -------
        X_ : pd.DataFrame
            A copy of the input DataFrame with clipped values in the specified columns.

        Raises:
        ------
        ValueError:
            If the input is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        # Work on a copy so the caller's DataFrame is never modified.
        X_copy = X.copy()

        for col in self.columns:
            if col not in X_copy.columns or col not in self.feature_ranges:
                continue
            min_val, max_val = self.feature_ranges[col]
            X_copy[col] = X_copy[col].clip(lower=min_val, upper=max_val)

        return X_copy
|
@@ -14,6 +14,7 @@ from sklearn.utils.validation import check_is_fitted
|
|
14
14
|
from openstef.feature_engineering.missing_values_transformer import (
|
15
15
|
MissingValuesTransformer,
|
16
16
|
)
|
17
|
+
from openstef.model.metamodels.feature_clipper import FeatureClipper
|
17
18
|
from openstef.model.regressors.regressor import OpenstfRegressor
|
18
19
|
|
19
20
|
DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
|
@@ -28,6 +29,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
28
29
|
x_scaler_: StandardScaler
|
29
30
|
y_scaler_: StandardScaler
|
30
31
|
models_: Dict[float, QuantileRegressor]
|
32
|
+
feature_clipper_: FeatureClipper
|
31
33
|
|
32
34
|
is_fitted_: bool = False
|
33
35
|
|
@@ -51,6 +53,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
51
53
|
weight_exponent: float = 1,
|
52
54
|
weight_floor: float = 0.1,
|
53
55
|
no_fill_future_values_features: List[str] = None,
|
56
|
+
clipped_features: List[str] = None,
|
54
57
|
):
|
55
58
|
"""Initialize LinearQuantileOpenstfRegressor.
|
56
59
|
|
@@ -89,6 +92,9 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
89
92
|
"Cannot train quantile model as 0.5 is not in requested quantiles!"
|
90
93
|
)
|
91
94
|
|
95
|
+
if clipped_features is None:
|
96
|
+
clipped_features = ["APX"]
|
97
|
+
|
92
98
|
self.quantiles = quantiles
|
93
99
|
self.alpha = alpha
|
94
100
|
self.solver = solver
|
@@ -103,6 +109,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
103
109
|
)
|
104
110
|
self.x_scaler_ = StandardScaler()
|
105
111
|
self.y_scaler_ = StandardScaler()
|
112
|
+
self.feature_clipper_ = FeatureClipper(columns=clipped_features)
|
106
113
|
self.models_ = {
|
107
114
|
quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
|
108
115
|
for quantile in quantiles
|
@@ -177,6 +184,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
177
184
|
y = pd.Series(np.asarray(y), name="load")
|
178
185
|
|
179
186
|
x = self._remove_ignored_features(x)
|
187
|
+
self.feature_clipper_.fit(x)
|
180
188
|
|
181
189
|
# Fix nan columns
|
182
190
|
x, y = self.imputer_.fit_transform(x, y)
|
@@ -252,6 +260,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
|
|
252
260
|
|
253
261
|
# Preprocess input data
|
254
262
|
x = self._remove_ignored_features(x)
|
263
|
+
x = self.feature_clipper_.transform(x)
|
255
264
|
x = self.imputer_.transform(x)
|
256
265
|
x_scaled = self.x_scaler_.transform(x)
|
257
266
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: openstef
|
3
|
-
Version: 3.4.45
|
3
|
+
Version: 3.4.48
|
4
4
|
Summary: Open short term energy forecaster
|
5
5
|
Home-page: https://github.com/OpenSTEF/openstef
|
6
6
|
Author: Alliander N.V
|
@@ -29,7 +29,7 @@ Requires-Dist: pvlib==0.10.5
|
|
29
29
|
Requires-Dist: pydantic~=2.4
|
30
30
|
Requires-Dist: pydantic-settings~=2.3
|
31
31
|
Requires-Dist: pymsteams~=0.2.2
|
32
|
-
Requires-Dist: scikit-learn
|
32
|
+
Requires-Dist: scikit-learn<1.6,>=1.3
|
33
33
|
Requires-Dist: scipy~=1.10
|
34
34
|
Requires-Dist: statsmodels<1.0.0,>=0.13.5
|
35
35
|
Requires-Dist: structlog<25,>=23.1
|
@@ -4,6 +4,8 @@ openstef/app_settings.py,sha256=EJTDtimctFQQ-3f7ZcOQaRYohpZk3JD6aZBWPFYM2_A,582
|
|
4
4
|
openstef/enums.py,sha256=Wmoag2p7G2cvENA1qt8FcVbAgo-MswXKxmq7vkxHaxs,2680
|
5
5
|
openstef/exceptions.py,sha256=U4u2LTcdT6cmzpipT2Jh7kq9nCjT_-6gntn8yjuhGU0,1993
|
6
6
|
openstef/settings.py,sha256=nSgkBqFxuqB3w7Rwo60i8j37c5ngDbt6vpjHS6QtJXQ,354
|
7
|
+
openstef/data/NL_terrestrial_radiation.csv,sha256=A4kbW56GDzWi4tWUwY2C-4PiOvcKJCwkWQQtdg4ekPE,820246
|
8
|
+
openstef/data/NL_terrestrial_radiation.csv.license,sha256=AxxHusqwIXU5RHl5ZMU65LyXmgtbj6QlcnFaOEN4kEE,145
|
7
9
|
openstef/data/dutch_holidays.csv,sha256=Cg8EYjXp1O0lcFOkIOmrS5HaOArrxZwOXsZ9pVkIcKI,49847
|
8
10
|
openstef/data/dutch_holidays.csv.license,sha256=AxxHusqwIXU5RHl5ZMU65LyXmgtbj6QlcnFaOEN4kEE,145
|
9
11
|
openstef/data/pv_single_coefs.csv,sha256=jadIEYdHvl1lnV_06X_FASkJZ6C3Hecs5xZnH1gPMvI,24779
|
@@ -18,9 +20,9 @@ openstef/data_classes/model_specifications.py,sha256=Uod1W3QzhRqVLb6zvXwxh9wRL3E
|
|
18
20
|
openstef/data_classes/prediction_job.py,sha256=_o5_9HYv6ERTIWlcMpUE-mWwe7dRpaiP83dgNpqpa5Y,5657
|
19
21
|
openstef/data_classes/split_function.py,sha256=ljQIQQu1t1Y_CVWGAy25jrM6wG9odIVVQVimrT1n-1s,3358
|
20
22
|
openstef/feature_engineering/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
|
21
|
-
openstef/feature_engineering/apply_features.py,sha256=
|
23
|
+
openstef/feature_engineering/apply_features.py,sha256=S9HSsooDPcSSyEw9kixEhjjk3MAkAfyK1VhZKg5R0gA,4995
|
22
24
|
openstef/feature_engineering/bidding_zone_to_country_mapping.py,sha256=u9aabjFDImydkO6_cXiaQxBT4gb5zy0gGTg2EoIUO_Y,2106
|
23
|
-
openstef/feature_engineering/cyclic_features.py,sha256=
|
25
|
+
openstef/feature_engineering/cyclic_features.py,sha256=0Z3wZeF_qrkmEcOq91gtdSMZucAq99kUoBuFDV0SHqk,5962
|
24
26
|
openstef/feature_engineering/data_preparation.py,sha256=htca9LBO3ZN1D-iX4vXf0UN1fw_rRO7y6N3AuYVMpfk,5628
|
25
27
|
openstef/feature_engineering/feature_adder.py,sha256=aSqDl_gUrB3H2TD3cNvU5JniY_KOb4u4a2A6J7zB2BQ,6835
|
26
28
|
openstef/feature_engineering/feature_applicator.py,sha256=DR7jayrEMlra4BFL1Ps5WV2fxbkQ6VaOTa5RIKM-YNk,7447
|
@@ -43,6 +45,7 @@ openstef/model/objective_creator.py,sha256=cIO-uiCEYHjqYrgZizeFEjjgLHLLwab8le9O8
|
|
43
45
|
openstef/model/serializer.py,sha256=IUiiAWvoGVoWzmS-akI6LC7jHRY5Ln_vOCBZy1LnESY,17238
|
44
46
|
openstef/model/standard_deviation_generator.py,sha256=Od9bzXi2TLb1v8Nz-VhBMZHSopWH6ssaDe8gYLlqO1I,2911
|
45
47
|
openstef/model/metamodels/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
|
48
|
+
openstef/model/metamodels/feature_clipper.py,sha256=wDsf4k_2YuC6rrFlwE12LpgOdOwHHWuET2ZrJ_gr4yo,2861
|
46
49
|
openstef/model/metamodels/grouped_regressor.py,sha256=yMN_a6TnQSyFaqlB_6Nifq-ydpb5hs6w_b97IaBbHj4,8337
|
47
50
|
openstef/model/metamodels/missing_values_handler.py,sha256=veyvYZHhKvlYZxaUpxRQ7XoE033_3Lcg9LrbuKchlOk,5241
|
48
51
|
openstef/model/regressors/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
|
@@ -52,7 +55,7 @@ openstef/model/regressors/dazls.py,sha256=Xt89yFHjkwpIUTkkhPmPZ74F8_tht_XV88INuP
|
|
52
55
|
openstef/model/regressors/flatliner.py,sha256=T9u-ukhqFcatQmlgUtBL_G-1b_wQzgdVRq0ac64GnjQ,2789
|
53
56
|
openstef/model/regressors/lgbm.py,sha256=zCdn1euEdSFxYJzH8XqQFFnb6R4JVUnmineKjX_Gy-g,800
|
54
57
|
openstef/model/regressors/linear.py,sha256=uOvZMLGZH_9nXfmS5honCMfyVeyGXP1Cza9A_BdXlVw,3665
|
55
|
-
openstef/model/regressors/linear_quantile.py,sha256=
|
58
|
+
openstef/model/regressors/linear_quantile.py,sha256=VAyIhp7GPayqbk8Vj_ONqPLNYuaOvxkFKDrRxx6yGY0,10510
|
56
59
|
openstef/model/regressors/regressor.py,sha256=uJcx59AyCPE9f_yPcAQ59h2ZS7eNsDpIHJrladKvHIw,3461
|
57
60
|
openstef/model/regressors/xgb.py,sha256=SH-UiYJtMbfmRBK6738dU0ZRfYfzNynnikwbxINCE7Q,1467
|
58
61
|
openstef/model/regressors/xgb_multioutput_quantile.py,sha256=xWzA7tymC_o-F1OS3I7vUKf9zP6RR1ZglEeY4NAgjU0,9146
|
@@ -90,8 +93,8 @@ openstef/tasks/utils/predictionjobloop.py,sha256=Ysy3zF5lzPMz_asYDKeF5m0qgVT3tCt
|
|
90
93
|
openstef/tasks/utils/taskcontext.py,sha256=L9K14ycwgVxbIVUjH2DIn_QWbnu-OfxcGtQ1K9T6sus,5630
|
91
94
|
openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
|
92
95
|
openstef/validation/validation.py,sha256=HVgreHvcZvPazfwC3NNE8_3lsMsZEd_42osCAg1_6W4,11128
|
93
|
-
openstef-3.4.
|
94
|
-
openstef-3.4.
|
95
|
-
openstef-3.4.
|
96
|
-
openstef-3.4.
|
97
|
-
openstef-3.4.
|
96
|
+
openstef-3.4.48.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
|
97
|
+
openstef-3.4.48.dist-info/METADATA,sha256=w22ao36-l9Y4HXpsHvVZ8G9k3_3Iqt2EJ8-dGoArWIs,8073
|
98
|
+
openstef-3.4.48.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
99
|
+
openstef-3.4.48.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
|
100
|
+
openstef-3.4.48.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|