openstef 3.4.45__py3-none-any.whl → 3.4.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com>
2
+
3
+ SPDX-License-Identifier: MPL-2.0
@@ -31,6 +31,7 @@ from openstef.feature_engineering.weather_features import (
31
31
  from openstef.feature_engineering.cyclic_features import (
32
32
  add_seasonal_cyclic_features,
33
33
  add_time_cyclic_features,
34
+ add_daylight_terrestrial_feature,
34
35
  )
35
36
 
36
37
 
@@ -124,5 +125,8 @@ def apply_features(
124
125
  # Adds polar time features (sine and cosine) to capture periodic patterns based on the timestamp index.
125
126
  data = add_time_cyclic_features(data)
126
127
 
128
+ # Adds daylight terrestrial feature
129
+ data = add_daylight_terrestrial_feature(data)
130
+
127
131
  # Return dataframe including all requested features
128
132
  return data
@@ -13,6 +13,8 @@ import structlog
13
13
  import logging
14
14
 
15
15
  from openstef.settings import Settings
16
+ from openstef import PROJECT_ROOT
17
+
16
18
 
17
19
  structlog.configure(
18
20
  wrapper_class=structlog.make_filtering_bound_logger(
@@ -21,10 +23,65 @@ structlog.configure(
21
23
  )
22
24
  logger = structlog.get_logger(__name__)
23
25
 
24
-
26
+ TERRESTRIAL_RADIATION_CSV_PATH: str = (
27
+ PROJECT_ROOT / "openstef" / "data" / "NL_terrestrial_radiation.csv"
28
+ )
25
29
  NUM_SECONDS_IN_A_DAY = 24 * 60 * 60
26
30
 
27
31
 
32
def add_daylight_terrestrial_feature(
    data: pd.DataFrame,
    path_to_terrestrial_radiation_csv: str = TERRESTRIAL_RADIATION_CSV_PATH,
) -> pd.DataFrame:
    """Add a z-score normalized terrestrial radiation feature to a time-indexed dataset.

    The terrestrial radiation reference series is read from a CSV file, shifted so that
    its first year matches the first year of ``data``, resampled to 15-minute intervals
    (gaps are interpolated), normalized, and left-joined onto ``data`` as the column
    ``daylight_continuous``.

    Args:
        data (pd.DataFrame):
            The input dataset. Its index must be a ``pd.DatetimeIndex``.
        path_to_terrestrial_radiation_csv (str):
            Path to the CSV file containing terrestrial radiation data. The first
            column of the file is used as a datetime index.

    Returns:
        pd.DataFrame:
            A copy of the input dataset with an added ``daylight_continuous`` column.
            Rows whose timestamp has no match in the (shifted, resampled) radiation
            series get NaN. For an input without rows the column is added empty.

    Raises:
        ValueError: If the index of ``data`` is not a ``pd.DatetimeIndex``.

    Notes:
        - The function assumes the input data and the terrestrial radiation data share
          the same time zone and frequency alignment.
        - The terrestrial radiation values are normalized using z-score normalization.
        - NOTE(review): the reference series is shifted by a single year offset derived
          from the earliest timestamp in ``data``. Whether input spanning several
          calendar years is fully covered depends on the length of the CSV - confirm.

    """
    # Fail early with a clear message instead of an AttributeError on `.year` below.
    if not isinstance(data.index, pd.DatetimeIndex):
        raise ValueError(
            "The index of the input data should be a pandas DatetimeIndex."
        )

    # Make a copy of the DataFrame to avoid modifying the original
    data = data.copy()

    # Without rows there is no first year to align to (the minimum of the index is
    # NaT), so add the empty feature column and skip the CSV processing entirely.
    # The index length is checked rather than `data.empty`, because a frame with
    # rows but no columns also counts as "empty" for pandas.
    if len(data.index) == 0:
        data["daylight_continuous"] = pd.Series(index=data.index, dtype="float64")
        return data

    # Load the terrestrial radiation data
    terrestrial_radiation = pd.read_csv(path_to_terrestrial_radiation_csv, index_col=0)
    terrestrial_radiation.index = pd.to_datetime(terrestrial_radiation.index)

    # Align the index with the input data's year
    year_diff = data.index.min().year - terrestrial_radiation.index.min().year
    terrestrial_radiation.index += pd.DateOffset(years=year_diff)

    # Resample to 15-minute intervals, and interpolate missing values
    terrestrial_radiation = terrestrial_radiation.resample("15min").mean().interpolate()

    # Normalize the terrestrial radiation values using z-score normalization
    terrestrial_radiation = (
        terrestrial_radiation - terrestrial_radiation.mean(axis=0)
    ) / terrestrial_radiation.std(axis=0)
    terrestrial_radiation.columns = ["daylight_continuous"]

    # Merge the terrestrial radiation data with the input dataset
    data = data.merge(
        terrestrial_radiation, left_index=True, right_index=True, how="left"
    )

    return data
83
+
84
+
28
85
  def add_time_cyclic_features(
29
86
  data: pd.DataFrame,
30
87
  ) -> pd.DataFrame:
@@ -35,6 +92,7 @@ def add_time_cyclic_features(
35
92
 
36
93
  Returns:
37
94
  DataFrame that is the same as input dataframe with extra columns for the added time of the day features.
95
+
38
96
  """
39
97
  # Ensure the index is a DatetimeIndex
40
98
  if not isinstance(data.index, pd.DatetimeIndex):
@@ -71,6 +129,7 @@ def add_seasonal_cyclic_features(
71
129
  >>> data = pd.DataFrame(index=pd.date_range(start='2023-01-01', periods=365, freq='D'))
72
130
  >>> data_with_features = add_cyclical_features(data)
73
131
  >>> print(data_with_features.head())
132
+
74
133
  """
75
134
  # Ensure the index is a DatetimeIndex
76
135
  if not isinstance(data.index, pd.DatetimeIndex):
@@ -0,0 +1,90 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa: E501
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ from sklearn.base import BaseEstimator, TransformerMixin
5
+ import pandas as pd
6
+ from typing import List, Dict, Tuple, Optional
7
+
8
+
9
class FeatureClipper(BaseEstimator, TransformerMixin):
    """
    A transformer that clips the values of specified columns to the minimum and
    maximum values observed during training. This prevents the model from
    extrapolating beyond these values during prediction.

    Columns listed in ``columns`` that are absent from the training data are
    skipped during fitting and left untouched during transformation.
    """

    def __init__(self, columns: List[str]):
        """
        Initialize the FeatureClipper.

        Parameters:
        ----------
        columns : List[str]
            List of column names to be clipped.
        """
        self.columns: List[str] = columns
        # Maps column name -> (min, max) observed in the most recent call to fit.
        self.feature_ranges: Dict[str, Tuple[float, float]] = {}

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> "FeatureClipper":
        """
        Fits the transformer on the training data by calculating the min and max
        values for the specified columns.

        Ranges learned by an earlier call to fit are discarded, so the fitted state
        always reflects only the data passed to the latest call.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame containing training data.

        y : Optional[pd.Series]
            Ignored. This parameter exists for compatibility with scikit-learn's pipeline.

        Returns:
        -------
        self : FeatureClipper
            Fitted transformer.

        Raises:
        ------
        ValueError:
            If the input is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        # Rebuild the mapping from scratch instead of updating it in place:
        # otherwise a refit on data that lacks a previously seen column would keep
        # that column's stale range and transform would still clip with it.
        self.feature_ranges = {
            col: (X[col].min(), X[col].max())
            for col in self.columns
            if col in X.columns
        }

        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Transforms new data by clipping the specified columns' values to be within
        the min and max range observed during fitting.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame containing new data to be transformed.

        Returns:
        -------
        X_ : pd.DataFrame
            A copy of the input DataFrame with clipped values in the specified columns.

        Raises:
        ------
        ValueError:
            If the input is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        # Work on a copy so the caller's DataFrame is never modified.
        X_copy = X.copy()

        for col in self.columns:
            # Only clip columns that are present now AND had a range learned in fit.
            if col in X_copy.columns and col in self.feature_ranges:
                min_val, max_val = self.feature_ranges[col]
                X_copy[col] = X_copy[col].clip(lower=min_val, upper=max_val)

        return X_copy
@@ -14,6 +14,7 @@ from sklearn.utils.validation import check_is_fitted
14
14
  from openstef.feature_engineering.missing_values_transformer import (
15
15
  MissingValuesTransformer,
16
16
  )
17
+ from openstef.model.metamodels.feature_clipper import FeatureClipper
17
18
  from openstef.model.regressors.regressor import OpenstfRegressor
18
19
 
19
20
  DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
@@ -28,6 +29,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
28
29
  x_scaler_: StandardScaler
29
30
  y_scaler_: StandardScaler
30
31
  models_: Dict[float, QuantileRegressor]
32
+ feature_clipper_: FeatureClipper
31
33
 
32
34
  is_fitted_: bool = False
33
35
 
@@ -51,6 +53,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
51
53
  weight_exponent: float = 1,
52
54
  weight_floor: float = 0.1,
53
55
  no_fill_future_values_features: List[str] = None,
56
+ clipped_features: List[str] = None,
54
57
  ):
55
58
  """Initialize LinearQuantileOpenstfRegressor.
56
59
 
@@ -89,6 +92,9 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
89
92
  "Cannot train quantile model as 0.5 is not in requested quantiles!"
90
93
  )
91
94
 
95
+ if clipped_features is None:
96
+ clipped_features = ["APX"]
97
+
92
98
  self.quantiles = quantiles
93
99
  self.alpha = alpha
94
100
  self.solver = solver
@@ -103,6 +109,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
103
109
  )
104
110
  self.x_scaler_ = StandardScaler()
105
111
  self.y_scaler_ = StandardScaler()
112
+ self.feature_clipper_ = FeatureClipper(columns=clipped_features)
106
113
  self.models_ = {
107
114
  quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
108
115
  for quantile in quantiles
@@ -177,6 +184,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
177
184
  y = pd.Series(np.asarray(y), name="load")
178
185
 
179
186
  x = self._remove_ignored_features(x)
187
+ self.feature_clipper_.fit(x)
180
188
 
181
189
  # Fix nan columns
182
190
  x, y = self.imputer_.fit_transform(x, y)
@@ -252,6 +260,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
252
260
 
253
261
  # Preprocess input data
254
262
  x = self._remove_ignored_features(x)
263
+ x = self.feature_clipper_.transform(x)
255
264
  x = self.imputer_.transform(x)
256
265
  x_scaled = self.x_scaler_.transform(x)
257
266
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openstef
3
- Version: 3.4.45
3
+ Version: 3.4.48
4
4
  Summary: Open short term energy forecaster
5
5
  Home-page: https://github.com/OpenSTEF/openstef
6
6
  Author: Alliander N.V
@@ -29,7 +29,7 @@ Requires-Dist: pvlib==0.10.5
29
29
  Requires-Dist: pydantic~=2.4
30
30
  Requires-Dist: pydantic-settings~=2.3
31
31
  Requires-Dist: pymsteams~=0.2.2
32
- Requires-Dist: scikit-learn~=1.3
32
+ Requires-Dist: scikit-learn<1.6,>=1.3
33
33
  Requires-Dist: scipy~=1.10
34
34
  Requires-Dist: statsmodels<1.0.0,>=0.13.5
35
35
  Requires-Dist: structlog<25,>=23.1
@@ -4,6 +4,8 @@ openstef/app_settings.py,sha256=EJTDtimctFQQ-3f7ZcOQaRYohpZk3JD6aZBWPFYM2_A,582
4
4
  openstef/enums.py,sha256=Wmoag2p7G2cvENA1qt8FcVbAgo-MswXKxmq7vkxHaxs,2680
5
5
  openstef/exceptions.py,sha256=U4u2LTcdT6cmzpipT2Jh7kq9nCjT_-6gntn8yjuhGU0,1993
6
6
  openstef/settings.py,sha256=nSgkBqFxuqB3w7Rwo60i8j37c5ngDbt6vpjHS6QtJXQ,354
7
+ openstef/data/NL_terrestrial_radiation.csv,sha256=A4kbW56GDzWi4tWUwY2C-4PiOvcKJCwkWQQtdg4ekPE,820246
8
+ openstef/data/NL_terrestrial_radiation.csv.license,sha256=AxxHusqwIXU5RHl5ZMU65LyXmgtbj6QlcnFaOEN4kEE,145
7
9
  openstef/data/dutch_holidays.csv,sha256=Cg8EYjXp1O0lcFOkIOmrS5HaOArrxZwOXsZ9pVkIcKI,49847
8
10
  openstef/data/dutch_holidays.csv.license,sha256=AxxHusqwIXU5RHl5ZMU65LyXmgtbj6QlcnFaOEN4kEE,145
9
11
  openstef/data/pv_single_coefs.csv,sha256=jadIEYdHvl1lnV_06X_FASkJZ6C3Hecs5xZnH1gPMvI,24779
@@ -18,9 +20,9 @@ openstef/data_classes/model_specifications.py,sha256=Uod1W3QzhRqVLb6zvXwxh9wRL3E
18
20
  openstef/data_classes/prediction_job.py,sha256=_o5_9HYv6ERTIWlcMpUE-mWwe7dRpaiP83dgNpqpa5Y,5657
19
21
  openstef/data_classes/split_function.py,sha256=ljQIQQu1t1Y_CVWGAy25jrM6wG9odIVVQVimrT1n-1s,3358
20
22
  openstef/feature_engineering/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
21
- openstef/feature_engineering/apply_features.py,sha256=9scyEpUZcSWQrhMXV4c7iT1KvmHDk1J_KSZ_qI63lfY,4866
23
+ openstef/feature_engineering/apply_features.py,sha256=S9HSsooDPcSSyEw9kixEhjjk3MAkAfyK1VhZKg5R0gA,4995
22
24
  openstef/feature_engineering/bidding_zone_to_country_mapping.py,sha256=u9aabjFDImydkO6_cXiaQxBT4gb5zy0gGTg2EoIUO_Y,2106
23
- openstef/feature_engineering/cyclic_features.py,sha256=gmU49D40yR9-Fh9ajiv3SyIWVLQcnibvQ4fFnpvAOj4,3527
25
+ openstef/feature_engineering/cyclic_features.py,sha256=0Z3wZeF_qrkmEcOq91gtdSMZucAq99kUoBuFDV0SHqk,5962
24
26
  openstef/feature_engineering/data_preparation.py,sha256=htca9LBO3ZN1D-iX4vXf0UN1fw_rRO7y6N3AuYVMpfk,5628
25
27
  openstef/feature_engineering/feature_adder.py,sha256=aSqDl_gUrB3H2TD3cNvU5JniY_KOb4u4a2A6J7zB2BQ,6835
26
28
  openstef/feature_engineering/feature_applicator.py,sha256=DR7jayrEMlra4BFL1Ps5WV2fxbkQ6VaOTa5RIKM-YNk,7447
@@ -43,6 +45,7 @@ openstef/model/objective_creator.py,sha256=cIO-uiCEYHjqYrgZizeFEjjgLHLLwab8le9O8
43
45
  openstef/model/serializer.py,sha256=IUiiAWvoGVoWzmS-akI6LC7jHRY5Ln_vOCBZy1LnESY,17238
44
46
  openstef/model/standard_deviation_generator.py,sha256=Od9bzXi2TLb1v8Nz-VhBMZHSopWH6ssaDe8gYLlqO1I,2911
45
47
  openstef/model/metamodels/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
48
+ openstef/model/metamodels/feature_clipper.py,sha256=wDsf4k_2YuC6rrFlwE12LpgOdOwHHWuET2ZrJ_gr4yo,2861
46
49
  openstef/model/metamodels/grouped_regressor.py,sha256=yMN_a6TnQSyFaqlB_6Nifq-ydpb5hs6w_b97IaBbHj4,8337
47
50
  openstef/model/metamodels/missing_values_handler.py,sha256=veyvYZHhKvlYZxaUpxRQ7XoE033_3Lcg9LrbuKchlOk,5241
48
51
  openstef/model/regressors/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
@@ -52,7 +55,7 @@ openstef/model/regressors/dazls.py,sha256=Xt89yFHjkwpIUTkkhPmPZ74F8_tht_XV88INuP
52
55
  openstef/model/regressors/flatliner.py,sha256=T9u-ukhqFcatQmlgUtBL_G-1b_wQzgdVRq0ac64GnjQ,2789
53
56
  openstef/model/regressors/lgbm.py,sha256=zCdn1euEdSFxYJzH8XqQFFnb6R4JVUnmineKjX_Gy-g,800
54
57
  openstef/model/regressors/linear.py,sha256=uOvZMLGZH_9nXfmS5honCMfyVeyGXP1Cza9A_BdXlVw,3665
55
- openstef/model/regressors/linear_quantile.py,sha256=uj8Hd-Po14YymC_dUNSzxDayGqK9MjZ2dp_aLnpiv0s,10126
58
+ openstef/model/regressors/linear_quantile.py,sha256=VAyIhp7GPayqbk8Vj_ONqPLNYuaOvxkFKDrRxx6yGY0,10510
56
59
  openstef/model/regressors/regressor.py,sha256=uJcx59AyCPE9f_yPcAQ59h2ZS7eNsDpIHJrladKvHIw,3461
57
60
  openstef/model/regressors/xgb.py,sha256=SH-UiYJtMbfmRBK6738dU0ZRfYfzNynnikwbxINCE7Q,1467
58
61
  openstef/model/regressors/xgb_multioutput_quantile.py,sha256=xWzA7tymC_o-F1OS3I7vUKf9zP6RR1ZglEeY4NAgjU0,9146
@@ -90,8 +93,8 @@ openstef/tasks/utils/predictionjobloop.py,sha256=Ysy3zF5lzPMz_asYDKeF5m0qgVT3tCt
90
93
  openstef/tasks/utils/taskcontext.py,sha256=L9K14ycwgVxbIVUjH2DIn_QWbnu-OfxcGtQ1K9T6sus,5630
91
94
  openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
92
95
  openstef/validation/validation.py,sha256=HVgreHvcZvPazfwC3NNE8_3lsMsZEd_42osCAg1_6W4,11128
93
- openstef-3.4.45.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
94
- openstef-3.4.45.dist-info/METADATA,sha256=nszNfTz9kFGVZyiXyRL18H4_6WtSIlZDvLuAAleM5wM,8068
95
- openstef-3.4.45.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
96
- openstef-3.4.45.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
97
- openstef-3.4.45.dist-info/RECORD,,
96
+ openstef-3.4.48.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
97
+ openstef-3.4.48.dist-info/METADATA,sha256=w22ao36-l9Y4HXpsHvVZ8G9k3_3Iqt2EJ8-dGoArWIs,8073
98
+ openstef-3.4.48.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
99
+ openstef-3.4.48.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
100
+ openstef-3.4.48.dist-info/RECORD,,