openstef 3.4.43__py3-none-any.whl → 3.4.45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -79,6 +79,9 @@ class PredictionJobDataClass(BaseModel):
79
79
  """Minimum length (in rows) of the forecast input for making a regular forecast."""
80
80
  flatliner_threshold_minutes: int = 1440
81
81
  """Number of minutes that the load has to be constant to detect a flatliner. """
82
+ data_balancing_ratio: Optional[float] = None
83
+ """If set, the training data is balanced with data from 1 year ago: a window that starts
84
+ 365 days before the end of the training period and extends forward in time from there."""
82
85
  depends_on: Optional[list[Union[int, str]]]
83
86
  """Link to another prediction job on which this prediction job might depend."""
84
87
  sid: Optional[str]
@@ -28,6 +28,11 @@ from openstef.feature_engineering.weather_features import (
28
28
  add_humidity_features,
29
29
  )
30
30
 
31
+ from openstef.feature_engineering.cyclic_features import (
32
+ add_seasonal_cyclic_features,
33
+ add_time_cyclic_features,
34
+ )
35
+
31
36
 
32
37
  def apply_features(
33
38
  data: pd.DataFrame,
@@ -113,5 +118,11 @@ def apply_features(
113
118
  # Add solar features; when pj is unavailable a default location is used.
114
119
  data = add_additional_solar_features(data, pj, feature_names)
115
120
 
121
+ # Adds cyclical features to capture seasonal and periodic patterns in time-based data.
122
+ data = add_seasonal_cyclic_features(data)
123
+
124
+ # Adds polar time features (sine and cosine) to capture periodic patterns based on the timestamp index.
125
+ data = add_time_cyclic_features(data)
126
+
116
127
  # Return dataframe including all requested features
117
128
  return data
@@ -0,0 +1,102 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa: E501
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ # Module for adding temporal cyclic features to time-based data for capturing seasonality and periodic patterns.
6
+ # Features include yearly, weekly, and monthly seasonality, as well as time-of-day periodicity.
7
+
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ import structlog
13
+ import logging
14
+
15
+ from openstef.settings import Settings
16
+
17
+ structlog.configure(  # Module-level call: runs once, when this module is imported.
18
+ wrapper_class=structlog.make_filtering_bound_logger(
19
+ logging.getLevelName(Settings.log_level)  # NOTE(review): presumably log_level is a level name such as "INFO" that getLevelName maps to its numeric value - confirm
20
+ )
21
+ )
22
+ logger = structlog.get_logger(__name__)  # Module logger; not referenced by the two feature functions in this module.
23
+
24
+
25
+ NUM_SECONDS_IN_A_DAY = 24 * 60 * 60  # 86400; divisor that turns seconds since midnight into a fraction of a day.
26
+
27
+
28
def add_time_cyclic_features(
    data: pd.DataFrame,
) -> pd.DataFrame:
    """Append sine/cosine encodings of the time of day to a datetime-indexed frame.

    The number of seconds elapsed since midnight is scaled to an angle on the
    unit circle, so timestamps just before and just after midnight end up with
    nearly identical feature values.

    Args:
        data: Dataframe indexed by datetime.

    Returns:
        A copy of the input dataframe with two extra columns,
        ``time0fday_sine`` and ``time0fday_cosine``.

    Raises:
        ValueError: If the index of ``data`` is not a ``pd.DatetimeIndex``.
    """
    timestamps = data.index
    if not isinstance(timestamps, pd.DatetimeIndex):
        raise ValueError("Index should be a pandas DatetimeIndex")

    # Position within the day, expressed in whole seconds since midnight.
    seconds_since_midnight = (
        timestamps.second + timestamps.minute * 60 + timestamps.hour * 60 * 60
    )
    angle = 2 * np.pi * seconds_since_midnight / NUM_SECONDS_IN_A_DAY

    # Work on a copy so the caller's dataframe is left untouched.
    result = data.copy()
    result["time0fday_sine"] = np.sin(angle)
    result["time0fday_cosine"] = np.cos(angle)
    return result
55
+
56
+
57
def add_seasonal_cyclic_features(
    data: pd.DataFrame, compute_features: list = None
) -> pd.DataFrame:
    """Adds cyclical features to capture seasonal and periodic patterns in time-based data.

    Each selected calendar quantity is mapped to an angle on the unit circle and
    stored as a sine/cosine column pair.

    Args:
        data: DataFrame with a DatetimeIndex.
        compute_features: Optional list of features to compute. Options are
            ``"season"`` (day of the year), ``"dayofweek"`` and ``"month"``.
            ``None`` or an empty list selects all three; names other than
            these are ignored.

    Returns:
        Copy of the input DataFrame with the added cyclical feature columns.

    Raises:
        ValueError: If the index of ``data`` is not a ``pd.DatetimeIndex``.

    Example:
        >>> data = pd.DataFrame(index=pd.date_range(start='2023-01-01', periods=365, freq='D'))
        >>> data_with_features = add_seasonal_cyclic_features(data)
        >>> list(data_with_features.columns)
        ['season_sine', 'season_cosine', 'day0fweek_sine', 'day0fweek_cosine', 'month_sine', 'month_cosine']
    """
    # Ensure the index is a DatetimeIndex
    if not isinstance(data.index, pd.DatetimeIndex):
        raise ValueError("The DataFrame index must be a DatetimeIndex.")

    # Make a copy of the DataFrame to avoid modifying the original
    data = data.copy()

    # Default to all features if none specified (an empty list also falls back).
    compute_features = compute_features or ["season", "dayofweek", "month"]

    days_in_year = 365.25  # Account for leap years

    # Add seasonality features (day of year)
    if "season" in compute_features:
        season_angle = 2 * np.pi * data.index.dayofyear / days_in_year
        data["season_sine"] = np.sin(season_angle)
        data["season_cosine"] = np.cos(season_angle)

    # Add weekly features (day of the week, Monday=0 ... Sunday=6)
    if "dayofweek" in compute_features:
        weekday_angle = 2 * np.pi * data.index.day_of_week / 7
        data["day0fweek_sine"] = np.sin(weekday_angle)
        data["day0fweek_cosine"] = np.cos(weekday_angle)

    # Add monthly features (month of the year, 1 ... 12)
    if "month" in compute_features:
        month_angle = 2 * np.pi * data.index.month / 12
        data["month_sine"] = np.sin(month_angle)
        data["month_cosine"] = np.cos(month_angle)

    return data
@@ -22,6 +22,8 @@ Example:
22
22
  from datetime import datetime, timedelta
23
23
  from pathlib import Path
24
24
 
25
+ import pandas as pd
26
+
25
27
  from openstef.data_classes.prediction_job import PredictionJobDataClass
26
28
  from openstef.enums import ModelType, PipelineType
27
29
  from openstef.exceptions import (
@@ -114,10 +116,16 @@ def train_model_task(
114
116
  return
115
117
 
116
118
  # Define start and end of the training input data
119
+ training_period_days_to_fetch = (
120
+ TRAINING_PERIOD_DAYS
121
+ if pj.data_balancing_ratio is None
122
+ else int(pj.data_balancing_ratio * TRAINING_PERIOD_DAYS)
123
+ )
124
+
117
125
  if datetime_end is None:
118
126
  datetime_end = datetime.utcnow()
119
127
  if datetime_start is None:
120
- datetime_start = datetime_end - timedelta(days=TRAINING_PERIOD_DAYS)
128
+ datetime_start = datetime_end - timedelta(days=training_period_days_to_fetch)
121
129
 
122
130
  # Get training input data from database
123
131
  input_data = context.database.get_model_input(
@@ -127,6 +135,29 @@ def train_model_task(
127
135
  datetime_end=datetime_end,
128
136
  )
129
137
 
138
+ # If data balancing is enabled, fetch data from 1 year ago and combine it with the
139
+ # current data
140
+ if pj.data_balancing_ratio is not None:
141
+ # Because the data is from the past, we can use the data from the "future"
142
+ balanced_datetime_start = datetime_end - timedelta(days=365)
143
+ balanced_datetime_end = balanced_datetime_start + timedelta(
144
+ days=training_period_days_to_fetch
145
+ )
146
+
147
+ balanced_input_data = context.database.get_model_input(
148
+ pid=pj["id"],
149
+ location=[pj["lat"], pj["lon"]],
150
+ datetime_start=balanced_datetime_start,
151
+ datetime_end=balanced_datetime_end,
152
+ )
153
+
154
+ input_data = pd.concat(
155
+ [
156
+ balanced_input_data,
157
+ input_data,
158
+ ]
159
+ )
160
+
130
161
  context.perf_meter.checkpoint("Retrieved timeseries input")
131
162
 
132
163
  # Execute the model training pipeline
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openstef
3
- Version: 3.4.43
3
+ Version: 3.4.45
4
4
  Summary: Open short term energy forecaster
5
5
  Home-page: https://github.com/OpenSTEF/openstef
6
6
  Author: Alliander N.V
@@ -15,11 +15,12 @@ openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license,sha25
15
15
  openstef/data_classes/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
16
16
  openstef/data_classes/data_prep.py,sha256=gRSL7UiHvZis8m8z7VoTCZc0Ccffhef5_hmSyApnqK0,3417
17
17
  openstef/data_classes/model_specifications.py,sha256=Uod1W3QzhRqVLb6zvXwxh9wRL3EHCzSvX0oDNd28cFk,1197
18
- openstef/data_classes/prediction_job.py,sha256=oVgk6rTC8IYYIGpEuaZHPi01l7gomQQ--BHlixj0Eb0,5496
18
+ openstef/data_classes/prediction_job.py,sha256=_o5_9HYv6ERTIWlcMpUE-mWwe7dRpaiP83dgNpqpa5Y,5657
19
19
  openstef/data_classes/split_function.py,sha256=ljQIQQu1t1Y_CVWGAy25jrM6wG9odIVVQVimrT1n-1s,3358
20
20
  openstef/feature_engineering/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
21
- openstef/feature_engineering/apply_features.py,sha256=AgIFnfud55vj0UUWJYwrMTjUzFCBxpV5ACaUK21-ZUo,4451
21
+ openstef/feature_engineering/apply_features.py,sha256=9scyEpUZcSWQrhMXV4c7iT1KvmHDk1J_KSZ_qI63lfY,4866
22
22
  openstef/feature_engineering/bidding_zone_to_country_mapping.py,sha256=u9aabjFDImydkO6_cXiaQxBT4gb5zy0gGTg2EoIUO_Y,2106
23
+ openstef/feature_engineering/cyclic_features.py,sha256=gmU49D40yR9-Fh9ajiv3SyIWVLQcnibvQ4fFnpvAOj4,3527
23
24
  openstef/feature_engineering/data_preparation.py,sha256=htca9LBO3ZN1D-iX4vXf0UN1fw_rRO7y6N3AuYVMpfk,5628
24
25
  openstef/feature_engineering/feature_adder.py,sha256=aSqDl_gUrB3H2TD3cNvU5JniY_KOb4u4a2A6J7zB2BQ,6835
25
26
  openstef/feature_engineering/feature_applicator.py,sha256=DR7jayrEMlra4BFL1Ps5WV2fxbkQ6VaOTa5RIKM-YNk,7447
@@ -82,15 +83,15 @@ openstef/tasks/create_solar_forecast.py,sha256=cZiIoCVHlLlDrsWeH3ZX4zfcMMrgGgqkG
82
83
  openstef/tasks/create_wind_forecast.py,sha256=RhshkmNSyFWx4Y6yQn02GzHjWTREbN5A5GAeWv0JpcE,2907
83
84
  openstef/tasks/optimize_hyperparameters.py,sha256=meiOn5S4yBrk5ANCFwcBCfTZIhm-b1rdh9TFh7KFr3E,4754
84
85
  openstef/tasks/split_forecast.py,sha256=AF_AwFcD6BqOrfvNLhIm_8gb7SpyKxEx60mymoxohPg,9323
85
- openstef/tasks/train_model.py,sha256=x1YlLC71l_9AD1r_IwvzKVA4ZTnMv6VMfKYvrPp6gpU,7471
86
+ openstef/tasks/train_model.py,sha256=o8QVPReJ71BZVCOL6Rs3PFD9Zg4LT16dPcbf87xnXpA,8494
86
87
  openstef/tasks/utils/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
87
88
  openstef/tasks/utils/dependencies.py,sha256=Jy9dtV_G7lTEa5Cdy--wvMxJuAb0adb3R0X4QDjVteM,3077
88
89
  openstef/tasks/utils/predictionjobloop.py,sha256=Ysy3zF5lzPMz_asYDKeF5m0qgVT3tCtwSPihqMjnI5Q,9580
89
90
  openstef/tasks/utils/taskcontext.py,sha256=L9K14ycwgVxbIVUjH2DIn_QWbnu-OfxcGtQ1K9T6sus,5630
90
91
  openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
91
92
  openstef/validation/validation.py,sha256=HVgreHvcZvPazfwC3NNE8_3lsMsZEd_42osCAg1_6W4,11128
92
- openstef-3.4.43.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
93
- openstef-3.4.43.dist-info/METADATA,sha256=L2J9ATvIMBE06--ITAksEWnqGYNZr5T-oODgLroNZFw,8068
94
- openstef-3.4.43.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
95
- openstef-3.4.43.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
96
- openstef-3.4.43.dist-info/RECORD,,
93
+ openstef-3.4.45.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
94
+ openstef-3.4.45.dist-info/METADATA,sha256=nszNfTz9kFGVZyiXyRL18H4_6WtSIlZDvLuAAleM5wM,8068
95
+ openstef-3.4.45.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
96
+ openstef-3.4.45.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
97
+ openstef-3.4.45.dist-info/RECORD,,