openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef-4.0.0a3.dist-info/METADATA +177 -0
- openstef-4.0.0a3.dist-info/RECORD +4 -0
- {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
- openstef/__init__.py +0 -14
- openstef/__main__.py +0 -3
- openstef/app_settings.py +0 -19
- openstef/data/NL_terrestrial_radiation.csv +0 -25585
- openstef/data/NL_terrestrial_radiation.csv.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
- openstef/data/dutch_holidays.csv +0 -1759
- openstef/data/dutch_holidays.csv.license +0 -3
- openstef/data/pv_single_coefs.csv +0 -601
- openstef/data/pv_single_coefs.csv.license +0 -3
- openstef/data_classes/__init__.py +0 -3
- openstef/data_classes/data_prep.py +0 -99
- openstef/data_classes/model_specifications.py +0 -30
- openstef/data_classes/prediction_job.py +0 -135
- openstef/data_classes/split_function.py +0 -97
- openstef/enums.py +0 -140
- openstef/exceptions.py +0 -74
- openstef/feature_engineering/__init__.py +0 -3
- openstef/feature_engineering/apply_features.py +0 -138
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
- openstef/feature_engineering/cyclic_features.py +0 -161
- openstef/feature_engineering/data_preparation.py +0 -152
- openstef/feature_engineering/feature_adder.py +0 -206
- openstef/feature_engineering/feature_applicator.py +0 -202
- openstef/feature_engineering/general.py +0 -141
- openstef/feature_engineering/holiday_features.py +0 -231
- openstef/feature_engineering/lag_features.py +0 -165
- openstef/feature_engineering/missing_values_transformer.py +0 -141
- openstef/feature_engineering/rolling_features.py +0 -58
- openstef/feature_engineering/weather_features.py +0 -492
- openstef/metrics/__init__.py +0 -3
- openstef/metrics/figure.py +0 -303
- openstef/metrics/metrics.py +0 -486
- openstef/metrics/reporter.py +0 -222
- openstef/model/__init__.py +0 -3
- openstef/model/basecase.py +0 -82
- openstef/model/confidence_interval_applicator.py +0 -242
- openstef/model/fallback.py +0 -77
- openstef/model/metamodels/__init__.py +0 -3
- openstef/model/metamodels/feature_clipper.py +0 -90
- openstef/model/metamodels/grouped_regressor.py +0 -222
- openstef/model/metamodels/missing_values_handler.py +0 -138
- openstef/model/model_creator.py +0 -214
- openstef/model/objective.py +0 -426
- openstef/model/objective_creator.py +0 -65
- openstef/model/regressors/__init__.py +0 -3
- openstef/model/regressors/arima.py +0 -197
- openstef/model/regressors/custom_regressor.py +0 -64
- openstef/model/regressors/dazls.py +0 -116
- openstef/model/regressors/flatliner.py +0 -95
- openstef/model/regressors/gblinear_quantile.py +0 -334
- openstef/model/regressors/lgbm.py +0 -29
- openstef/model/regressors/linear.py +0 -90
- openstef/model/regressors/linear_quantile.py +0 -305
- openstef/model/regressors/regressor.py +0 -114
- openstef/model/regressors/xgb.py +0 -52
- openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
- openstef/model/regressors/xgb_quantile.py +0 -228
- openstef/model/serializer.py +0 -431
- openstef/model/standard_deviation_generator.py +0 -81
- openstef/model_selection/__init__.py +0 -3
- openstef/model_selection/model_selection.py +0 -311
- openstef/monitoring/__init__.py +0 -3
- openstef/monitoring/performance_meter.py +0 -92
- openstef/monitoring/teams.py +0 -203
- openstef/pipeline/__init__.py +0 -3
- openstef/pipeline/create_basecase_forecast.py +0 -133
- openstef/pipeline/create_component_forecast.py +0 -168
- openstef/pipeline/create_forecast.py +0 -171
- openstef/pipeline/optimize_hyperparameters.py +0 -317
- openstef/pipeline/train_create_forecast_backtest.py +0 -163
- openstef/pipeline/train_model.py +0 -561
- openstef/pipeline/utils.py +0 -52
- openstef/postprocessing/__init__.py +0 -3
- openstef/postprocessing/postprocessing.py +0 -275
- openstef/preprocessing/__init__.py +0 -3
- openstef/preprocessing/preprocessing.py +0 -42
- openstef/settings.py +0 -15
- openstef/tasks/__init__.py +0 -3
- openstef/tasks/calculate_kpi.py +0 -324
- openstef/tasks/create_basecase_forecast.py +0 -118
- openstef/tasks/create_components_forecast.py +0 -162
- openstef/tasks/create_forecast.py +0 -145
- openstef/tasks/create_solar_forecast.py +0 -420
- openstef/tasks/create_wind_forecast.py +0 -80
- openstef/tasks/optimize_hyperparameters.py +0 -135
- openstef/tasks/split_forecast.py +0 -273
- openstef/tasks/train_model.py +0 -224
- openstef/tasks/utils/__init__.py +0 -3
- openstef/tasks/utils/dependencies.py +0 -107
- openstef/tasks/utils/predictionjobloop.py +0 -243
- openstef/tasks/utils/taskcontext.py +0 -160
- openstef/validation/__init__.py +0 -3
- openstef/validation/validation.py +0 -322
- openstef-3.4.56.dist-info/METADATA +0 -154
- openstef-3.4.56.dist-info/RECORD +0 -102
- openstef-3.4.56.dist-info/top_level.txt +0 -1
- /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
"""This module provides functionality for applying features to the input data to improve forecast accuracy.
|
|
5
|
-
|
|
6
|
-
Examples of features that are added:
|
|
7
|
-
- The load 1 day and 7 days ago at the same time.
|
|
8
|
-
- If a day is a weekday or a holiday.
|
|
9
|
-
- The extrapolated windspeed at 100m.
|
|
10
|
-
- The normalised wind power according to the turbine-specific power curve.
|
|
11
|
-
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
import pandas as pd
|
|
15
|
-
|
|
16
|
-
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
|
17
|
-
from openstef.enums import BiddingZone
|
|
18
|
-
from openstef.feature_engineering.holiday_features import (
|
|
19
|
-
generate_holiday_feature_functions,
|
|
20
|
-
)
|
|
21
|
-
from openstef.feature_engineering.lag_features import generate_lag_feature_functions
|
|
22
|
-
from openstef.feature_engineering.bidding_zone_to_country_mapping import (
|
|
23
|
-
BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING,
|
|
24
|
-
)
|
|
25
|
-
from openstef.feature_engineering.rolling_features import add_rolling_aggregate_features
|
|
26
|
-
from openstef.feature_engineering.weather_features import (
|
|
27
|
-
add_additional_solar_features,
|
|
28
|
-
add_additional_wind_features,
|
|
29
|
-
add_humidity_features,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
from openstef.feature_engineering.cyclic_features import (
|
|
33
|
-
add_seasonal_cyclic_features,
|
|
34
|
-
add_time_cyclic_features,
|
|
35
|
-
add_daylight_terrestrial_feature,
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def apply_features(
    data: pd.DataFrame,
    pj: PredictionJobDataClass = None,
    feature_names: list[str] = None,
    horizon: float = 24.0,
    years: list[int] | None = None,
) -> pd.DataFrame:
    """Add lag, calendar, holiday, weather and cyclic features to the input data.

    The lag, calendar and holiday features are only generated when they are
    listed in ``feature_names`` (or when ``feature_names`` is ``None``, in which
    case all of them are generated). The seasonal, time-of-day and daylight
    features are always added, regardless of ``feature_names``.

    .. note::
        According to the original note on this function, the weather helpers
        currently add all of their features, so unrequested columns have to be
        filtered out later by the caller.

    .. note::
        The lag, calendar and holiday columns are written into the dataframe
        that was passed in (``data.loc[:, name] = ...``), so the caller's frame
        gains those columns as well.

    Args:
        data: Input data indexed by datetime, in the form
            ``pd.DataFrame(index=datetime, columns=[label, predictor_1, ..., predictor_n])``.
            The first column is the one the feature functions are applied to.
        pj: Prediction job. When omitted, a minimal mapping holding only the NL
            bidding zone is used instead.
        feature_names: Names of the requested features. ``None`` requests all
            lag, calendar and holiday features.
        horizon: Forecast horizon limit in hours, forwarded to the lag feature
            generator.
        years: Years for which to create holiday features.

    Returns:
        Dataframe with the original columns plus the generated feature columns:
        ``pd.DataFrame(index=datetime, columns=[label, predictor_1, ..., predictor_n, feature_1, ..., feature_m])``.

    Example input:

    .. code-block:: py

        import pandas as pd
        import numpy as np
        index = pd.date_range(start="2017-01-01 09:00:00", freq="15min", periods=200)
        data = pd.DataFrame(
            index=index,
            data=dict(
                load=np.sin(index.hour / 24 * np.pi) * np.random.uniform(0.7, 1.7, 200)
            ),
        )

    """
    # Without a prediction job, fall back to a mapping that only carries the
    # default bidding zone; the ``.get`` lookups below then behave the same.
    if pj is None:
        pj = {"electricity_bidding_zone": BiddingZone.NL}

    # Start from the lag feature functions.
    feature_functions = generate_lag_feature_functions(feature_names, horizon)

    # Calendar features that are derived purely from the datetime index.
    calendar_functions = {
        "IsWeekendDay": lambda column: (column.index.weekday // 5) == 1,
        "IsWeekDay": lambda column: column.index.weekday < 5,
        "IsSunday": lambda column: column.index.weekday == 6,
        "Month": lambda column: column.index.month,
        "Quarter": lambda column: column.index.quarter,
    }
    feature_functions.update(calendar_functions)

    # Holiday features are generated for the country of the job's bidding zone.
    bidding_zone = pj.get("electricity_bidding_zone", BiddingZone.NL)
    country_code = BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING[bidding_zone.name]
    holiday_functions = generate_holiday_feature_functions(
        country_code=country_code, years=years
    )
    feature_functions.update(holiday_functions)

    # Apply every requested feature function to the first column of the data.
    for name, function in feature_functions.items():
        if feature_names is None or name in feature_names:
            data.loc[:, name] = data.iloc[:, [0]].apply(function)

    # Weather-derived features; when pj is unavailable a default location is
    # used for the solar features.
    data = add_additional_wind_features(data, feature_names)
    data = add_humidity_features(data, feature_names)
    data = add_additional_solar_features(data, pj, feature_names)

    # Cyclic encodings of season, time of day and daylight.
    data = add_seasonal_cyclic_features(data)
    data = add_time_cyclic_features(data)
    data = add_daylight_terrestrial_feature(data)

    # Rolling aggregates are only added when the job configures them.
    rolling_config = pj.get("rolling_aggregate_features")
    if rolling_config is not None:
        data = add_rolling_aggregate_features(data, pj=pj)

    return data
|
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
# Maps an electricity bidding-zone name (key) to a two-letter country code
# (value). Several zones can share one country code, e.g. all "IT_*" zones
# map to "IT" and all "NO_*" zones map to "NO".
# NOTE(review): the values are presumably consumed as holiday-calendar country
# codes -- confirm that every value is a code the holiday lookup accepts.
BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING = {
    "DE_50HZ": "DE",
    "AL": "AL",
    "DE_AMPRION": "DE",
    "AT": "AT",
    "BY": "BY",
    "BE": "BE",
    "BA": "BA",
    "BG": "BG",
    "CZ_DE_SK": "CZ",
    "HR": "HR",
    # NOTE(review): "CWE" maps to itself and is not a two-letter code like the
    # other values -- confirm this is intended.
    "CWE": "CWE",
    "CY": "CY",
    "CZ": "CZ",
    "DE_AT_LU": "DE",
    "DE_LU": "DE",
    "DK": "DK",
    "DK_1": "DK",
    "DK_1_NO_1": "DK",
    "DK_2": "DK",
    "DK_CA": "DK",
    "EE": "EE",
    "FI": "FI",
    "MK": "MK",
    "FR": "FR",
    "DE": "DE",
    "GR": "GR",
    "HU": "HU",
    "IS": "IS",
    "IE_SEM": "IE",
    "IE": "IE",
    "IT": "IT",
    "IT_SACO_AC": "IT",
    "IT_CALA": "IT",
    "IT_SACO_DC": "IT",
    "IT_BRNN": "IT",
    "IT_CNOR": "IT",
    "IT_CSUD": "IT",
    "IT_FOGN": "IT",
    "IT_GR": "IT",
    "IT_MACRO_NORTH": "IT",
    "IT_MACRO_SOUTH": "IT",
    "IT_MALTA": "IT",
    "IT_NORD": "IT",
    "IT_NORD_AT": "IT",
    "IT_NORD_CH": "IT",
    "IT_NORD_FR": "IT",
    "IT_NORD_SI": "IT",
    "IT_PRGP": "IT",
    "IT_ROSN": "IT",
    "IT_SARD": "IT",
    "IT_SICI": "IT",
    "IT_SUD": "IT",
    "RU_KGD": "RU",
    "LV": "LV",
    "LT": "LT",
    "LU": "LU",
    "LU_BZN": "LU",
    "MT": "MT",
    "ME": "ME",
    "GB": "GB",
    "GE": "GE",
    "GB_IFA": "GB",
    "GB_IFA2": "GB",
    "GB_ELECLINK": "GB",
    # NOTE(review): "UK" maps to "UK" while the "GB*" zones map to "GB" --
    # confirm both codes are accepted downstream.
    "UK": "UK",
    "NL": "NL",
    "NO_1": "NO",
    "NO_1A": "NO",
    "NO_2": "NO",
    "NO_2_NSL": "NO",
    "NO_2A": "NO",
    "NO_3": "NO",
    "NO_4": "NO",
    "NO_5": "NO",
    "NO": "NO",
    "PL_CZ": "PL",
    "PL": "PL",
    "PT": "PT",
    "MD": "MD",
    "RO": "RO",
    "RU": "RU",
    "SE_1": "SE",
    "SE_2": "SE",
    "SE_3": "SE",
    "SE_4": "SE",
    "RS": "RS",
    "SK": "SK",
    "SI": "SI",
    "GB_NIR": "GB",
    "ES": "ES",
    "SE": "SE",
    "CH": "CH",
    "DE_TENNET": "DE",
    "DE_TRANSNET": "DE",
    "TR": "TR",
    "UA": "UA",
    "UA_DOBTPP": "UA",
    "UA_BEI": "UA",
    "UA_IPS": "UA",
    "XK": "XK",
    "DE_AMP_LU": "DE",
}
|
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
|
|
5
|
-
# Module for adding temporal cyclic features to time-based data for capturing seasonality and periodic patterns.
|
|
6
|
-
# Features include yearly, weekly, and monthly seasonality, as well as time-of-day periodicity.
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
import numpy as np
|
|
10
|
-
import pandas as pd
|
|
11
|
-
|
|
12
|
-
import structlog
|
|
13
|
-
import logging
|
|
14
|
-
|
|
15
|
-
from openstef.settings import Settings
|
|
16
|
-
from openstef import PROJECT_ROOT
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
# Configure structlog at import time so that log records below the level
# named by ``Settings.log_level`` are filtered out, then create the logger
# for this module.
structlog.configure(
    wrapper_class=structlog.make_filtering_bound_logger(
        logging.getLevelName(Settings.log_level)
    )
)
logger = structlog.get_logger(__name__)

# Default location of the terrestrial radiation CSV shipped with the package.
# NOTE(review): the value is built with the ``/`` operator on PROJECT_ROOT, so
# it is presumably a path object rather than the annotated ``str`` -- confirm.
TERRESTRIAL_RADIATION_CSV_PATH: str = (
    PROJECT_ROOT / "openstef" / "data" / "NL_terrestrial_radiation.csv"
)
# Number of seconds in one day; used to scale the second-of-day to a full circle.
NUM_SECONDS_IN_A_DAY = 24 * 60 * 60
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def add_daylight_terrestrial_feature(
    data: pd.DataFrame,
    path_to_terrestrial_radiation_csv: str = TERRESTRIAL_RADIATION_CSV_PATH,
) -> pd.DataFrame:
    """Add a normalised terrestrial radiation column named ``daylight_continuous``.

    The radiation series is read from a CSV file, shifted by a whole number of
    years so that its first year coincides with the first year of ``data``,
    resampled to 15-minute means with gaps interpolated, z-score normalised and
    finally left-joined onto ``data`` by index.

    Args:
        data: Time-indexed input dataset.
        path_to_terrestrial_radiation_csv: Path to the CSV file holding the
            terrestrial radiation data. Its first column is used as the
            (datetime) index and it must contain exactly one value column.

    Returns:
        A new dataframe equal to ``data`` with the extra ``daylight_continuous``
        column. Rows of ``data`` without a matching radiation timestamp get NaN
        because a left join is used.

    Notes:
        - The input data and the radiation data are assumed to share the same
          time zone and frequency alignment.
        - Only a single year offset is applied, derived from the earliest
          timestamp of both series.

    """
    # Read the radiation series and give it a datetime index.
    radiation = pd.read_csv(path_to_terrestrial_radiation_csv, index_col=0)
    radiation.index = pd.to_datetime(radiation.index)

    # Shift the radiation series so that its first year matches the data's.
    first_data_year = data.index.min().year
    first_radiation_year = radiation.index.min().year
    year_shift = pd.DateOffset(years=first_data_year - first_radiation_year)
    radiation.index = radiation.index + year_shift

    # Bring the series onto a 15-minute grid, filling gaps by interpolation.
    radiation = radiation.resample("15min").mean().interpolate()

    # z-score normalisation, column by column.
    centred = radiation - radiation.mean(axis=0)
    radiation = centred / radiation.std(axis=0)
    radiation.columns = ["daylight_continuous"]

    # Join onto a copy so the caller's dataframe is left untouched.
    return data.copy().merge(
        radiation, left_index=True, right_index=True, how="left"
    )
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def add_time_cyclic_features(
    data: pd.DataFrame,
) -> pd.DataFrame:
    """Add sine/cosine encodings of the time of day to the input data.

    The second of the day of every timestamp is mapped onto a full circle and
    stored as the columns ``time0fday_sine`` and ``time0fday_cosine``.

    Args:
        data: Dataframe indexed by datetime.

    Returns:
        A copy of the input dataframe with the two extra time-of-day columns.

    Raises:
        ValueError: If the index of ``data`` is not a ``pd.DatetimeIndex``.

    """
    if not isinstance(data.index, pd.DatetimeIndex):
        raise ValueError("Index should be a pandas DatetimeIndex")

    # Work on a copy so the caller's dataframe is not modified.
    result = data.copy()
    timestamps = result.index

    seconds_since_midnight = (
        timestamps.second + timestamps.minute * 60 + timestamps.hour * 60 * 60
    )
    # Angle in radians: one full turn per day.
    angle = 2 * np.pi * seconds_since_midnight / NUM_SECONDS_IN_A_DAY

    for column_name, trig_function in (
        ("time0fday_sine", np.sin),
        ("time0fday_cosine", np.cos),
    ):
        result[column_name] = trig_function(angle)

    return result
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
def add_seasonal_cyclic_features(
    data: pd.DataFrame, compute_features: list = None
) -> pd.DataFrame:
    """Add sine/cosine encodings of the season, day of week and month.

    Args:
        data: DataFrame with a DatetimeIndex.
        compute_features: Optional list selecting which encodings to add.
            Options are ``'season'``, ``'dayofweek'`` and ``'month'``. When it is
            ``None`` or empty, all three are added.

    Returns:
        A copy of the input dataframe with, per selected option, a ``*_sine``
        and a ``*_cosine`` column: ``season_*`` (day of year over 365.25 days),
        ``day0fweek_*`` (day of week over 7 days) and ``month_*`` (month over
        12 months).

    Raises:
        ValueError: If the index of ``data`` is not a ``pd.DatetimeIndex``.

    Example:
        >>> data = pd.DataFrame(index=pd.date_range(start='2023-01-01', periods=365, freq='D'))
        >>> data_with_features = add_seasonal_cyclic_features(data)
        >>> print(data_with_features.head())

    """
    if not isinstance(data.index, pd.DatetimeIndex):
        raise ValueError("The DataFrame index must be a DatetimeIndex.")

    # Work on a copy so the caller's dataframe is not modified.
    result = data.copy()
    timestamps = result.index

    # A missing or empty selection means: compute everything.
    if compute_features:
        selected = compute_features
    else:
        selected = ["season", "dayofweek", "month"]

    days_in_year = 365.25  # Account for leap years

    # Yearly seasonality, based on the day of the year.
    if "season" in selected:
        season_angle = 2 * np.pi * timestamps.dayofyear / days_in_year
        result["season_sine"] = np.sin(season_angle)
        result["season_cosine"] = np.cos(season_angle)

    # Weekly pattern, based on the day of the week.
    if "dayofweek" in selected:
        weekday_angle = 2 * np.pi * timestamps.day_of_week / 7
        result["day0fweek_sine"] = np.sin(weekday_angle)
        result["day0fweek_cosine"] = np.cos(weekday_angle)

    # Monthly pattern, based on the month of the year.
    if "month" in selected:
        month_angle = 2 * np.pi * timestamps.month / 12
        result["month_sine"] = np.sin(month_angle)
        result["month_cosine"] = np.cos(month_angle)

    return result
|
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
import logging
|
|
5
|
-
from abc import ABC, abstractmethod
|
|
6
|
-
from datetime import timedelta
|
|
7
|
-
from typing import Optional
|
|
8
|
-
|
|
9
|
-
import pandas as pd
|
|
10
|
-
import structlog
|
|
11
|
-
|
|
12
|
-
from openstef.data_classes.model_specifications import ModelSpecificationDataClass
|
|
13
|
-
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
|
14
|
-
from openstef.feature_engineering.feature_applicator import (
|
|
15
|
-
OperationalPredictFeatureApplicator,
|
|
16
|
-
TrainFeatureApplicator,
|
|
17
|
-
)
|
|
18
|
-
from openstef.feature_engineering.general import (
|
|
19
|
-
enforce_feature_order,
|
|
20
|
-
remove_non_requested_feature_columns,
|
|
21
|
-
)
|
|
22
|
-
from openstef.model.regressors.regressor import OpenstfRegressor
|
|
23
|
-
from openstef.pipeline.utils import generate_forecast_datetime_range
|
|
24
|
-
from openstef.settings import Settings
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class AbstractDataPreparation(ABC):
    """Base class for objects that prepare data for training and forecasting.

    Subclasses implement ``prepare_train_data`` and ``prepare_forecast_data``.
    """

    def __init__(
        self,
        pj: PredictionJobDataClass,
        model_specs: ModelSpecificationDataClass,
        model: Optional[OpenstfRegressor] = None,
        horizons: Optional[list[float]] = None,
    ) -> None:
        """Store the prediction job, model specifications, model and horizons.

        Args:
            pj: Prediction job.
            model_specs: Model specifications.
            model: Optional model; it must be set before forecast data can be
                prepared (see ``check_model``).
            horizons: Optional list of horizons.

        """
        super().__init__()
        self.pj, self.model_specs = pj, model_specs
        self.model, self.horizons = model, horizons

    @abstractmethod
    def prepare_train_data(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return the data prepared for training a model."""

    @abstractmethod
    def prepare_forecast_data(
        self, data: pd.DataFrame
    ) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Return a pair of dataframes prepared for making a forecast."""

    def check_model(self):
        """Raise a ``ValueError`` when no model was provided to this instance."""
        if self.model is not None:
            return
        raise ValueError(
            "If no model has been provided to the data prep class, it cannot perform preparation for forecast task!"
        )
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
class LegacyDataPreparation(AbstractDataPreparation):
    """Data preparation that delegates feature creation to the feature applicators."""

    def prepare_train_data(self, data: pd.DataFrame) -> pd.DataFrame:
        """Add the training features to the data.

        Args:
            data: Input data for training.

        Returns:
            The dataframe returned by ``TrainFeatureApplicator.add_features``.

        """
        # When no horizons were configured, fall back to a single horizon equal
        # to the job resolution. The fallback is expressed as a list of hours,
        # which is the same form prepare_forecast_data passes to its applicator
        # (previously the bare number of minutes was passed here).
        horizons = self.horizons or [self.pj.resolution_minutes / 60.0]

        features_applicator = TrainFeatureApplicator(
            horizons=horizons,
            feature_names=self.model_specs.feature_names,
            feature_modules=self.model_specs.feature_modules,
        )
        return features_applicator.add_features(data, pj=self.pj)

    def prepare_forecast_data(
        self, data: pd.DataFrame
    ) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Add the forecast features and select the forecast interval.

        Args:
            data: Input data for forecasting; it must contain a ``load`` column.

        Returns:
            A tuple ``(forecast_input_data, data_with_features)``: the rows of
            the forecast interval without the ``load`` column, and the complete
            data including all added features.

        Raises:
            ValueError: If no model was provided to this instance.

        """
        self.check_model()

        features_applicator = OperationalPredictFeatureApplicator(
            horizons=[self.pj["resolution_minutes"] / 60.0],
            feature_names=self.model.feature_names,
            feature_modules=self.model_specs.feature_modules,
        )
        data_with_features = features_applicator.add_features(data)

        # Prep forecast input by selecting only the forecast datetime interval
        # (this is much smaller than the input range). Also drop the load column.
        forecast_start, forecast_end = generate_forecast_datetime_range(
            data_with_features
        )
        forecast_input_data = data_with_features[forecast_start:forecast_end].drop(
            columns="load"
        )

        return forecast_input_data, data_with_features
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
class ARDataPreparation(AbstractDataPreparation):
    """Data preparation that uses the input columns directly, without feature applicators.

    NOTE(review): the name suggests this targets autoregressive models -- confirm.
    """

    def __init__(
        self,
        pj: PredictionJobDataClass,
        model_specs: ModelSpecificationDataClass,
        model: Optional[OpenstfRegressor] = None,
        horizons: Optional[list[float]] = None,
        historical_depth: Optional[int] = None,
    ) -> None:
        """Store the base-class arguments plus the historical depth.

        Args:
            pj: Prediction job.
            model_specs: Model specifications.
            model: Optional model; required for ``prepare_forecast_data``.
            horizons: Optional list of horizons.
            historical_depth: Optional number of resolution steps of history
                before the forecast start that is handed to the model.

        """
        super().__init__(pj, model_specs, model, horizons)
        self.historical_depth = historical_depth

    def prepare_train_data(self, data: pd.DataFrame) -> pd.DataFrame:
        """Select and order the requested columns and drop rows without a value in the first column.

        Args:
            data: Input data; it must contain a ``load`` column.

        Returns:
            The filtered, index-sorted dataframe.

        """
        # Add dummy horizon column
        # NOTE(review): this writes into the dataframe passed in by the caller.
        data["horizon"] = 0

        # remove non requested feature
        features = self.model_specs.feature_names + ["horizon"]
        result = remove_non_requested_feature_columns(data, features)

        # Sort all features except for the (first) load and (last) horizon columns
        result = result[["load"] + [c for c in result.columns if c != "load"]]
        result = result.sort_index()
        result = enforce_feature_order(result)

        # Keep only the rows that have a value in the first column
        # (presumably still the load column after enforce_feature_order -- TODO confirm).
        result = result[result.iloc[:, 0].notna()]
        return result

    def prepare_forecast_data(
        self, data: pd.DataFrame
    ) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Select the forecast interval and hand the preceding history to the model.

        Args:
            data: Input data; it must contain ``load`` and all model feature columns.

        Returns:
            A tuple ``(forecast_input_data, data)``: the rows of the forecast
            interval without the ``load`` column, and the selected data.

        Raises:
            ValueError: If no model was provided to this instance.

        """
        # NOTE(review): structlog is (re)configured on every call of this method.
        structlog.configure(
            wrapper_class=structlog.make_filtering_bound_logger(
                logging.getLevelName(Settings.log_level)
            )
        )
        logger = structlog.get_logger(__name__)
        self.check_model()
        # Prep forecast input by selecting only the forecast datetime interval (this is much smaller than the input range)
        # Also drop the load column
        data = data[["load"] + self.model.feature_names]
        forecast_start, forecast_end = generate_forecast_datetime_range(data)
        forecast_input_data = data[forecast_start:forecast_end].drop(columns="load")

        # Without a historical depth the slice below is open-ended at the start,
        # i.e. all data up to the forecast start is used as history.
        historical_start = None
        if self.historical_depth:
            historical_start = forecast_start - self.historical_depth * timedelta(
                minutes=self.pj.resolution_minutes
            )
        # Drop the last row of the slice (presumably the forecast start itself -- TODO confirm).
        past_data = data[historical_start:forecast_start].iloc[:-1]
        self.model.update_historic_data(
            past_data.drop(columns="load"), past_data["load"]
        )
        logger.info(
            "Watch-out side effect on the model performed in the feature builder to update the historical data."
        )

        # Zero out the columns named in the index of the model's feature importance dataframe.
        # NOTE(review): forecast_input_data was created before this assignment;
        # confirm it is meant to keep the original (non-zero) values.
        data[self.model.feature_importance_dataframe.index.tolist()] = 0
        return forecast_input_data, data
|