openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/app_settings.py +19 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
- openstef/data/dutch_holidays.csv +1759 -0
- openstef/data_classes/data_prep.py +1 -1
- openstef/data_classes/prediction_job.py +15 -9
- openstef/enums.py +108 -9
- openstef/exceptions.py +1 -1
- openstef/feature_engineering/apply_features.py +25 -6
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
- openstef/feature_engineering/cyclic_features.py +102 -0
- openstef/feature_engineering/data_preparation.py +12 -5
- openstef/feature_engineering/feature_applicator.py +1 -5
- openstef/feature_engineering/general.py +14 -0
- openstef/feature_engineering/holiday_features.py +35 -26
- openstef/feature_engineering/missing_values_transformer.py +141 -0
- openstef/feature_engineering/weather_features.py +7 -0
- openstef/metrics/figure.py +3 -0
- openstef/metrics/metrics.py +58 -1
- openstef/metrics/reporter.py +7 -0
- openstef/model/confidence_interval_applicator.py +28 -3
- openstef/model/model_creator.py +54 -41
- openstef/model/objective.py +17 -34
- openstef/model/objective_creator.py +13 -12
- openstef/model/regressors/arima.py +1 -1
- openstef/model/regressors/dazls.py +35 -96
- openstef/model/regressors/flatliner.py +95 -0
- openstef/model/regressors/linear_quantile.py +296 -0
- openstef/model/regressors/xgb.py +23 -0
- openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
- openstef/model/regressors/xgb_quantile.py +3 -0
- openstef/model/serializer.py +10 -0
- openstef/model_selection/model_selection.py +4 -1
- openstef/monitoring/performance_meter.py +1 -2
- openstef/monitoring/teams.py +11 -0
- openstef/pipeline/create_basecase_forecast.py +11 -1
- openstef/pipeline/create_component_forecast.py +24 -28
- openstef/pipeline/create_forecast.py +20 -1
- openstef/pipeline/optimize_hyperparameters.py +18 -16
- openstef/pipeline/train_create_forecast_backtest.py +11 -1
- openstef/pipeline/train_model.py +31 -12
- openstef/pipeline/utils.py +3 -0
- openstef/postprocessing/postprocessing.py +29 -0
- openstef/settings.py +15 -0
- openstef/tasks/calculate_kpi.py +23 -20
- openstef/tasks/create_basecase_forecast.py +15 -7
- openstef/tasks/create_components_forecast.py +24 -8
- openstef/tasks/create_forecast.py +9 -6
- openstef/tasks/create_solar_forecast.py +4 -4
- openstef/tasks/optimize_hyperparameters.py +2 -2
- openstef/tasks/split_forecast.py +9 -2
- openstef/tasks/train_model.py +9 -7
- openstef/tasks/utils/taskcontext.py +7 -0
- openstef/validation/validation.py +28 -3
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
- openstef-3.4.44.dist-info/RECORD +97 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
- openstef/data/dutch_holidays_2020-2022.csv +0 -831
- openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
- openstef/feature_engineering/historic_features.py +0 -40
- openstef/model/regressors/proloaf.py +0 -281
- openstef/tasks/run_tracy.py +0 -145
- openstef-3.4.10.dist-info/RECORD +0 -104
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
@@ -6,10 +6,10 @@ from typing import Optional, Union
|
|
6
6
|
|
7
7
|
from pydantic.v1 import BaseModel
|
8
8
|
|
9
|
+
from openstef.data_classes.data_prep import DataPrepDataClass
|
9
10
|
from openstef.data_classes.model_specifications import ModelSpecificationDataClass
|
10
11
|
from openstef.data_classes.split_function import SplitFuncDataClass
|
11
|
-
from openstef.
|
12
|
-
from openstef.enums import PipelineType
|
12
|
+
from openstef.enums import PipelineType, BiddingZone
|
13
13
|
|
14
14
|
|
15
15
|
class PredictionJobDataClass(BaseModel):
|
@@ -25,11 +25,15 @@ class PredictionJobDataClass(BaseModel):
|
|
25
25
|
- ``"xgb_quantile"``
|
26
26
|
- ``"lgb"``
|
27
27
|
- ``"linear"``
|
28
|
-
- ``"
|
28
|
+
- ``"linear_quantile"``
|
29
|
+
- ``"xgb_multioutput_quantile"``
|
30
|
+
- ``"flatliner"``
|
29
31
|
|
30
32
|
If unsure what to pick, choose ``"xgb"``.
|
31
33
|
|
32
34
|
"""
|
35
|
+
model_kwargs: Optional[dict]
|
36
|
+
"""The model parameters that should be used."""
|
33
37
|
forecast_type: str
|
34
38
|
"""The type of forecasts that should be made.
|
35
39
|
|
@@ -41,15 +45,17 @@ class PredictionJobDataClass(BaseModel):
|
|
41
45
|
If unsure what to pick, choose ``"demand"``.
|
42
46
|
|
43
47
|
"""
|
44
|
-
horizon_minutes: int = 2880
|
45
|
-
"""The horizon of the desired forecast in minutes. Defaults to 2880 minutes (i.e. 2 days)."""
|
48
|
+
horizon_minutes: Optional[int] = 2880
|
49
|
+
"""The horizon of the desired forecast in minutes used in tasks. Defaults to 2880 minutes (i.e. 2 days)."""
|
46
50
|
resolution_minutes: int
|
47
51
|
"""The resolution of the desired forecast in minutes."""
|
48
|
-
lat: float
|
49
|
-
"""Latitude of the forecasted location in degrees."""
|
50
|
-
lon: float
|
51
|
-
"""Longitude of the forecasted location in degrees."""
|
52
|
+
lat: Optional[float] = 52.132633
|
53
|
+
"""Latitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derrived features and component splitting."""
|
54
|
+
lon: Optional[float] = 5.291266
|
55
|
+
"""Longitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derrived features and component splitting."""
|
52
56
|
name: str
|
57
|
+
"""Bidding zone is used to determine the electricity price. It is also used to determine the holidays that should be used. Currently only ENTSO-E bidding zones are supported."""
|
58
|
+
electricity_bidding_zone: Optional[BiddingZone] = BiddingZone.NL
|
53
59
|
"""Name of the forecast, e.g. the location name."""
|
54
60
|
train_components: Optional[bool]
|
55
61
|
"""Whether splitting the forecasts in wind, solar, rest is desired."""
|
openstef/enums.py
CHANGED
@@ -4,14 +4,119 @@
|
|
4
4
|
from enum import Enum
|
5
5
|
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
class BiddingZone(Enum):
    """Identifiers of the electricity bidding zones known to the package.

    Every member's value is the same string as its name, so a member can be
    looked up either by name (``BiddingZone["NL"]``) or by value
    (``BiddingZone("NL")``). The member order below is the iteration order.
    """

    DE_50HZ = "DE_50HZ"
    AL = "AL"
    DE_AMPRION = "DE_AMPRION"
    AT = "AT"
    BY = "BY"
    BE = "BE"
    BA = "BA"
    BG = "BG"
    CZ_DE_SK = "CZ_DE_SK"
    HR = "HR"
    CWE = "CWE"
    CY = "CY"
    CZ = "CZ"
    DE_AT_LU = "DE_AT_LU"
    DE_LU = "DE_LU"
    DK = "DK"
    DK_1 = "DK_1"
    DK_1_NO_1 = "DK_1_NO_1"
    DK_2 = "DK_2"
    DK_CA = "DK_CA"
    EE = "EE"
    FI = "FI"
    MK = "MK"
    FR = "FR"
    DE = "DE"
    GR = "GR"
    HU = "HU"
    IS = "IS"
    IE_SEM = "IE_SEM"
    IE = "IE"
    IT = "IT"
    IT_SACO_AC = "IT_SACO_AC"
    IT_CALA = "IT_CALA"
    IT_SACO_DC = "IT_SACO_DC"
    IT_BRNN = "IT_BRNN"
    IT_CNOR = "IT_CNOR"
    IT_CSUD = "IT_CSUD"
    IT_FOGN = "IT_FOGN"
    IT_GR = "IT_GR"
    IT_MACRO_NORTH = "IT_MACRO_NORTH"
    IT_MACRO_SOUTH = "IT_MACRO_SOUTH"
    IT_MALTA = "IT_MALTA"
    IT_NORD = "IT_NORD"
    IT_NORD_AT = "IT_NORD_AT"
    IT_NORD_CH = "IT_NORD_CH"
    IT_NORD_FR = "IT_NORD_FR"
    IT_NORD_SI = "IT_NORD_SI"
    IT_PRGP = "IT_PRGP"
    IT_ROSN = "IT_ROSN"
    IT_SARD = "IT_SARD"
    IT_SICI = "IT_SICI"
    IT_SUD = "IT_SUD"
    RU_KGD = "RU_KGD"
    LV = "LV"
    LT = "LT"
    LU = "LU"
    LU_BZN = "LU_BZN"
    MT = "MT"
    ME = "ME"
    GB = "GB"
    GE = "GE"
    GB_IFA = "GB_IFA"
    GB_IFA2 = "GB_IFA2"
    GB_ELECLINK = "GB_ELECLINK"
    UK = "UK"
    NL = "NL"
    NO_1 = "NO_1"
    NO_1A = "NO_1A"
    NO_2 = "NO_2"
    NO_2_NSL = "NO_2_NSL"
    NO_2A = "NO_2A"
    NO_3 = "NO_3"
    NO_4 = "NO_4"
    NO_5 = "NO_5"
    NO = "NO"
    PL_CZ = "PL_CZ"
    PL = "PL"
    PT = "PT"
    MD = "MD"
    RO = "RO"
    RU = "RU"
    SE_1 = "SE_1"
    SE_2 = "SE_2"
    SE_3 = "SE_3"
    SE_4 = "SE_4"
    RS = "RS"
    SK = "SK"
    SI = "SI"
    GB_NIR = "GB_NIR"
    ES = "ES"
    SE = "SE"
    CH = "CH"
    DE_TENNET = "DE_TENNET"
    DE_TRANSNET = "DE_TRANSNET"
    TR = "TR"
    UA = "UA"
    UA_DOBTPP = "UA_DOBTPP"
    UA_BEI = "UA_BEI"
    UA_IPS = "UA_IPS"
    XK = "XK"
    DE_AMP_LU = "DE_AMP_LU"
|
109
|
+
|
110
|
+
|
111
|
+
class ModelType(Enum):
|
9
112
|
XGB = "xgb"
|
10
113
|
XGB_QUANTILE = "xgb_quantile"
|
114
|
+
XGB_MULTIOUTPUT_QUANTILE = "xgb_multioutput_quantile"
|
11
115
|
LGB = "lgb"
|
12
116
|
LINEAR = "linear"
|
13
|
-
|
117
|
+
LINEAR_QUANTILE = "linear_quantile"
|
14
118
|
ARIMA = "arima"
|
119
|
+
FLATLINER = "flatliner"
|
15
120
|
|
16
121
|
|
17
122
|
class ForecastType(Enum):
|
@@ -21,12 +126,6 @@ class ForecastType(Enum):
|
|
21
126
|
BASECASE = "basecase"
|
22
127
|
|
23
128
|
|
24
|
-
class TracyJobResult(Enum):
|
25
|
-
SUCCESS = "success"
|
26
|
-
FAILED = "failed"
|
27
|
-
UNKNOWN = "unknown"
|
28
|
-
|
29
|
-
|
30
129
|
class PipelineType(Enum):
|
31
130
|
FORECAST = "forecast"
|
32
131
|
TRAIN = "train"
|
openstef/exceptions.py
CHANGED
@@ -14,19 +14,25 @@ Examples of features that are added:
|
|
14
14
|
import pandas as pd
|
15
15
|
|
16
16
|
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
17
|
-
from openstef.
|
18
|
-
add_historic_load_as_a_feature,
|
19
|
-
)
|
17
|
+
from openstef.enums import BiddingZone
|
20
18
|
from openstef.feature_engineering.holiday_features import (
|
21
19
|
generate_holiday_feature_functions,
|
22
20
|
)
|
23
21
|
from openstef.feature_engineering.lag_features import generate_lag_feature_functions
|
22
|
+
from openstef.feature_engineering.bidding_zone_to_country_mapping import (
|
23
|
+
BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING,
|
24
|
+
)
|
24
25
|
from openstef.feature_engineering.weather_features import (
|
25
26
|
add_additional_solar_features,
|
26
27
|
add_additional_wind_features,
|
27
28
|
add_humidity_features,
|
28
29
|
)
|
29
30
|
|
31
|
+
from openstef.feature_engineering.cyclic_features import (
|
32
|
+
add_seasonal_cyclic_features,
|
33
|
+
add_time_cyclic_features,
|
34
|
+
)
|
35
|
+
|
30
36
|
|
31
37
|
def apply_features(
|
32
38
|
data: pd.DataFrame,
|
@@ -61,6 +67,7 @@ def apply_features(
|
|
61
67
|
|
62
68
|
import pandas as pd
|
63
69
|
import numpy as np
|
70
|
+
from geopy.geocoders import Nominatim
|
64
71
|
index = pd.date_range(start = "2017-01-01 09:00:00",
|
65
72
|
freq = '15T', periods = 200)
|
66
73
|
data = pd.DataFrame(index = index,
|
@@ -69,8 +76,8 @@ def apply_features(
|
|
69
76
|
np.random.uniform(0.7,1.7, 200)))
|
70
77
|
|
71
78
|
"""
|
72
|
-
|
73
|
-
|
79
|
+
if pj is None:
|
80
|
+
pj = {"electricity_bidding_zone": BiddingZone.NL}
|
74
81
|
|
75
82
|
# Get lag feature functions
|
76
83
|
feature_functions = generate_lag_feature_functions(feature_names, horizon)
|
@@ -86,8 +93,14 @@ def apply_features(
|
|
86
93
|
}
|
87
94
|
)
|
88
95
|
|
96
|
+
# Get country code from bidding zone if available
|
97
|
+
electricity_bidding_zone = pj.get("electricity_bidding_zone", BiddingZone.NL)
|
98
|
+
country_code = BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING[electricity_bidding_zone.name]
|
99
|
+
|
89
100
|
# Get holiday feature functions
|
90
|
-
feature_functions.update(
|
101
|
+
feature_functions.update(
|
102
|
+
generate_holiday_feature_functions(country_code=country_code)
|
103
|
+
)
|
91
104
|
|
92
105
|
# Add the features to the dataframe using previously defined feature functions
|
93
106
|
for key, featfunc in feature_functions.items():
|
@@ -105,5 +118,11 @@ def apply_features(
|
|
105
118
|
# Add solar features; when pj is unavailable a default location is used.
|
106
119
|
data = add_additional_solar_features(data, pj, feature_names)
|
107
120
|
|
121
|
+
# Adds cyclical features to capture seasonal and periodic patterns in time-based data.
|
122
|
+
data = add_seasonal_cyclic_features(data)
|
123
|
+
|
124
|
+
# Adds polar time features (sine and cosine) to capture periodic patterns based on the timestamp index.
|
125
|
+
data = add_time_cyclic_features(data)
|
126
|
+
|
108
127
|
# Return dataframe including all requested features
|
109
128
|
return data
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
# Lookup table from a bidding-zone name (string key) to a short country code.
# Sub-zones and cross-border zones are collapsed onto a single country, e.g.
# every "IT_*" zone maps to "IT" and "DK_1_NO_1" maps to "DK".
# NOTE(review): "CWE" maps to itself and does not look like a country code;
# presumably consumers of this table must cope with that value - confirm.
BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING = {
    "DE_50HZ": "DE",
    "AL": "AL",
    "DE_AMPRION": "DE",
    "AT": "AT",
    "BY": "BY",
    "BE": "BE",
    "BA": "BA",
    "BG": "BG",
    "CZ_DE_SK": "CZ",
    "HR": "HR",
    "CWE": "CWE",
    "CY": "CY",
    "CZ": "CZ",
    "DE_AT_LU": "DE",
    "DE_LU": "DE",
    "DK": "DK",
    "DK_1": "DK",
    "DK_1_NO_1": "DK",
    "DK_2": "DK",
    "DK_CA": "DK",
    "EE": "EE",
    "FI": "FI",
    "MK": "MK",
    "FR": "FR",
    "DE": "DE",
    "GR": "GR",
    "HU": "HU",
    "IS": "IS",
    "IE_SEM": "IE",
    "IE": "IE",
    "IT": "IT",
    "IT_SACO_AC": "IT",
    "IT_CALA": "IT",
    "IT_SACO_DC": "IT",
    "IT_BRNN": "IT",
    "IT_CNOR": "IT",
    "IT_CSUD": "IT",
    "IT_FOGN": "IT",
    "IT_GR": "IT",
    "IT_MACRO_NORTH": "IT",
    "IT_MACRO_SOUTH": "IT",
    "IT_MALTA": "IT",
    "IT_NORD": "IT",
    "IT_NORD_AT": "IT",
    "IT_NORD_CH": "IT",
    "IT_NORD_FR": "IT",
    "IT_NORD_SI": "IT",
    "IT_PRGP": "IT",
    "IT_ROSN": "IT",
    "IT_SARD": "IT",
    "IT_SICI": "IT",
    "IT_SUD": "IT",
    "RU_KGD": "RU",
    "LV": "LV",
    "LT": "LT",
    "LU": "LU",
    "LU_BZN": "LU",
    "MT": "MT",
    "ME": "ME",
    "GB": "GB",
    "GE": "GE",
    "GB_IFA": "GB",
    "GB_IFA2": "GB",
    "GB_ELECLINK": "GB",
    "UK": "UK",
    "NL": "NL",
    "NO_1": "NO",
    "NO_1A": "NO",
    "NO_2": "NO",
    "NO_2_NSL": "NO",
    "NO_2A": "NO",
    "NO_3": "NO",
    "NO_4": "NO",
    "NO_5": "NO",
    "NO": "NO",
    "PL_CZ": "PL",
    "PL": "PL",
    "PT": "PT",
    "MD": "MD",
    "RO": "RO",
    "RU": "RU",
    "SE_1": "SE",
    "SE_2": "SE",
    "SE_3": "SE",
    "SE_4": "SE",
    "RS": "RS",
    "SK": "SK",
    "SI": "SI",
    "GB_NIR": "GB",
    "ES": "ES",
    "SE": "SE",
    "CH": "CH",
    "DE_TENNET": "DE",
    "DE_TRANSNET": "DE",
    "TR": "TR",
    "UA": "UA",
    "UA_DOBTPP": "UA",
    "UA_BEI": "UA",
    "UA_IPS": "UA",
    "XK": "XK",
    "DE_AMP_LU": "DE",
}
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
|
5
|
+
# Module for adding temporal cyclic features to time-based data for capturing seasonality and periodic patterns.
|
6
|
+
# Features include yearly, weekly, and monthly seasonality, as well as time-of-day periodicity.
|
7
|
+
|
8
|
+
|
9
|
+
import numpy as np
|
10
|
+
import pandas as pd
|
11
|
+
|
12
|
+
import structlog
|
13
|
+
import logging
|
14
|
+
|
15
|
+
from openstef.settings import Settings
|
16
|
+
|
17
|
+
structlog.configure(
|
18
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
19
|
+
logging.getLevelName(Settings.log_level)
|
20
|
+
)
|
21
|
+
)
|
22
|
+
logger = structlog.get_logger(__name__)
|
23
|
+
|
24
|
+
|
25
|
+
NUM_SECONDS_IN_A_DAY = 24 * 60 * 60


def add_time_cyclic_features(
    data: pd.DataFrame,
) -> pd.DataFrame:
    """Encode the time of day of every row as a sine/cosine pair.

    The clock time of each index entry (hours, minutes and whole seconds) is
    mapped onto a full circle per day, so that 23:59 and 00:00 end up close
    together.

    Args:
        data: Dataframe indexed by datetime.

    Returns:
        A copy of ``data`` with two extra columns, ``time0fday_sine`` and
        ``time0fday_cosine`` (note that the column names are spelled with a
        zero). The input dataframe itself is left untouched.

    Raises:
        ValueError: If the index of ``data`` is not a ``pd.DatetimeIndex``.

    """
    if not isinstance(data.index, pd.DatetimeIndex):
        raise ValueError("Index should be a pandas DatetimeIndex")

    timestamps = data.index
    seconds_since_midnight = (
        timestamps.hour * 60 * 60 + timestamps.minute * 60 + timestamps.second
    )
    # Fraction of the day expressed as an angle in radians (0 .. 2*pi).
    angle = seconds_since_midnight * (2 * np.pi) / NUM_SECONDS_IN_A_DAY

    # Work on a copy so the caller's dataframe is not modified.
    result = data.copy()
    for column, transform in (
        ("time0fday_sine", np.sin),
        ("time0fday_cosine", np.cos),
    ):
        result[column] = transform(angle)

    return result
|
55
|
+
|
56
|
+
|
57
|
+
def add_seasonal_cyclic_features(
    data: pd.DataFrame, compute_features: list = None
) -> pd.DataFrame:
    """Add sine/cosine encoded calendar features to time-indexed data.

    Depending on ``compute_features`` the following columns are added:

    - ``"season"``: ``season_sine`` / ``season_cosine`` from the day of the
      year, using a year length of 365.25 days.
    - ``"dayofweek"``: ``day0fweek_sine`` / ``day0fweek_cosine`` from the day
      of the week (note that the column names are spelled with a zero).
    - ``"month"``: ``month_sine`` / ``month_cosine`` from the month number.

    Args:
        data: DataFrame with a DatetimeIndex.
        compute_features: Optional list of features to compute. Options are
            ``['season', 'dayofweek', 'month']``. ``None`` or an empty list
            selects all of them; names outside these options are ignored.

    Returns:
        A copy of ``data`` with the requested cyclical feature columns added.
        The input dataframe itself is left untouched.

    Raises:
        ValueError: If the index of ``data`` is not a ``pd.DatetimeIndex``.

    Example:
        >>> data = pd.DataFrame(index=pd.date_range(start='2023-01-01', periods=365, freq='D'))
        >>> data_with_features = add_seasonal_cyclic_features(data)
        >>> print(data_with_features.head())

    """
    # Ensure the index is a DatetimeIndex
    if not isinstance(data.index, pd.DatetimeIndex):
        raise ValueError("The DataFrame index must be a DatetimeIndex.")

    # Make a copy of the DataFrame to avoid modifying the original
    data = data.copy()

    # Default to all features if none specified
    compute_features = compute_features or ["season", "dayofweek", "month"]

    days_in_year = 365.25  # Account for leap years

    # Each angle is computed once and reused for both the sine and the cosine.

    # Add seasonality features (day of year)
    if "season" in compute_features:
        season_angle = 2 * np.pi * data.index.dayofyear / days_in_year
        data["season_sine"] = np.sin(season_angle)
        data["season_cosine"] = np.cos(season_angle)

    # Add weekly features (day of the week)
    if "dayofweek" in compute_features:
        weekday_angle = 2 * np.pi * data.index.day_of_week / 7
        data["day0fweek_sine"] = np.sin(weekday_angle)
        data["day0fweek_cosine"] = np.cos(weekday_angle)

    # Add monthly features (month of the year)
    if "month" in compute_features:
        month_angle = 2 * np.pi * data.index.month / 12
        data["month_sine"] = np.sin(month_angle)
        data["month_cosine"] = np.cos(month_angle)

    return data
|
@@ -1,25 +1,27 @@
|
|
1
1
|
# SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
|
-
import
|
5
|
-
|
4
|
+
import logging
|
6
5
|
from abc import ABC, abstractmethod
|
6
|
+
from datetime import timedelta
|
7
7
|
from typing import Optional
|
8
8
|
|
9
9
|
import pandas as pd
|
10
|
-
|
10
|
+
import structlog
|
11
|
+
|
11
12
|
from openstef.data_classes.model_specifications import ModelSpecificationDataClass
|
12
13
|
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
13
|
-
from openstef.model.regressors.regressor import OpenstfRegressor
|
14
14
|
from openstef.feature_engineering.feature_applicator import (
|
15
|
-
TrainFeatureApplicator,
|
16
15
|
OperationalPredictFeatureApplicator,
|
16
|
+
TrainFeatureApplicator,
|
17
17
|
)
|
18
18
|
from openstef.feature_engineering.general import (
|
19
19
|
enforce_feature_order,
|
20
20
|
remove_non_requested_feature_columns,
|
21
21
|
)
|
22
|
+
from openstef.model.regressors.regressor import OpenstfRegressor
|
22
23
|
from openstef.pipeline.utils import generate_forecast_datetime_range
|
24
|
+
from openstef.settings import Settings
|
23
25
|
|
24
26
|
|
25
27
|
class AbstractDataPreparation(ABC):
|
@@ -120,6 +122,11 @@ class ARDataPreparation(AbstractDataPreparation):
|
|
120
122
|
def prepare_forecast_data(
|
121
123
|
self, data: pd.DataFrame
|
122
124
|
) -> tuple[pd.DataFrame, pd.DataFrame]:
|
125
|
+
structlog.configure(
|
126
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
127
|
+
logging.getLevelName(Settings.log_level)
|
128
|
+
)
|
129
|
+
)
|
123
130
|
logger = structlog.get_logger(__name__)
|
124
131
|
self.check_model()
|
125
132
|
# Prep forecast input by selecting only the forecast datetime interval (this is much smaller than the input range)
|
@@ -149,11 +149,7 @@ class TrainFeatureApplicator(AbstractFeatureApplicator):
|
|
149
149
|
|
150
150
|
# NOTE this is required since apply_features could add additional features
|
151
151
|
if self.feature_names is not None:
|
152
|
-
|
153
|
-
if pj.get("model") == "proloaf":
|
154
|
-
features = self.feature_names + ["historic_load"] + ["horizon"]
|
155
|
-
else:
|
156
|
-
features = self.feature_names + ["horizon"]
|
152
|
+
features = self.feature_names + ["horizon"]
|
157
153
|
result = remove_non_requested_feature_columns(result, features)
|
158
154
|
|
159
155
|
# Sort all features except for the (first) load and (last) horizon columns
|
@@ -3,10 +3,14 @@
|
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
4
|
"""This modelu contains various helper functions."""
|
5
5
|
|
6
|
+
import logging
|
7
|
+
|
6
8
|
import numpy as np
|
7
9
|
import pandas as pd
|
8
10
|
import structlog
|
9
11
|
|
12
|
+
from openstef.settings import Settings
|
13
|
+
|
10
14
|
|
11
15
|
def add_missing_feature_columns(
|
12
16
|
input_data: pd.DataFrame, features: list[str]
|
@@ -30,6 +34,11 @@ def add_missing_feature_columns(
|
|
30
34
|
Input dataframe with missing columns filled with ``np.nan``.
|
31
35
|
|
32
36
|
"""
|
37
|
+
structlog.configure(
|
38
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
39
|
+
logging.getLevelName(Settings.log_level)
|
40
|
+
)
|
41
|
+
)
|
33
42
|
logger = structlog.get_logger(__name__)
|
34
43
|
|
35
44
|
if features is None:
|
@@ -61,6 +70,11 @@ def remove_non_requested_feature_columns(
|
|
61
70
|
Model input data with features.
|
62
71
|
|
63
72
|
"""
|
73
|
+
structlog.configure(
|
74
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
75
|
+
logging.getLevelName(Settings.log_level)
|
76
|
+
)
|
77
|
+
)
|
64
78
|
logger = structlog.get_logger(__name__)
|
65
79
|
|
66
80
|
if requested_features is None:
|