openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. openstef/app_settings.py +19 -0
  2. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  3. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
  4. openstef/data/dutch_holidays.csv +1759 -0
  5. openstef/data_classes/data_prep.py +1 -1
  6. openstef/data_classes/prediction_job.py +15 -9
  7. openstef/enums.py +108 -9
  8. openstef/exceptions.py +1 -1
  9. openstef/feature_engineering/apply_features.py +25 -6
  10. openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
  11. openstef/feature_engineering/cyclic_features.py +102 -0
  12. openstef/feature_engineering/data_preparation.py +12 -5
  13. openstef/feature_engineering/feature_applicator.py +1 -5
  14. openstef/feature_engineering/general.py +14 -0
  15. openstef/feature_engineering/holiday_features.py +35 -26
  16. openstef/feature_engineering/missing_values_transformer.py +141 -0
  17. openstef/feature_engineering/weather_features.py +7 -0
  18. openstef/metrics/figure.py +3 -0
  19. openstef/metrics/metrics.py +58 -1
  20. openstef/metrics/reporter.py +7 -0
  21. openstef/model/confidence_interval_applicator.py +28 -3
  22. openstef/model/model_creator.py +54 -41
  23. openstef/model/objective.py +17 -34
  24. openstef/model/objective_creator.py +13 -12
  25. openstef/model/regressors/arima.py +1 -1
  26. openstef/model/regressors/dazls.py +35 -96
  27. openstef/model/regressors/flatliner.py +95 -0
  28. openstef/model/regressors/linear_quantile.py +296 -0
  29. openstef/model/regressors/xgb.py +23 -0
  30. openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  31. openstef/model/regressors/xgb_quantile.py +3 -0
  32. openstef/model/serializer.py +10 -0
  33. openstef/model_selection/model_selection.py +4 -1
  34. openstef/monitoring/performance_meter.py +1 -2
  35. openstef/monitoring/teams.py +11 -0
  36. openstef/pipeline/create_basecase_forecast.py +11 -1
  37. openstef/pipeline/create_component_forecast.py +24 -28
  38. openstef/pipeline/create_forecast.py +20 -1
  39. openstef/pipeline/optimize_hyperparameters.py +18 -16
  40. openstef/pipeline/train_create_forecast_backtest.py +11 -1
  41. openstef/pipeline/train_model.py +31 -12
  42. openstef/pipeline/utils.py +3 -0
  43. openstef/postprocessing/postprocessing.py +29 -0
  44. openstef/settings.py +15 -0
  45. openstef/tasks/calculate_kpi.py +23 -20
  46. openstef/tasks/create_basecase_forecast.py +15 -7
  47. openstef/tasks/create_components_forecast.py +24 -8
  48. openstef/tasks/create_forecast.py +9 -6
  49. openstef/tasks/create_solar_forecast.py +4 -4
  50. openstef/tasks/optimize_hyperparameters.py +2 -2
  51. openstef/tasks/split_forecast.py +9 -2
  52. openstef/tasks/train_model.py +9 -7
  53. openstef/tasks/utils/taskcontext.py +7 -0
  54. openstef/validation/validation.py +28 -3
  55. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
  56. openstef-3.4.44.dist-info/RECORD +97 -0
  57. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
  58. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
  59. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
  60. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
  61. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
  62. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
  63. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
  64. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
  65. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
  66. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
  67. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
  68. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
  69. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
  70. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
  71. openstef/data/dutch_holidays_2020-2022.csv +0 -831
  72. openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
  73. openstef/feature_engineering/historic_features.py +0 -40
  74. openstef/model/regressors/proloaf.py +0 -281
  75. openstef/tasks/run_tracy.py +0 -145
  76. openstef-3.4.10.dist-info/RECORD +0 -104
  77. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
  78. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
  79. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
  80. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
  81. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@
5
5
  import inspect
6
6
  import json
7
7
  from importlib import import_module
8
- from typing import Any, Sequence, Union, TypeVar
8
+ from typing import Any, Sequence, TypeVar, Union
9
9
 
10
10
  from pydantic.v1 import BaseModel
11
11
 
@@ -6,10 +6,10 @@ from typing import Optional, Union
6
6
 
7
7
  from pydantic.v1 import BaseModel
8
8
 
9
+ from openstef.data_classes.data_prep import DataPrepDataClass
9
10
  from openstef.data_classes.model_specifications import ModelSpecificationDataClass
10
11
  from openstef.data_classes.split_function import SplitFuncDataClass
11
- from openstef.data_classes.data_prep import DataPrepDataClass
12
- from openstef.enums import PipelineType
12
+ from openstef.enums import PipelineType, BiddingZone
13
13
 
14
14
 
15
15
  class PredictionJobDataClass(BaseModel):
@@ -25,11 +25,15 @@ class PredictionJobDataClass(BaseModel):
25
25
  - ``"xgb_quantile"``
26
26
  - ``"lgb"``
27
27
  - ``"linear"``
28
- - ``"proloaf"`` (extra dependencies requiered, see README)
28
+ - ``"linear_quantile"``
29
+ - ``"xgb_multioutput_quantile"``
30
+ - ``"flatliner"``
29
31
 
30
32
  If unsure what to pick, choose ``"xgb"``.
31
33
 
32
34
  """
35
+ model_kwargs: Optional[dict]
36
+ """The model parameters that should be used."""
33
37
  forecast_type: str
34
38
  """The type of forecasts that should be made.
35
39
 
@@ -41,15 +45,17 @@ class PredictionJobDataClass(BaseModel):
41
45
  If unsure what to pick, choose ``"demand"``.
42
46
 
43
47
  """
44
- horizon_minutes: int = 2880
45
- """The horizon of the desired forecast in minutes. Defaults to 2880 minutes (i.e. 2 days)."""
48
+ horizon_minutes: Optional[int] = 2880
49
+ """The horizon of the desired forecast in minutes used in tasks. Defaults to 2880 minutes (i.e. 2 days)."""
46
50
  resolution_minutes: int
47
51
  """The resolution of the desired forecast in minutes."""
48
- lat: float
49
- """Latitude of the forecasted location in degrees."""
50
- lon: float
51
- """Longitude of the forecasted location in degrees."""
52
+ lat: Optional[float] = 52.132633
53
+ """Latitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derived features and component splitting."""
54
+ lon: Optional[float] = 5.291266
55
+ """Longitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derived features and component splitting."""
52
56
  name: str
57
+ """Bidding zone is used to determine the electricity price. It is also used to determine the holidays that should be used. Currently only ENTSO-E bidding zones are supported."""
58
+ electricity_bidding_zone: Optional[BiddingZone] = BiddingZone.NL
53
59
  """Name of the forecast, e.g. the location name."""
54
60
  train_components: Optional[bool]
55
61
  """Whether splitting the forecasts in wind, solar, rest is desired."""
openstef/enums.py CHANGED
@@ -4,14 +4,119 @@
4
4
  from enum import Enum
5
5
 
6
6
 
7
- # TODO replace this with ModelType (MLModelType == Machine Learning model type)
8
- class MLModelType(Enum):
7
+ class BiddingZone(Enum):
8
+ DE_50HZ = "DE_50HZ"
9
+ AL = "AL"
10
+ DE_AMPRION = "DE_AMPRION"
11
+ AT = "AT"
12
+ BY = "BY"
13
+ BE = "BE"
14
+ BA = "BA"
15
+ BG = "BG"
16
+ CZ_DE_SK = "CZ_DE_SK"
17
+ HR = "HR"
18
+ CWE = "CWE"
19
+ CY = "CY"
20
+ CZ = "CZ"
21
+ DE_AT_LU = "DE_AT_LU"
22
+ DE_LU = "DE_LU"
23
+ DK = "DK"
24
+ DK_1 = "DK_1"
25
+ DK_1_NO_1 = "DK_1_NO_1"
26
+ DK_2 = "DK_2"
27
+ DK_CA = "DK_CA"
28
+ EE = "EE"
29
+ FI = "FI"
30
+ MK = "MK"
31
+ FR = "FR"
32
+ DE = "DE"
33
+ GR = "GR"
34
+ HU = "HU"
35
+ IS = "IS"
36
+ IE_SEM = "IE_SEM"
37
+ IE = "IE"
38
+ IT = "IT"
39
+ IT_SACO_AC = "IT_SACO_AC"
40
+ IT_CALA = "IT_CALA"
41
+ IT_SACO_DC = "IT_SACO_DC"
42
+ IT_BRNN = "IT_BRNN"
43
+ IT_CNOR = "IT_CNOR"
44
+ IT_CSUD = "IT_CSUD"
45
+ IT_FOGN = "IT_FOGN"
46
+ IT_GR = "IT_GR"
47
+ IT_MACRO_NORTH = "IT_MACRO_NORTH"
48
+ IT_MACRO_SOUTH = "IT_MACRO_SOUTH"
49
+ IT_MALTA = "IT_MALTA"
50
+ IT_NORD = "IT_NORD"
51
+ IT_NORD_AT = "IT_NORD_AT"
52
+ IT_NORD_CH = "IT_NORD_CH"
53
+ IT_NORD_FR = "IT_NORD_FR"
54
+ IT_NORD_SI = "IT_NORD_SI"
55
+ IT_PRGP = "IT_PRGP"
56
+ IT_ROSN = "IT_ROSN"
57
+ IT_SARD = "IT_SARD"
58
+ IT_SICI = "IT_SICI"
59
+ IT_SUD = "IT_SUD"
60
+ RU_KGD = "RU_KGD"
61
+ LV = "LV"
62
+ LT = "LT"
63
+ LU = "LU"
64
+ LU_BZN = "LU_BZN"
65
+ MT = "MT"
66
+ ME = "ME"
67
+ GB = "GB"
68
+ GE = "GE"
69
+ GB_IFA = "GB_IFA"
70
+ GB_IFA2 = "GB_IFA2"
71
+ GB_ELECLINK = "GB_ELECLINK"
72
+ UK = "UK"
73
+ NL = "NL"
74
+ NO_1 = "NO_1"
75
+ NO_1A = "NO_1A"
76
+ NO_2 = "NO_2"
77
+ NO_2_NSL = "NO_2_NSL"
78
+ NO_2A = "NO_2A"
79
+ NO_3 = "NO_3"
80
+ NO_4 = "NO_4"
81
+ NO_5 = "NO_5"
82
+ NO = "NO"
83
+ PL_CZ = "PL_CZ"
84
+ PL = "PL"
85
+ PT = "PT"
86
+ MD = "MD"
87
+ RO = "RO"
88
+ RU = "RU"
89
+ SE_1 = "SE_1"
90
+ SE_2 = "SE_2"
91
+ SE_3 = "SE_3"
92
+ SE_4 = "SE_4"
93
+ RS = "RS"
94
+ SK = "SK"
95
+ SI = "SI"
96
+ GB_NIR = "GB_NIR"
97
+ ES = "ES"
98
+ SE = "SE"
99
+ CH = "CH"
100
+ DE_TENNET = "DE_TENNET"
101
+ DE_TRANSNET = "DE_TRANSNET"
102
+ TR = "TR"
103
+ UA = "UA"
104
+ UA_DOBTPP = "UA_DOBTPP"
105
+ UA_BEI = "UA_BEI"
106
+ UA_IPS = "UA_IPS"
107
+ XK = "XK"
108
+ DE_AMP_LU = "DE_AMP_LU"
109
+
110
+
111
+ class ModelType(Enum):
9
112
  XGB = "xgb"
10
113
  XGB_QUANTILE = "xgb_quantile"
114
+ XGB_MULTIOUTPUT_QUANTILE = "xgb_multioutput_quantile"
11
115
  LGB = "lgb"
12
116
  LINEAR = "linear"
13
- ProLoaf = "proloaf"
117
+ LINEAR_QUANTILE = "linear_quantile"
14
118
  ARIMA = "arima"
119
+ FLATLINER = "flatliner"
15
120
 
16
121
 
17
122
  class ForecastType(Enum):
@@ -21,12 +126,6 @@ class ForecastType(Enum):
21
126
  BASECASE = "basecase"
22
127
 
23
128
 
24
- class TracyJobResult(Enum):
25
- SUCCESS = "success"
26
- FAILED = "failed"
27
- UNKNOWN = "unknown"
28
-
29
-
30
129
  class PipelineType(Enum):
31
130
  FORECAST = "forecast"
32
131
  TRAIN = "train"
openstef/exceptions.py CHANGED
@@ -5,7 +5,7 @@
5
5
  """Openstef custom exceptions."""
6
6
 
7
7
 
8
- # Define custom exception
8
+ # Define custom exceptions
9
9
  class NoPredictedLoadError(Exception):
10
10
  """No predicted load for given datatime range."""
11
11
 
@@ -14,19 +14,25 @@ Examples of features that are added:
14
14
  import pandas as pd
15
15
 
16
16
  from openstef.data_classes.prediction_job import PredictionJobDataClass
17
- from openstef.feature_engineering.historic_features import (
18
- add_historic_load_as_a_feature,
19
- )
17
+ from openstef.enums import BiddingZone
20
18
  from openstef.feature_engineering.holiday_features import (
21
19
  generate_holiday_feature_functions,
22
20
  )
23
21
  from openstef.feature_engineering.lag_features import generate_lag_feature_functions
22
+ from openstef.feature_engineering.bidding_zone_to_country_mapping import (
23
+ BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING,
24
+ )
24
25
  from openstef.feature_engineering.weather_features import (
25
26
  add_additional_solar_features,
26
27
  add_additional_wind_features,
27
28
  add_humidity_features,
28
29
  )
29
30
 
31
+ from openstef.feature_engineering.cyclic_features import (
32
+ add_seasonal_cyclic_features,
33
+ add_time_cyclic_features,
34
+ )
35
+
30
36
 
31
37
  def apply_features(
32
38
  data: pd.DataFrame,
@@ -61,6 +67,7 @@ def apply_features(
61
67
 
62
68
  import pandas as pd
63
69
  import numpy as np
70
+ from geopy.geocoders import Nominatim
64
71
  index = pd.date_range(start = "2017-01-01 09:00:00",
65
72
  freq = '15T', periods = 200)
66
73
  data = pd.DataFrame(index = index,
@@ -69,8 +76,8 @@ def apply_features(
69
76
  np.random.uniform(0.7,1.7, 200)))
70
77
 
71
78
  """
72
- # Add if needed the proloaf feature (historic_load)
73
- data = add_historic_load_as_a_feature(data, pj)
79
+ if pj is None:
80
+ pj = {"electricity_bidding_zone": BiddingZone.NL}
74
81
 
75
82
  # Get lag feature functions
76
83
  feature_functions = generate_lag_feature_functions(feature_names, horizon)
@@ -86,8 +93,14 @@ def apply_features(
86
93
  }
87
94
  )
88
95
 
96
+ # Get country code from bidding zone if available
97
+ electricity_bidding_zone = pj.get("electricity_bidding_zone", BiddingZone.NL)
98
+ country_code = BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING[electricity_bidding_zone.name]
99
+
89
100
  # Get holiday feature functions
90
- feature_functions.update(generate_holiday_feature_functions())
101
+ feature_functions.update(
102
+ generate_holiday_feature_functions(country_code=country_code)
103
+ )
91
104
 
92
105
  # Add the features to the dataframe using previously defined feature functions
93
106
  for key, featfunc in feature_functions.items():
@@ -105,5 +118,11 @@ def apply_features(
105
118
  # Add solar features; when pj is unavailable a default location is used.
106
119
  data = add_additional_solar_features(data, pj, feature_names)
107
120
 
121
+ # Adds cyclical features to capture seasonal and periodic patterns in time-based data.
122
+ data = add_seasonal_cyclic_features(data)
123
+
124
+ # Adds polar time features (sine and cosine) to capture periodic patterns based on the timestamp index.
125
+ data = add_time_cyclic_features(data)
126
+
108
127
  # Return dataframe including all requested features
109
128
  return data
@@ -0,0 +1,106 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING = {
5
+ "DE_50HZ": "DE",
6
+ "AL": "AL",
7
+ "DE_AMPRION": "DE",
8
+ "AT": "AT",
9
+ "BY": "BY",
10
+ "BE": "BE",
11
+ "BA": "BA",
12
+ "BG": "BG",
13
+ "CZ_DE_SK": "CZ",
14
+ "HR": "HR",
15
+ "CWE": "CWE",
16
+ "CY": "CY",
17
+ "CZ": "CZ",
18
+ "DE_AT_LU": "DE",
19
+ "DE_LU": "DE",
20
+ "DK": "DK",
21
+ "DK_1": "DK",
22
+ "DK_1_NO_1": "DK",
23
+ "DK_2": "DK",
24
+ "DK_CA": "DK",
25
+ "EE": "EE",
26
+ "FI": "FI",
27
+ "MK": "MK",
28
+ "FR": "FR",
29
+ "DE": "DE",
30
+ "GR": "GR",
31
+ "HU": "HU",
32
+ "IS": "IS",
33
+ "IE_SEM": "IE",
34
+ "IE": "IE",
35
+ "IT": "IT",
36
+ "IT_SACO_AC": "IT",
37
+ "IT_CALA": "IT",
38
+ "IT_SACO_DC": "IT",
39
+ "IT_BRNN": "IT",
40
+ "IT_CNOR": "IT",
41
+ "IT_CSUD": "IT",
42
+ "IT_FOGN": "IT",
43
+ "IT_GR": "IT",
44
+ "IT_MACRO_NORTH": "IT",
45
+ "IT_MACRO_SOUTH": "IT",
46
+ "IT_MALTA": "IT",
47
+ "IT_NORD": "IT",
48
+ "IT_NORD_AT": "IT",
49
+ "IT_NORD_CH": "IT",
50
+ "IT_NORD_FR": "IT",
51
+ "IT_NORD_SI": "IT",
52
+ "IT_PRGP": "IT",
53
+ "IT_ROSN": "IT",
54
+ "IT_SARD": "IT",
55
+ "IT_SICI": "IT",
56
+ "IT_SUD": "IT",
57
+ "RU_KGD": "RU",
58
+ "LV": "LV",
59
+ "LT": "LT",
60
+ "LU": "LU",
61
+ "LU_BZN": "LU",
62
+ "MT": "MT",
63
+ "ME": "ME",
64
+ "GB": "GB",
65
+ "GE": "GE",
66
+ "GB_IFA": "GB",
67
+ "GB_IFA2": "GB",
68
+ "GB_ELECLINK": "GB",
69
+ "UK": "UK",
70
+ "NL": "NL",
71
+ "NO_1": "NO",
72
+ "NO_1A": "NO",
73
+ "NO_2": "NO",
74
+ "NO_2_NSL": "NO",
75
+ "NO_2A": "NO",
76
+ "NO_3": "NO",
77
+ "NO_4": "NO",
78
+ "NO_5": "NO",
79
+ "NO": "NO",
80
+ "PL_CZ": "PL",
81
+ "PL": "PL",
82
+ "PT": "PT",
83
+ "MD": "MD",
84
+ "RO": "RO",
85
+ "RU": "RU",
86
+ "SE_1": "SE",
87
+ "SE_2": "SE",
88
+ "SE_3": "SE",
89
+ "SE_4": "SE",
90
+ "RS": "RS",
91
+ "SK": "SK",
92
+ "SI": "SI",
93
+ "GB_NIR": "GB",
94
+ "ES": "ES",
95
+ "SE": "SE",
96
+ "CH": "CH",
97
+ "DE_TENNET": "DE",
98
+ "DE_TRANSNET": "DE",
99
+ "TR": "TR",
100
+ "UA": "UA",
101
+ "UA_DOBTPP": "UA",
102
+ "UA_BEI": "UA",
103
+ "UA_IPS": "UA",
104
+ "XK": "XK",
105
+ "DE_AMP_LU": "DE",
106
+ }
@@ -0,0 +1,102 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ # Module for adding temporal cyclic features to time-based data for capturing seasonality and periodic patterns.
6
+ # Features include yearly, weekly, and monthly seasonality, as well as time-of-day periodicity.
7
+
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ import structlog
13
+ import logging
14
+
15
+ from openstef.settings import Settings
16
+
17
+ structlog.configure(
18
+ wrapper_class=structlog.make_filtering_bound_logger(
19
+ logging.getLevelName(Settings.log_level)
20
+ )
21
+ )
22
+ logger = structlog.get_logger(__name__)
23
+
24
+
25
+ NUM_SECONDS_IN_A_DAY = 24 * 60 * 60
26
+
27
+
28
+ def add_time_cyclic_features(
29
+ data: pd.DataFrame,
30
+ ) -> pd.DataFrame:
31
+ """Adds time of the day features cyclically encoded using sine and cosine to the input data.
32
+
33
+ Args:
34
+ data: Dataframe indexed by datetime.
35
+
36
+ Returns:
37
+ DataFrame that is the same as input dataframe with extra columns for the added time of the day features.
38
+ """
39
+ # Ensure the index is a DatetimeIndex
40
+ if not isinstance(data.index, pd.DatetimeIndex):
41
+ raise ValueError("Index should be a pandas DatetimeIndex")
42
+
43
+ # Make a copy of the DataFrame to avoid modifying the original
44
+ data = data.copy()
45
+
46
+ second_of_the_day = (
47
+ data.index.second + data.index.minute * 60 + data.index.hour * 60 * 60
48
+ )
49
+ period_of_the_day = 2 * np.pi * second_of_the_day / NUM_SECONDS_IN_A_DAY
50
+
51
+ data["time0fday_sine"] = np.sin(period_of_the_day)
52
+ data["time0fday_cosine"] = np.cos(period_of_the_day)
53
+
54
+ return data
55
+
56
+
57
+ def add_seasonal_cyclic_features(
58
+ data: pd.DataFrame, compute_features: list = None
59
+ ) -> pd.DataFrame:
60
+ """Adds cyclical features to capture seasonal and periodic patterns in time-based data.
61
+
62
+ Args:
63
+ - data (pd.DataFrame): DataFrame with a DatetimeIndex.
64
+ - compute_features (list): Optional. List of features to compute. Options are:
65
+ ['season', 'dayofweek', 'month']. Default is all features.
66
+
67
+ Returns:
68
+ - pd.DataFrame: DataFrame with added cyclical features.
69
+
70
+ Example:
71
+ >>> data = pd.DataFrame(index=pd.date_range(start='2023-01-01', periods=365, freq='D'))
72
+ >>> data_with_features = add_seasonal_cyclic_features(data)
73
+ >>> print(data_with_features.head())
74
+ """
75
+ # Ensure the index is a DatetimeIndex
76
+ if not isinstance(data.index, pd.DatetimeIndex):
77
+ raise ValueError("The DataFrame index must be a DatetimeIndex.")
78
+
79
+ # Make a copy of the DataFrame to avoid modifying the original
80
+ data = data.copy()
81
+
82
+ # Default to all features if none specified
83
+ compute_features = compute_features or ["season", "dayofweek", "month"]
84
+
85
+ days_in_year = 365.25 # Account for leap years
86
+
87
+ # Add seasonality features (day of year)
88
+ if "season" in compute_features:
89
+ data["season_sine"] = np.sin(2 * np.pi * data.index.dayofyear / days_in_year)
90
+ data["season_cosine"] = np.cos(2 * np.pi * data.index.dayofyear / days_in_year)
91
+
92
+ # Add weekly features (day of the week)
93
+ if "dayofweek" in compute_features:
94
+ data["day0fweek_sine"] = np.sin(2 * np.pi * data.index.day_of_week / 7)
95
+ data["day0fweek_cosine"] = np.cos(2 * np.pi * data.index.day_of_week / 7)
96
+
97
+ # Add monthly features (month of the year)
98
+ if "month" in compute_features:
99
+ data["month_sine"] = np.sin(2 * np.pi * data.index.month / 12)
100
+ data["month_cosine"] = np.cos(2 * np.pi * data.index.month / 12)
101
+
102
+ return data
@@ -1,25 +1,27 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
- import structlog
5
-
4
+ import logging
6
5
  from abc import ABC, abstractmethod
6
+ from datetime import timedelta
7
7
  from typing import Optional
8
8
 
9
9
  import pandas as pd
10
- from datetime import timedelta
10
+ import structlog
11
+
11
12
  from openstef.data_classes.model_specifications import ModelSpecificationDataClass
12
13
  from openstef.data_classes.prediction_job import PredictionJobDataClass
13
- from openstef.model.regressors.regressor import OpenstfRegressor
14
14
  from openstef.feature_engineering.feature_applicator import (
15
- TrainFeatureApplicator,
16
15
  OperationalPredictFeatureApplicator,
16
+ TrainFeatureApplicator,
17
17
  )
18
18
  from openstef.feature_engineering.general import (
19
19
  enforce_feature_order,
20
20
  remove_non_requested_feature_columns,
21
21
  )
22
+ from openstef.model.regressors.regressor import OpenstfRegressor
22
23
  from openstef.pipeline.utils import generate_forecast_datetime_range
24
+ from openstef.settings import Settings
23
25
 
24
26
 
25
27
  class AbstractDataPreparation(ABC):
@@ -120,6 +122,11 @@ class ARDataPreparation(AbstractDataPreparation):
120
122
  def prepare_forecast_data(
121
123
  self, data: pd.DataFrame
122
124
  ) -> tuple[pd.DataFrame, pd.DataFrame]:
125
+ structlog.configure(
126
+ wrapper_class=structlog.make_filtering_bound_logger(
127
+ logging.getLevelName(Settings.log_level)
128
+ )
129
+ )
123
130
  logger = structlog.get_logger(__name__)
124
131
  self.check_model()
125
132
  # Prep forecast input by selecting only the forecast datetime interval (this is much smaller than the input range)
@@ -149,11 +149,7 @@ class TrainFeatureApplicator(AbstractFeatureApplicator):
149
149
 
150
150
  # NOTE this is required since apply_features could add additional features
151
151
  if self.feature_names is not None:
152
- # Add horizon to requested features else it is removed, and if needed the proloaf feature (historic_load)
153
- if pj.get("model") == "proloaf":
154
- features = self.feature_names + ["historic_load"] + ["horizon"]
155
- else:
156
- features = self.feature_names + ["horizon"]
152
+ features = self.feature_names + ["horizon"]
157
153
  result = remove_non_requested_feature_columns(result, features)
158
154
 
159
155
  # Sort all features except for the (first) load and (last) horizon columns
@@ -3,10 +3,14 @@
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  """This module contains various helper functions."""
5
5
 
6
+ import logging
7
+
6
8
  import numpy as np
7
9
  import pandas as pd
8
10
  import structlog
9
11
 
12
+ from openstef.settings import Settings
13
+
10
14
 
11
15
  def add_missing_feature_columns(
12
16
  input_data: pd.DataFrame, features: list[str]
@@ -30,6 +34,11 @@ def add_missing_feature_columns(
30
34
  Input dataframe with missing columns filled with ``np.nan``.
31
35
 
32
36
  """
37
+ structlog.configure(
38
+ wrapper_class=structlog.make_filtering_bound_logger(
39
+ logging.getLevelName(Settings.log_level)
40
+ )
41
+ )
33
42
  logger = structlog.get_logger(__name__)
34
43
 
35
44
  if features is None:
@@ -61,6 +70,11 @@ def remove_non_requested_feature_columns(
61
70
  Model input data with features.
62
71
 
63
72
  """
73
+ structlog.configure(
74
+ wrapper_class=structlog.make_filtering_bound_logger(
75
+ logging.getLevelName(Settings.log_level)
76
+ )
77
+ )
64
78
  logger = structlog.get_logger(__name__)
65
79
 
66
80
  if requested_features is None: