openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef-4.0.0a3.dist-info/METADATA +177 -0
- openstef-4.0.0a3.dist-info/RECORD +4 -0
- {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
- openstef/__init__.py +0 -14
- openstef/__main__.py +0 -3
- openstef/app_settings.py +0 -19
- openstef/data/NL_terrestrial_radiation.csv +0 -25585
- openstef/data/NL_terrestrial_radiation.csv.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
- openstef/data/dutch_holidays.csv +0 -1759
- openstef/data/dutch_holidays.csv.license +0 -3
- openstef/data/pv_single_coefs.csv +0 -601
- openstef/data/pv_single_coefs.csv.license +0 -3
- openstef/data_classes/__init__.py +0 -3
- openstef/data_classes/data_prep.py +0 -99
- openstef/data_classes/model_specifications.py +0 -30
- openstef/data_classes/prediction_job.py +0 -135
- openstef/data_classes/split_function.py +0 -97
- openstef/enums.py +0 -140
- openstef/exceptions.py +0 -74
- openstef/feature_engineering/__init__.py +0 -3
- openstef/feature_engineering/apply_features.py +0 -138
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
- openstef/feature_engineering/cyclic_features.py +0 -161
- openstef/feature_engineering/data_preparation.py +0 -152
- openstef/feature_engineering/feature_adder.py +0 -206
- openstef/feature_engineering/feature_applicator.py +0 -202
- openstef/feature_engineering/general.py +0 -141
- openstef/feature_engineering/holiday_features.py +0 -231
- openstef/feature_engineering/lag_features.py +0 -165
- openstef/feature_engineering/missing_values_transformer.py +0 -141
- openstef/feature_engineering/rolling_features.py +0 -58
- openstef/feature_engineering/weather_features.py +0 -492
- openstef/metrics/__init__.py +0 -3
- openstef/metrics/figure.py +0 -303
- openstef/metrics/metrics.py +0 -486
- openstef/metrics/reporter.py +0 -222
- openstef/model/__init__.py +0 -3
- openstef/model/basecase.py +0 -82
- openstef/model/confidence_interval_applicator.py +0 -242
- openstef/model/fallback.py +0 -77
- openstef/model/metamodels/__init__.py +0 -3
- openstef/model/metamodels/feature_clipper.py +0 -90
- openstef/model/metamodels/grouped_regressor.py +0 -222
- openstef/model/metamodels/missing_values_handler.py +0 -138
- openstef/model/model_creator.py +0 -214
- openstef/model/objective.py +0 -426
- openstef/model/objective_creator.py +0 -65
- openstef/model/regressors/__init__.py +0 -3
- openstef/model/regressors/arima.py +0 -197
- openstef/model/regressors/custom_regressor.py +0 -64
- openstef/model/regressors/dazls.py +0 -116
- openstef/model/regressors/flatliner.py +0 -95
- openstef/model/regressors/gblinear_quantile.py +0 -334
- openstef/model/regressors/lgbm.py +0 -29
- openstef/model/regressors/linear.py +0 -90
- openstef/model/regressors/linear_quantile.py +0 -305
- openstef/model/regressors/regressor.py +0 -114
- openstef/model/regressors/xgb.py +0 -52
- openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
- openstef/model/regressors/xgb_quantile.py +0 -228
- openstef/model/serializer.py +0 -431
- openstef/model/standard_deviation_generator.py +0 -81
- openstef/model_selection/__init__.py +0 -3
- openstef/model_selection/model_selection.py +0 -311
- openstef/monitoring/__init__.py +0 -3
- openstef/monitoring/performance_meter.py +0 -92
- openstef/monitoring/teams.py +0 -203
- openstef/pipeline/__init__.py +0 -3
- openstef/pipeline/create_basecase_forecast.py +0 -133
- openstef/pipeline/create_component_forecast.py +0 -168
- openstef/pipeline/create_forecast.py +0 -171
- openstef/pipeline/optimize_hyperparameters.py +0 -317
- openstef/pipeline/train_create_forecast_backtest.py +0 -163
- openstef/pipeline/train_model.py +0 -561
- openstef/pipeline/utils.py +0 -52
- openstef/postprocessing/__init__.py +0 -3
- openstef/postprocessing/postprocessing.py +0 -275
- openstef/preprocessing/__init__.py +0 -3
- openstef/preprocessing/preprocessing.py +0 -42
- openstef/settings.py +0 -15
- openstef/tasks/__init__.py +0 -3
- openstef/tasks/calculate_kpi.py +0 -324
- openstef/tasks/create_basecase_forecast.py +0 -118
- openstef/tasks/create_components_forecast.py +0 -162
- openstef/tasks/create_forecast.py +0 -145
- openstef/tasks/create_solar_forecast.py +0 -420
- openstef/tasks/create_wind_forecast.py +0 -80
- openstef/tasks/optimize_hyperparameters.py +0 -135
- openstef/tasks/split_forecast.py +0 -273
- openstef/tasks/train_model.py +0 -224
- openstef/tasks/utils/__init__.py +0 -3
- openstef/tasks/utils/dependencies.py +0 -107
- openstef/tasks/utils/predictionjobloop.py +0 -243
- openstef/tasks/utils/taskcontext.py +0 -160
- openstef/validation/__init__.py +0 -3
- openstef/validation/validation.py +0 -322
- openstef-3.4.56.dist-info/METADATA +0 -154
- openstef-3.4.56.dist-info/RECORD +0 -102
- openstef-3.4.56.dist-info/top_level.txt +0 -1
- /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
|
@@ -1,206 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
"""This module provides functionality for defining custom feature adders."""
|
|
5
|
-
import inspect
|
|
6
|
-
import re
|
|
7
|
-
from abc import ABC, abstractmethod
|
|
8
|
-
from collections import Counter, namedtuple
|
|
9
|
-
from importlib import import_module
|
|
10
|
-
from typing import Optional, Sequence
|
|
11
|
-
|
|
12
|
-
import pandas as pd
|
|
13
|
-
|
|
14
|
-
# Pairs a requested feature name with the parameters parsed out of that name.
ParsedFeature = namedtuple("ParsedFeature", ["name", "params"])


class FeatureAdder(ABC):
    """Interface that every custom feature adder has to implement.

    A feature adder recognises feature names through a regular expression,
    declares which columns it requires and adds the recognised features to a
    dataframe.

    """

    @property
    @abstractmethod
    def _regex(self) -> str:
        """Regular expression that is matched against candidate feature names."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of the FeatureAdder."""

    @abstractmethod
    def required_features(self, feature_names) -> list[str]:
        """List of features that are required to calculate this feature."""

    def __hash__(self):
        """Hash the adder by its name."""
        return hash(self.name)

    def parse_feature_name(self, feature_name: str) -> Optional[dict[str, str]]:
        """Try to recognise a feature name and extract its parameters.

        The name is matched (anchored at its start) against ``self._regex``.

        Args:
            feature_name: The feature name; it may embed parameter values.

        Returns:
            The named groups of the match as a dictionary (empty when the
            pattern defines no named groups), or None when the name does not
            match the pattern of this adder.

        """
        found = re.match(self._regex, feature_name)
        if found is None:
            return None
        return found.groupdict()

    @abstractmethod
    def apply_features(
        self, df: pd.DataFrame, parsed_feature_names: Sequence[ParsedFeature]
    ) -> pd.DataFrame:
        """Apply or add the features to the input dataframe."""

    def __repr__(self):
        """Represent the adder as ``ClassName(<name>)``."""
        return f"{self.__class__.__name__}(<{self.name!s}>)"
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
class FeatureDispatcher:
    """Orchestrator of the feature adders.

    It scans the feature names to assign each of them to the feature adder
    that recognises it, and then lets the adders compute their features.

    """

    def __init__(self, feature_adders: Sequence[FeatureAdder]):
        """Initialize feature dispatcher.

        Args:
            feature_adders: The adders to orchestrate; their names must be unique.

        Raises:
            RuntimeError: If two or more adders share the same name.

        """
        self.feature_adders = list(feature_adders)
        self._check_feature_adder_names_unicity()

    def _check_feature_adder_names_unicity(self):
        """Raise a RuntimeError when an adder name occurs more than once."""
        names = Counter(adder.name for adder in self.feature_adders)
        duplicated_names = [name for name, count in names.items() if count > 1]

        if duplicated_names:
            raise RuntimeError(
                "There is at least one duplicated feature adder name: %s"
                % duplicated_names
            )

    def dispatch_features(
        self, feature_names: list[str]
    ) -> dict[FeatureAdder, list[ParsedFeature]]:
        """Dispatch features.

        Args:
            feature_names: The names of the features to be dispatched.

        Returns:
            Dictionary mapping each adder to the parsed features it recognised.
            Feature names that no adder recognises are left out.

        Raises:
            RuntimeError: If a feature name is recognised more than once.

        """
        recognized_features = set()
        dispatched_features = {}

        for feature_name in feature_names:
            for adder_obj in self.feature_adders:
                parsed_params = adder_obj.parse_feature_name(feature_name)
                if parsed_params is None:
                    continue
                # NOTE(review): a name that is listed twice in feature_names
                # also ends up here, even with a single matching adder.
                if feature_name in recognized_features:
                    # Report the offending name, not the whole input list.
                    raise RuntimeError(
                        "Ambiguous feature adder set detected. The feature name"
                        " '%s' is recognised by more than 1 feature adder"
                        % feature_name
                    )
                recognized_features.add(feature_name)
                dispatched_features.setdefault(adder_obj, []).append(
                    ParsedFeature(feature_name, parsed_params)
                )

        return dispatched_features

    def apply_features(
        self, df: pd.DataFrame, feature_names: list[str]
    ) -> pd.DataFrame:
        """Applies features to the input DataFrame.

        Adders are applied in passes until a pass no longer adds a new feature
        name to the set of applied features. This allows an adder to depend on
        columns produced by another adder.

        Args:
            df: DataFrame to which the features have to be added.
            feature_names: Names of the features. When None, ``df`` is
                returned unchanged.

        Returns:
            DataFrame with the added features.

        """
        if feature_names is None:
            return df
        dispatched_features = self.dispatch_features(feature_names)

        applied_features = set()
        applied_features_num = 0

        while True:
            # NOTE(review): every adder whose requirements are met is applied
            # again on each pass, including the final pass that only detects
            # that no progress was made.
            for adder, parsed_features in dispatched_features.items():
                parsed_feature_names = [pf.name for pf in parsed_features]
                required_features = adder.required_features(parsed_feature_names)

                # Only apply once all required columns are present.
                if set(required_features) <= set(df.columns):
                    df = adder.apply_features(df, parsed_features)
                    applied_features |= set(parsed_feature_names)

            if len(applied_features) == applied_features_num:
                # No new feature was treated
                break

            applied_features_num = len(applied_features)

        return df
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
def adders_from_module(module_name: str) -> list[FeatureAdder]:
    """Import a module and instantiate every concrete FeatureAdder found in it.

    Each attribute listed by ``dir(module)`` is inspected. Classes that
    subclass FeatureAdder and are not abstract are instantiated without
    arguments.

    Args:
        module_name: The name of the module from which to import.

    Returns:
        A list with one instance per concrete FeatureAdder class.

    """
    module = import_module(module_name)
    candidates = (getattr(module, attr_name) for attr_name in dir(module))

    return [
        candidate()
        for candidate in candidates
        if isinstance(candidate, type)
        and issubclass(candidate, FeatureAdder)
        and not inspect.isabstract(candidate)
    ]
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
def adders_from_modules(module_names: list[str]) -> list[FeatureAdder]:
    """Instantiate the concrete FeatureAdders of several modules.

    Args:
        module_names: A list with names of the modules from which to import.

    Returns:
        A single list with the adders of all modules, in module order.

    """
    loaded_adders = []
    for module_name in module_names:
        loaded_adders = loaded_adders + adders_from_module(module_name)
    return loaded_adders
|
|
@@ -1,202 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
"""This module defines several FeatureApplicators.
|
|
5
|
-
|
|
6
|
-
These applicatiors are used to add features to the input data in the corresponding pipelines.
|
|
7
|
-
|
|
8
|
-
"""
|
|
9
|
-
from abc import ABC, abstractmethod
|
|
10
|
-
from typing import Optional, Union
|
|
11
|
-
|
|
12
|
-
import numpy as np
|
|
13
|
-
import pandas as pd
|
|
14
|
-
|
|
15
|
-
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
|
16
|
-
from openstef.feature_engineering.apply_features import apply_features
|
|
17
|
-
from openstef.feature_engineering.feature_adder import (
|
|
18
|
-
FeatureDispatcher,
|
|
19
|
-
adders_from_modules,
|
|
20
|
-
)
|
|
21
|
-
from openstef.feature_engineering.general import (
|
|
22
|
-
add_missing_feature_columns,
|
|
23
|
-
enforce_feature_order,
|
|
24
|
-
remove_non_requested_feature_columns,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
# Default latency per feature: a feature is tied to one specific latency value.
LATENCY_CONFIG = {"day_ahead_electricity_price": 24}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class AbstractFeatureApplicator(ABC):
    """Defines the Applicator interface."""

    def __init__(
        self,
        horizons: Optional[Union[list[float], str]],
        feature_names: Optional[list[str]] = None,
        feature_modules: Optional[list[str]] = None,
    ) -> None:
        """Initialize abstract feature applicator.

        Args:
            horizons: List of horizons in hours, a string, or None. How a
                string or None is interpreted is left to the subclass.
            feature_names: List of requested features
            feature_modules: List of modules from which FeatureAdders should be
                loaded. None is treated as an empty list.

        Raises:
            ValueError: If ``horizons`` is neither a list, a string nor None.

        """
        # The original test ended in "and not None", which is always true and
        # therefore rejected None as well.
        if horizons is not None and not isinstance(horizons, (str, list)):
            raise ValueError("horizons must be added as a list")

        self.feature_names = feature_names
        self.horizons = horizons
        # Normalise None here so there is no shared mutable default argument.
        self.features_adder = adders_from_modules(
            [] if feature_modules is None else feature_modules
        )
        self.features_dispatcher = FeatureDispatcher(self.features_adder)

    @abstractmethod
    def add_features(
        self, df: pd.DataFrame, pj: PredictionJobDataClass = None
    ) -> pd.DataFrame:
        """Adds features to an input DataFrame.

        Args:
            df: DataFrame with input data to which the features have to be added
            pj: (Optional) A prediction job that is needed for location dependent features,
                if not specified a default location is used

        Returns:
            Dataframe with added features.

        """
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
class TrainFeatureApplicator(AbstractFeatureApplicator):
    """Feature applicator for use during training."""

    def add_features(
        self,
        df: pd.DataFrame,
        pj: PredictionJobDataClass = None,
        latency_config: dict = None,
    ) -> pd.DataFrame:
        """Adds features to an input DataFrame.

        This method is implemented specifically for a model train pipeline.
        The input is processed once per horizon and the per-horizon results
        are stacked, each row carrying its horizon in a "horizon" column.

        For example:
            For horizon 24 hours the feature T-720min is not added as the load
            720 minutes ago is not available 24 hours in advance. In case of a horizon
            0.25 hours this feature is added as in this case the feature is available.

        When ``self.horizons`` is a string, the column with that name is
        copied into the "horizon" column instead and the core features are
        not applied.

        Args:
            df: Input data to which the features will be added.
            pj: (Optional) A prediction job that is needed for location dependent features,
                if not specified a default location is used
            latency_config: (Optional) Invalidate certain features that are not
                available for a specific horizon due to data latency. Defaults to
                ``{"day_ahead_electricity_price": 24}``.

        Returns:
            Input DataFrame with an extra column for every added feature and sorted on the datetime index.

        """
        # Fall back to defaults for everything that was not provided.
        pj = {} if pj is None else pj
        latency_config = LATENCY_CONFIG if latency_config is None else latency_config
        if self.horizons is None:
            self.horizons = [0.25, 24]

        combined = pd.DataFrame()

        if isinstance(self.horizons, str):
            # A custom horizon column: copy it into the "horizon" column.
            frame = df.copy(deep=True)
            frame["horizon"] = frame[self.horizons]
            combined = pd.concat([combined, frame])
        else:
            for horizon in self.horizons:
                # Every horizon starts from a fresh deep copy of the input.
                frame = apply_features(
                    df.copy(deep=True),
                    horizon=horizon,
                    pj=pj,
                    feature_names=self.feature_names,
                )
                frame["horizon"] = horizon
                combined = pd.concat([combined, frame])

        # Custom features are handled by the dispatcher.
        combined = self.features_dispatcher.apply_features(
            combined, feature_names=self.feature_names
        )

        # IMPORTANT: the index holds duplicates (one per horizon) with large
        # gaps in between; slicing on the (datetime) index fails unless the
        # index is sorted first.
        combined = combined.sort_index(axis=0)

        # Blank out features for rows whose horizon exceeds the feature latency.
        for feature, max_horizon in latency_config.items():
            combined.loc[combined["horizon"] > max_horizon, feature] = np.nan

        # NOTE this is required since apply_features could add additional features
        if self.feature_names is not None:
            requested = self.feature_names + ["horizon"]
            combined = remove_non_requested_feature_columns(combined, requested)

        # Sort all features except for the (first) load and (last) horizon columns
        return enforce_feature_order(combined)
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
class OperationalPredictFeatureApplicator(AbstractFeatureApplicator):
    """Feature applicator for use in operational forecasts."""

    def add_features(
        self, df: pd.DataFrame, pj: PredictionJobDataClass = None
    ) -> pd.DataFrame:
        """Adds features to an input DataFrame.

        This method is implemented specifically for an operational prediction
        pipeline. Exactly one horizon must be configured. Requested features
        that are still missing afterwards are added through
        ``add_missing_feature_columns``.

        Args:
            df: DataFrame with input data to which the features have to be added
            pj: (Optional) A prediction job that is needed for location dependent features,
                if not specified a default location is used

        Returns:
            Input DataFrame with an extra column for every added feature.

        Raises:
            ValueError: If the number of configured horizons is not one.

        """
        # Use an empty dict when no prediction job is given.
        pj = {} if pj is None else pj

        horizon_count = len(self.horizons)
        if horizon_count != 1:
            raise ValueError(f"Expected one horizon, got {horizon_count}")

        # Core features first, then the custom ones from the dispatcher.
        with_core = apply_features(
            df, feature_names=self.feature_names, horizon=self.horizons[0], pj=pj
        )
        with_custom = self.features_dispatcher.apply_features(
            with_core, feature_names=self.feature_names
        )

        completed = add_missing_feature_columns(with_custom, self.feature_names)

        # NOTE this is required since apply_features could add additional features
        if self.feature_names is not None:
            completed = remove_non_requested_feature_columns(
                completed, self.feature_names
            )

        return enforce_feature_order(completed)
|
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
"""This modelu contains various helper functions."""
|
|
5
|
-
|
|
6
|
-
import logging
|
|
7
|
-
|
|
8
|
-
import numpy as np
|
|
9
|
-
import pandas as pd
|
|
10
|
-
import structlog
|
|
11
|
-
|
|
12
|
-
from openstef.settings import Settings
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def add_missing_feature_columns(
    input_data: pd.DataFrame, features: list[str]
) -> pd.DataFrame:
    """Make sure every listed feature exists as a column.

    For each feature in the list that is not yet a column of the input data,
    a column filled with NaN is added and a warning is logged. The input
    DataFrame is modified in place and also returned.

    .. note::
        This function is intended as a final check to prevent errors during prediction.
        In an ideal world this function is not necessary.

    Args:
        input_data: DataFrame with input data and features.
        features: List of required features. None is treated as an empty list.

    Returns:
        Input dataframe with missing columns filled with ``np.nan``.

    """
    # structlog is (re)configured on every call, using the level from Settings.
    log_level = logging.getLevelName(Settings.log_level)
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(log_level)
    )
    logger = structlog.get_logger(__name__)

    if features is None:
        features = []

    present_columns = list(input_data)
    absent = [name for name in features if name not in present_columns]

    for feature in absent:
        logger.warning(
            f"Adding NaN column for missing feature: {feature}", missing_feature=feature
        )
        input_data[feature] = np.nan

    return input_data
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def remove_non_requested_feature_columns(
    input_data: pd.DataFrame, requested_features: list[str]
) -> pd.DataFrame:
    """Drop the columns that are not part of the requested feature list.

    This should not be necessary but serves as an extra failsafe for making
    predictions. The "load" column is never dropped.

    Args:
        input_data: DataFrame with features
        requested_features: List of requested features. None is treated as an
            empty list.

    Returns:
        Model input data with features.

    """
    # structlog is (re)configured on every call, using the level from Settings.
    log_level = logging.getLevelName(Settings.log_level)
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(log_level)
    )
    logger = structlog.get_logger(__name__)

    if requested_features is None:
        requested_features = []

    not_requested_features = []
    for column in list(input_data):
        if column not in requested_features:
            not_requested_features.append(column)

    # "load" is not a feature, so it is exempt from removal.
    if "load" in not_requested_features:
        not_requested_features.remove("load")

    num_not_requested_features = len(not_requested_features)

    if num_not_requested_features != 0:
        logger.warning(
            f"Removing {num_not_requested_features} unrequested features!",
            num_not_requested_features=num_not_requested_features,
        )

    return input_data.drop(not_requested_features, axis=1)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def enforce_feature_order(input_data: pd.DataFrame) -> pd.DataFrame:
    """Put the columns of the input data in a fixed order.

    The first column stays in front, the remaining columns are sorted by
    name, and a "horizon" column among those remaining columns is moved to
    the very end:
        [first column] -- [sorted features] -- ["horizon"]

    The first column is assumed to hold the variable that is to be predicted
    (most of the time this is "load"). The "horizon" column is present during
    training but not during forecasting.

    Args:
        input_data: Input data with features.

    Returns:
        Properly sorted input data.

    """
    all_names = input_data.columns.to_list()
    leading = all_names[0]

    # Sort every name, then take the leading column out of the sorted part.
    remaining = list(np.sort(all_names))
    remaining.remove(leading)

    ordered = [leading, *remaining]

    # Move "horizon" to the back when it is one of the remaining columns.
    if "horizon" in remaining:
        ordered.remove("horizon")
        ordered.append("horizon")

    return input_data.loc[:, ordered]
|