openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef-4.0.0a3.dist-info/METADATA +177 -0
- openstef-4.0.0a3.dist-info/RECORD +4 -0
- {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
- openstef/__init__.py +0 -14
- openstef/__main__.py +0 -3
- openstef/app_settings.py +0 -19
- openstef/data/NL_terrestrial_radiation.csv +0 -25585
- openstef/data/NL_terrestrial_radiation.csv.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
- openstef/data/dutch_holidays.csv +0 -1759
- openstef/data/dutch_holidays.csv.license +0 -3
- openstef/data/pv_single_coefs.csv +0 -601
- openstef/data/pv_single_coefs.csv.license +0 -3
- openstef/data_classes/__init__.py +0 -3
- openstef/data_classes/data_prep.py +0 -99
- openstef/data_classes/model_specifications.py +0 -30
- openstef/data_classes/prediction_job.py +0 -135
- openstef/data_classes/split_function.py +0 -97
- openstef/enums.py +0 -140
- openstef/exceptions.py +0 -74
- openstef/feature_engineering/__init__.py +0 -3
- openstef/feature_engineering/apply_features.py +0 -138
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
- openstef/feature_engineering/cyclic_features.py +0 -161
- openstef/feature_engineering/data_preparation.py +0 -152
- openstef/feature_engineering/feature_adder.py +0 -206
- openstef/feature_engineering/feature_applicator.py +0 -202
- openstef/feature_engineering/general.py +0 -141
- openstef/feature_engineering/holiday_features.py +0 -231
- openstef/feature_engineering/lag_features.py +0 -165
- openstef/feature_engineering/missing_values_transformer.py +0 -141
- openstef/feature_engineering/rolling_features.py +0 -58
- openstef/feature_engineering/weather_features.py +0 -492
- openstef/metrics/__init__.py +0 -3
- openstef/metrics/figure.py +0 -303
- openstef/metrics/metrics.py +0 -486
- openstef/metrics/reporter.py +0 -222
- openstef/model/__init__.py +0 -3
- openstef/model/basecase.py +0 -82
- openstef/model/confidence_interval_applicator.py +0 -242
- openstef/model/fallback.py +0 -77
- openstef/model/metamodels/__init__.py +0 -3
- openstef/model/metamodels/feature_clipper.py +0 -90
- openstef/model/metamodels/grouped_regressor.py +0 -222
- openstef/model/metamodels/missing_values_handler.py +0 -138
- openstef/model/model_creator.py +0 -214
- openstef/model/objective.py +0 -426
- openstef/model/objective_creator.py +0 -65
- openstef/model/regressors/__init__.py +0 -3
- openstef/model/regressors/arima.py +0 -197
- openstef/model/regressors/custom_regressor.py +0 -64
- openstef/model/regressors/dazls.py +0 -116
- openstef/model/regressors/flatliner.py +0 -95
- openstef/model/regressors/gblinear_quantile.py +0 -334
- openstef/model/regressors/lgbm.py +0 -29
- openstef/model/regressors/linear.py +0 -90
- openstef/model/regressors/linear_quantile.py +0 -305
- openstef/model/regressors/regressor.py +0 -114
- openstef/model/regressors/xgb.py +0 -52
- openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
- openstef/model/regressors/xgb_quantile.py +0 -228
- openstef/model/serializer.py +0 -431
- openstef/model/standard_deviation_generator.py +0 -81
- openstef/model_selection/__init__.py +0 -3
- openstef/model_selection/model_selection.py +0 -311
- openstef/monitoring/__init__.py +0 -3
- openstef/monitoring/performance_meter.py +0 -92
- openstef/monitoring/teams.py +0 -203
- openstef/pipeline/__init__.py +0 -3
- openstef/pipeline/create_basecase_forecast.py +0 -133
- openstef/pipeline/create_component_forecast.py +0 -168
- openstef/pipeline/create_forecast.py +0 -171
- openstef/pipeline/optimize_hyperparameters.py +0 -317
- openstef/pipeline/train_create_forecast_backtest.py +0 -163
- openstef/pipeline/train_model.py +0 -561
- openstef/pipeline/utils.py +0 -52
- openstef/postprocessing/__init__.py +0 -3
- openstef/postprocessing/postprocessing.py +0 -275
- openstef/preprocessing/__init__.py +0 -3
- openstef/preprocessing/preprocessing.py +0 -42
- openstef/settings.py +0 -15
- openstef/tasks/__init__.py +0 -3
- openstef/tasks/calculate_kpi.py +0 -324
- openstef/tasks/create_basecase_forecast.py +0 -118
- openstef/tasks/create_components_forecast.py +0 -162
- openstef/tasks/create_forecast.py +0 -145
- openstef/tasks/create_solar_forecast.py +0 -420
- openstef/tasks/create_wind_forecast.py +0 -80
- openstef/tasks/optimize_hyperparameters.py +0 -135
- openstef/tasks/split_forecast.py +0 -273
- openstef/tasks/train_model.py +0 -224
- openstef/tasks/utils/__init__.py +0 -3
- openstef/tasks/utils/dependencies.py +0 -107
- openstef/tasks/utils/predictionjobloop.py +0 -243
- openstef/tasks/utils/taskcontext.py +0 -160
- openstef/validation/__init__.py +0 -3
- openstef/validation/validation.py +0 -322
- openstef-3.4.56.dist-info/METADATA +0 -154
- openstef-3.4.56.dist-info/RECORD +0 -102
- openstef-3.4.56.dist-info/top_level.txt +0 -1
- /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
openstef/model/basecase.py
DELETED
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
import numpy as np
|
|
5
|
-
import pandas as pd
|
|
6
|
-
from sklearn.base import BaseEstimator, RegressorMixin
|
|
7
|
-
|
|
8
|
-
# Minimal time resolution used when validating the forecast input
# (presumably in minutes -- TODO confirm; the constant is not referenced in the
# visible part of this module).
MINIMAL_RESOLUTION: int = 15 # Used for validating the forecast input
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class BaseCaseModel(BaseEstimator, RegressorMixin):
    """Naive "basecase" regressor that re-uses historic load as the forecast.

    The forecast is read from the lag feature columns ``T-7d`` and ``T-14d`` of
    the input data, so no training is needed. ``fit`` exists only so the class
    can be used wherever a scikit-learn style estimator is expected.
    """

    def predict(self, forecast_input_data: pd.DataFrame) -> pd.DataFrame:
        """Predict using the basecase method.

        The basecase forecast is determined by the T-7d and T-14d load. This
        means fitting the model is not required.

        Args:
            forecast_input_data: Forecast input dataframe; must contain the
                columns ``T-7d`` and ``T-14d``.

        Returns:
            Basecase forecast, a DataFrame with a single ``forecast`` column.

        Raises:
            ValueError: If the column ``T-7d`` or ``T-14d`` is not present.

        """
        return self.make_basecase_forecast(forecast_input_data)

    def fit(self, X=None, y=None):
        """Do nothing and return ``self``.

        The arguments are accepted (and ignored) so that the estimator can be
        called with the usual scikit-learn ``fit(X, y)`` signature; calling
        ``fit()`` without arguments keeps working.

        Args:
            X: Ignored.
            y: Ignored.

        Returns:
            The (unchanged) estimator itself.

        """
        return self

    @staticmethod
    def make_basecase_forecast(
        forecast_input_data: pd.DataFrame, overwrite_delay_hours: int = 48
    ) -> pd.DataFrame:
        """Make a basecase forecast.

        The idea of the basecase forecast is that if all else fails, this
        forecast is still available. Basecase example: the load of last week.

        Args:
            forecast_input_data: Forecast input dataframe; must contain the
                columns ``T-7d`` and ``T-14d``.
            overwrite_delay_hours: Not used by this implementation; kept so
                existing callers that pass it keep working.

        Returns:
            Basecase forecast: a DataFrame with a single ``forecast`` column,
            sorted by index. For each index value the ``T-7d`` value is used
            when it is available, otherwise the ``T-14d`` value.

        Raises:
            ValueError: If the column ``T-7d`` or ``T-14d`` is not present.

        """
        # Check if required features are provided
        required_columns = ("T-14d", "T-7d")
        if any(col not in forecast_input_data.columns for col in required_columns):
            raise ValueError(
                "Could not make basecase, features T-7d and T-14d are required! Tip:"
                " Generate these features with a FeatureApplicator object."
            )

        # Primary source: the load of last week
        last_week = (
            forecast_input_data[["T-7d"]].dropna().rename(columns={"T-7d": "forecast"})
        )
        # Secondary source for moments where last week's load is missing:
        # the load of two weeks before
        two_weeks_ago = (
            forecast_input_data[["T-14d"]]
            .dropna()
            .rename(columns={"T-14d": "forecast"})
        )

        # The T-7d rows come first, so keeping the first occurrence of every
        # index value gives T-7d priority over T-14d.
        basecase_forecast = pd.concat([last_week, two_weeks_ago])
        basecase_forecast = basecase_forecast[
            ~basecase_forecast.index.duplicated(keep="first")
        ]

        return basecase_forecast.sort_index()

    @property
    def can_predict_quantiles(self):
        """Whether the model can predict quantiles; always ``False`` here."""
        return False
|
|
@@ -1,242 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
import logging
|
|
5
|
-
from datetime import datetime
|
|
6
|
-
|
|
7
|
-
import numpy as np
|
|
8
|
-
import pandas as pd
|
|
9
|
-
import structlog
|
|
10
|
-
from scipy import stats
|
|
11
|
-
from sklearn.base import RegressorMixin
|
|
12
|
-
|
|
13
|
-
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
|
14
|
-
from openstef.exceptions import ModelWithoutStDev
|
|
15
|
-
from openstef.settings import Settings
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class ConfidenceIntervalApplicator:
    """Add a standard deviation and quantile columns to a forecast.

    The standard deviation is read from ``model.standard_deviation``. Quantiles
    come either from that standard deviation (assuming normally distributed
    errors) or from the model itself when ``model.can_predict_quantiles``.
    """

    def __init__(self, model: "RegressorMixin", forecast_input_data: pd.DataFrame):
        """Store the model and its input data and create a logger.

        Args:
            model: Model the forecast was made with. This class reads its
                ``standard_deviation`` and ``can_predict_quantiles`` attributes
                and, for quantile models, ``quantiles`` and
                ``predict(..., quantile=...)``.
            forecast_input_data: Input data that is passed to ``model.predict``
                when quantiles are generated with quantile regression.

        """
        self.model = model
        self.forecast_input_data = forecast_input_data
        # NOTE: this (re)configures structlog globally on every instantiation.
        structlog.configure(
            wrapper_class=structlog.make_filtering_bound_logger(
                logging.getLevelName(Settings.log_level)
            )
        )
        self.logger = structlog.get_logger(self.__class__.__name__)

    def add_confidence_interval(
        self,
        forecast: pd.DataFrame,
        pj: "PredictionJobDataClass",
    ) -> pd.DataFrame:
        """Add a confidence interval to a forecast.

        Adds a confidence interval to a forecast in two ways:
            1. "stdev" column, this is a column with a standard deviation that
                is read from the model.
            2. Quantile columns, these columns give a more precise definition
                of the confidence interval. Quantile columns are determined
                with one of two methods:

                a. Default, using the "stdev" column and the assumption the
                    error is normally distributed.
                b. Quantile regression, only used when
                    ``model.can_predict_quantiles`` is true; the model itself
                    predicts every quantile.

        Args:
            forecast: Forecast DataFrame with columns: "forecast"
            pj: Prediction job; ``pj["quantiles"]`` holds the requested
                quantiles.

        Returns:
            Forecast DataFrame with columns; "forecast", "stdev" and quantile
            columns.

        Raises:
            ModelWithoutStDev: If the model does not have a valid standard
                deviation.

        """
        temp_forecast = self._add_standard_deviation_to_forecast(forecast)

        if not self.model.can_predict_quantiles:
            return self._add_quantiles_to_forecast_default(
                temp_forecast, pj["quantiles"]
            )

        # Try to generate the quantiles that were requested
        try:
            return self._add_quantiles_to_forecast_quantile_regression(
                temp_forecast, pj["quantiles"]
            )
        except Exception:
            # Deliberately broad: fall back on the quantiles of the model if the
            # requested quantiles cannot be generated by the model. This can
            # happen when the model was trained on different quantiles than
            # are requested.
            result = self._add_quantiles_to_forecast_quantile_regression(
                temp_forecast, self.model.quantiles
            )
            self.logger.warning(
                "Quantiles are requested the model was not trained on. Using the quantiles the model was trained on",
                requested_quantiles=pj["quantiles"],
                trained_quantiles=self.model.quantiles,
            )
            return result

    def _add_standard_deviation_to_forecast(
        self, forecast: pd.DataFrame
    ) -> pd.DataFrame:
        """Add a standard deviation to a live forecast.

        ``model.standard_deviation`` is expected to have the columns "stdev",
        "hour" and "horizon". With a single horizon the stdev of the matching
        hour is used; with several horizons the stdev is interpolated between
        the smallest ("near") and largest ("far") horizon based on the time
        ahead of every forecast row.

        Args:
            forecast: Forecast DataFrame with columns: "forecast". If it has no
                "tAhead" column (time ahead in hours), one is derived from the
                index and the current time. The input is not modified.

        Returns:
            Forecast with added standard deviation. DataFrame with columns:
                "forecast", "stdev" (and "tAhead").

        Raises:
            ModelWithoutStDev: If the model does not have a valid standard
                deviation.

        """
        minimal_resolution: int = 15  # Minimal time resolution in minutes
        standard_deviation = self.model.standard_deviation

        # raise an exception if no valid standard deviation is available
        # (``None`` is checked first so that ``.empty`` is never read on None)
        if standard_deviation is None or standard_deviation.empty:
            raise ModelWithoutStDev("No stdev available")

        if standard_deviation.stdev.isnull().values.all():
            raise ModelWithoutStDev("All stdev values are NA")

        # Fill stdev nans with the mean of all stdev values
        if standard_deviation.stdev.isnull().values.any():
            self.logger.warning(
                "Stdev for some hours is not known, filling in with mean."
            )
            # ``assign`` returns a new frame, so the DataFrame stored on the
            # model is left untouched.
            standard_deviation = standard_deviation.assign(
                stdev=standard_deviation.stdev.fillna(standard_deviation.stdev.mean())
            )

        # pivot to a dataframe indexed by hour with one stdev column per horizon
        stdev = standard_deviation.pivot_table(columns=["horizon"], index="hour")[
            "stdev"
        ]
        # The smallest and largest horizon act as the 'near' and 'far' horizon
        near = stdev.columns.min()
        far = stdev.columns.max()

        forecast_copy = forecast.copy()
        # add time ahead column if not already present
        if "tAhead" not in forecast_copy.columns:
            # Take the current instant in UTC and express it in the time zone
            # of the index. A naive index is assumed to hold UTC times.
            index_tz = forecast_copy.index.tz
            now = pd.Timestamp.now(tz="UTC")
            now = now.tz_localize(None) if index_tz is None else now.tz_convert(index_tz)
            # Rounding helps to prevent fractional t_aheads
            now = now.round(f"{minimal_resolution}min")
            # Determine t_aheads (in hours) by subtracting with now
            forecast_copy["tAhead"] = (
                forecast_copy.index - now
            ).total_seconds() / 3600.0

        # add helper column hour
        forecast_copy["hour"] = forecast_copy.index.hour

        # Function used to approximate the error for in-between time horizons,
        # assuming an exponential decay of accuracy.
        def calc_exp_dec(t, stdev_row, near, far):
            # We use the formula sigma(t) = A * (1 - exp(-t/tau)) + b
            # Strictly speaking, tau is specific for each time series.
            # However, for simplicity, we use tau = Far/4: the gap between
            # sigma(t) and its asymptotic value shrinks by a factor e every
            # quarter of the far horizon.
            tau = far / 4.0
            # Filling in the known sigma(Near) and sigma(Far) gives:
            sf, sn = stdev_row[far], stdev_row[near]
            A = (sf - sn) / ((1 - np.exp(-far / tau)) - (1 - np.exp(-near / tau)))
            b = sn - A * (1 - np.exp(-near / tau))
            value = A * (1 - np.exp(-t / tau)) + b
            # cap the value to keep it between near and far
            if value < sn:
                return sn
            return sf if value > sf else value

        # If only one horizon is available use that one
        if len(stdev.columns) == 1:
            forecast_copy["stdev"] = forecast_copy.apply(
                lambda x: stdev.loc[x.hour], axis=1
            )
        # If more are available, interpolate between the near and far horizon
        else:
            forecast_copy["stdev"] = forecast_copy.apply(
                lambda x: calc_exp_dec(x.tAhead, stdev.loc[x.hour], near, far), axis=1
            )
        return forecast_copy.drop(columns=["hour"])

    @staticmethod
    def _add_quantiles_to_forecast_default(
        forecast: pd.DataFrame, quantiles: list[float]
    ) -> pd.DataFrame:
        """Add quantiles to forecast.

        Use the standard deviation to calculate the quantiles, assuming a
        normally distributed error around the forecast.

        Args:
            forecast: Forecast (should contain a 'forecast' + 'stdev' column).
                The quantile columns are added to this DataFrame in place.
            quantiles: List with desired quantiles,
                for example: [0.01, 0.1, 0.9, 0.99]

        Returns:
            The same forecast DataFrame with quantile (e.g. 'quantile_PXX')
                columns added.

        Raises:
            ValueError: If the 'forecast' or 'stdev' column is missing.

        """
        # Check if stdev and forecast are in the dataframe
        if not all(elem in forecast.columns for elem in ["forecast", "stdev"]):
            raise ValueError("Forecast should contain a 'forecast' and 'stdev' column")

        for quantile in quantiles:
            quantile_key = f"quantile_P{quantile * 100:02.0f}"
            # ppf gives the number of standard deviations for this quantile
            forecast[quantile_key] = (
                forecast["forecast"] + stats.norm.ppf(quantile) * forecast["stdev"]
            )

        return forecast

    def _add_quantiles_to_forecast_quantile_regression(
        self, forecast: pd.DataFrame, quantiles: list[float]
    ) -> pd.DataFrame:
        """Add quantiles to forecast.

        Use trained quantile regression model to calculate the quantiles.

        Args:
            forecast: Forecast
            quantiles: List with desired quantiles

        Returns:
            Forecast DataFrame with quantile (e.g. 'quantile_PXX')
                columns added.

        """
        # Predict every quantile for all rows of the forecast input data
        quantile_df = pd.DataFrame(index=self.forecast_input_data.index)
        for quantile in quantiles:
            quantile_key = f"quantile_P{quantile * 100:02.0f}"
            quantile_df[quantile_key] = self.model.predict(
                self.forecast_input_data, quantile=quantile
            )

        # Left join: only keep quantiles for datetimes in forecast
        return forecast.merge(
            quantile_df, left_index=True, right_index=True, how="left"
        )
|
openstef/model/fallback.py
DELETED
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
from datetime import datetime
|
|
5
|
-
|
|
6
|
-
import pandas as pd
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def generate_fallback(
    forecast_input: pd.DataFrame,
    load: pd.DataFrame,
    fallback_strategy: str = "extreme_day",
) -> pd.DataFrame:
    """Make a fallback forecast with the 'quality' column set to 'substituted'.

    Currently only fallback_strategy="extreme_day" is implemented: the load
    profile of the historic day containing the highest load is looked up and
    matched by time-of-day to the requested moments.

    Args:
        forecast_input: DataFrame whose (datetime) index holds the moments a
            forecast is desired for.
        load: index=datetime, columns=['load']
        fallback_strategy: strategy to determine fallback. options:
            - extreme_day: use daily profile of most extreme day

    Returns:
        Fallback forecast DataFrame with columns; 'forecast', 'quality'. It has
        one row per row of ``forecast_input``, in the same order; the index is
        named "index". Moments whose time-of-day does not occur in the selected
        load profile get a missing 'forecast' value.

    Raises:
        ValueError: If ``load`` holds no data, or only data of today.
        NotImplementedError: If fallback_strategy != 'extreme_day'.

    """
    # Check if load is completely empty
    if load.dropna().empty:
        raise ValueError("No historic load data available")

    if fallback_strategy != "extreme_day":
        raise NotImplementedError(
            f'fallback_strategy should be "extreme_day", received:{fallback_strategy}'
        )

    # Find most extreme historic day (do not count today as it is incomplete).
    # "Today" is determined in the time zone of the load index; a naive index
    # is assumed to hold UTC times.
    index_tz = load.index.tz
    today = pd.Timestamp.now(tz="UTC" if index_tz is None else index_tz).date()
    historic_load = load[load.index.date != today]
    if historic_load["load"].dropna().empty:
        raise ValueError("No historic load data available")
    day_with_highest_load_date = historic_load.idxmax()["load"].date()

    # generate datetime range of the day with the highest load
    from_datetime = pd.Timestamp(day_with_highest_load_date, tz=index_tz)
    till_datetime = from_datetime + pd.Timedelta("1 days")

    # slice load dataframe, only rows for the day with the highest load;
    # ``assign`` creates a new frame, so ``load`` itself is never written to
    in_extreme_day = (load.index >= from_datetime) & (load.index < till_datetime)
    highest_daily_loadprofile = load.loc[in_extreme_day].assign(
        time=lambda profile: profile.index.time
    )

    # Match moments by time-of-day
    forecast = pd.DataFrame(
        {"time": forecast_input.index.time}, index=forecast_input.index
    )
    forecast = (
        # Give the index a fixed name so it can be restored after the merge,
        # whatever the index of forecast_input is called.
        forecast.rename_axis("index")
        .reset_index()
        # Left join: keep exactly the requested moments, in their own order
        .merge(highest_daily_loadprofile[["time", "load"]], on="time", how="left")
        .set_index("index")
    )

    # Rename so column is called forecast
    forecast = forecast[["load"]].rename(columns={"load": "forecast"})

    # Add a column quality.
    forecast["quality"] = "substituted"

    return forecast
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
from sklearn.base import BaseEstimator, TransformerMixin
|
|
5
|
-
import pandas as pd
|
|
6
|
-
from typing import List, Dict, Tuple, Optional
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class FeatureClipper(BaseEstimator, TransformerMixin):
    """
    A transformer that clips the values of specified columns to the minimum and
    maximum values observed during training. This prevents the model from
    extrapolating beyond these values during prediction.
    """

    def __init__(self, columns: List[str]):
        """
        Initialize the FeatureClipper.

        Parameters:
        ----------
        columns : List[str]
            List of column names to be clipped.
        """
        self.columns: List[str] = columns
        # Maps column name -> (min, max) as observed by the most recent fit.
        self.feature_ranges: Dict[str, Tuple[float, float]] = {}

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> "FeatureClipper":
        """
        Fits the transformer on the training data by calculating the min and max
        values for the specified columns. Columns that are not present in ``X``
        are skipped. Ranges learned by an earlier call to ``fit`` are discarded.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame containing training data.

        y : Optional[pd.Series]
            Ignored. This parameter exists for compatibility with scikit-learn's pipeline.

        Returns:
        -------
        self : FeatureClipper
            Fitted transformer.

        Raises:
        ------
        ValueError:
            If the input is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        # Rebuild the mapping from scratch so that a refit never keeps stale
        # ranges for columns that are absent from the new training data.
        self.feature_ranges = {
            col: (X[col].min(), X[col].max()) for col in self.columns if col in X.columns
        }

        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Transforms new data by clipping the specified columns' values to be within
        the min and max range observed during fitting. Columns that are missing
        from ``X`` or for which no range was learned are left untouched.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame containing new data to be transformed.

        Returns:
        -------
        X_ : pd.DataFrame
            A copy of the input DataFrame with clipped values in the specified columns.

        Raises:
        ------
        ValueError:
            If the input is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        # Work on a copy so the caller's DataFrame is not modified
        X_copy = X.copy()

        for col in self.columns:
            if col in X_copy.columns and col in self.feature_ranges:
                min_val, max_val = self.feature_ranges[col]
                X_copy[col] = X_copy[col].clip(lower=min_val, upper=max_val)

        return X_copy
|