openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef-4.0.0a3.dist-info/METADATA +177 -0
- openstef-4.0.0a3.dist-info/RECORD +4 -0
- {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
- openstef/__init__.py +0 -14
- openstef/__main__.py +0 -3
- openstef/app_settings.py +0 -19
- openstef/data/NL_terrestrial_radiation.csv +0 -25585
- openstef/data/NL_terrestrial_radiation.csv.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
- openstef/data/dutch_holidays.csv +0 -1759
- openstef/data/dutch_holidays.csv.license +0 -3
- openstef/data/pv_single_coefs.csv +0 -601
- openstef/data/pv_single_coefs.csv.license +0 -3
- openstef/data_classes/__init__.py +0 -3
- openstef/data_classes/data_prep.py +0 -99
- openstef/data_classes/model_specifications.py +0 -30
- openstef/data_classes/prediction_job.py +0 -135
- openstef/data_classes/split_function.py +0 -97
- openstef/enums.py +0 -140
- openstef/exceptions.py +0 -74
- openstef/feature_engineering/__init__.py +0 -3
- openstef/feature_engineering/apply_features.py +0 -138
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
- openstef/feature_engineering/cyclic_features.py +0 -161
- openstef/feature_engineering/data_preparation.py +0 -152
- openstef/feature_engineering/feature_adder.py +0 -206
- openstef/feature_engineering/feature_applicator.py +0 -202
- openstef/feature_engineering/general.py +0 -141
- openstef/feature_engineering/holiday_features.py +0 -231
- openstef/feature_engineering/lag_features.py +0 -165
- openstef/feature_engineering/missing_values_transformer.py +0 -141
- openstef/feature_engineering/rolling_features.py +0 -58
- openstef/feature_engineering/weather_features.py +0 -492
- openstef/metrics/__init__.py +0 -3
- openstef/metrics/figure.py +0 -303
- openstef/metrics/metrics.py +0 -486
- openstef/metrics/reporter.py +0 -222
- openstef/model/__init__.py +0 -3
- openstef/model/basecase.py +0 -82
- openstef/model/confidence_interval_applicator.py +0 -242
- openstef/model/fallback.py +0 -77
- openstef/model/metamodels/__init__.py +0 -3
- openstef/model/metamodels/feature_clipper.py +0 -90
- openstef/model/metamodels/grouped_regressor.py +0 -222
- openstef/model/metamodels/missing_values_handler.py +0 -138
- openstef/model/model_creator.py +0 -214
- openstef/model/objective.py +0 -426
- openstef/model/objective_creator.py +0 -65
- openstef/model/regressors/__init__.py +0 -3
- openstef/model/regressors/arima.py +0 -197
- openstef/model/regressors/custom_regressor.py +0 -64
- openstef/model/regressors/dazls.py +0 -116
- openstef/model/regressors/flatliner.py +0 -95
- openstef/model/regressors/gblinear_quantile.py +0 -334
- openstef/model/regressors/lgbm.py +0 -29
- openstef/model/regressors/linear.py +0 -90
- openstef/model/regressors/linear_quantile.py +0 -305
- openstef/model/regressors/regressor.py +0 -114
- openstef/model/regressors/xgb.py +0 -52
- openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
- openstef/model/regressors/xgb_quantile.py +0 -228
- openstef/model/serializer.py +0 -431
- openstef/model/standard_deviation_generator.py +0 -81
- openstef/model_selection/__init__.py +0 -3
- openstef/model_selection/model_selection.py +0 -311
- openstef/monitoring/__init__.py +0 -3
- openstef/monitoring/performance_meter.py +0 -92
- openstef/monitoring/teams.py +0 -203
- openstef/pipeline/__init__.py +0 -3
- openstef/pipeline/create_basecase_forecast.py +0 -133
- openstef/pipeline/create_component_forecast.py +0 -168
- openstef/pipeline/create_forecast.py +0 -171
- openstef/pipeline/optimize_hyperparameters.py +0 -317
- openstef/pipeline/train_create_forecast_backtest.py +0 -163
- openstef/pipeline/train_model.py +0 -561
- openstef/pipeline/utils.py +0 -52
- openstef/postprocessing/__init__.py +0 -3
- openstef/postprocessing/postprocessing.py +0 -275
- openstef/preprocessing/__init__.py +0 -3
- openstef/preprocessing/preprocessing.py +0 -42
- openstef/settings.py +0 -15
- openstef/tasks/__init__.py +0 -3
- openstef/tasks/calculate_kpi.py +0 -324
- openstef/tasks/create_basecase_forecast.py +0 -118
- openstef/tasks/create_components_forecast.py +0 -162
- openstef/tasks/create_forecast.py +0 -145
- openstef/tasks/create_solar_forecast.py +0 -420
- openstef/tasks/create_wind_forecast.py +0 -80
- openstef/tasks/optimize_hyperparameters.py +0 -135
- openstef/tasks/split_forecast.py +0 -273
- openstef/tasks/train_model.py +0 -224
- openstef/tasks/utils/__init__.py +0 -3
- openstef/tasks/utils/dependencies.py +0 -107
- openstef/tasks/utils/predictionjobloop.py +0 -243
- openstef/tasks/utils/taskcontext.py +0 -160
- openstef/validation/__init__.py +0 -3
- openstef/validation/validation.py +0 -322
- openstef-3.4.56.dist-info/METADATA +0 -154
- openstef-3.4.56.dist-info/RECORD +0 -102
- openstef-3.4.56.dist-info/top_level.txt +0 -1
- /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
import logging
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
import pandas as pd
|
|
8
|
-
import structlog
|
|
9
|
-
|
|
10
|
-
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
|
11
|
-
from openstef.exceptions import InputDataOngoingZeroFlatlinerError, NoRealisedLoadError
|
|
12
|
-
from openstef.feature_engineering.feature_applicator import (
|
|
13
|
-
OperationalPredictFeatureApplicator,
|
|
14
|
-
)
|
|
15
|
-
from openstef.model.basecase import BaseCaseModel
|
|
16
|
-
from openstef.model.confidence_interval_applicator import ConfidenceIntervalApplicator
|
|
17
|
-
from openstef.pipeline.utils import generate_forecast_datetime_range
|
|
18
|
-
from openstef.postprocessing.postprocessing import (
|
|
19
|
-
add_components_base_case_forecast,
|
|
20
|
-
add_prediction_job_properties_to_forecast,
|
|
21
|
-
)
|
|
22
|
-
from openstef.settings import Settings
|
|
23
|
-
from openstef.validation import validation
|
|
24
|
-
|
|
25
|
-
# Module-level settings for the basecase pipeline.
# NOTE(review): none of these three constants is referenced by the pipeline
# code visible in this module; confirm external users before changing them.
MODEL_LOCATION = Path(".")  # relative path pointing at the current directory
BASECASE_HORIZON_MINUTES = 60 * 24 * 14  # 14 days ahead
BASECASE_RESOLUTION_MINUTES = 15
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def create_basecase_forecast_pipeline(
    pj: PredictionJobDataClass,
    input_data: pd.DataFrame,
) -> pd.DataFrame:
    """Compute the base case forecast and confidence intervals for a given prediction job and input data.

    The forecast is built from the lagged load features "T-7d" and "T-14d".
    When an ongoing zero flatliner is detected in the first column of
    ``input_data``, the historic "load" values are set to zero (on a copy of
    the input) before the features are generated.

    Args:
        pj: Prediction job.
        input_data: Data frame containing the input data necessary for the
            prediction. It must have a ``pd.DatetimeIndex``.

    Returns:
        Base case forecast

    Raises:
        ValueError: When ``input_data`` does not have a datetime index.
        NoRealisedLoadError: When no realised load for given datetime range.

    """
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(
            logging.getLevelName(Settings.log_level)
        )
    )
    logger = structlog.get_logger(__name__)

    logger.info("Preprocessing data for basecase forecast")

    # Check the index type before any index-based step runs, so a wrong index
    # is reported with this explicit error.
    if not isinstance(input_data.index, pd.DatetimeIndex):
        raise ValueError("Input dataframe does not have a datetime index.")

    forecast_start, forecast_end = generate_forecast_datetime_range(input_data)

    zero_flatliner_ongoing = validation.detect_ongoing_zero_flatliner(
        load=input_data.iloc[:, 0],
        duration_threshold_minutes=pj.flatliner_threshold_minutes,
    )

    if zero_flatliner_ongoing:
        # Set historic load to zero to force the basecase forecasts to be zero.
        # Work on a copy so the caller's dataframe is not overwritten.
        input_data = input_data.copy()
        input_data.loc[input_data.index < forecast_start, "load"] = 0

    # Add features
    data_with_features = OperationalPredictFeatureApplicator(
        horizons=[0.25],
        feature_names=[
            "T-7d",
            "T-14d",
        ],  # Generate features for load 7 days ago and load 14 days ago these are the same as the basecase forecast.
    ).add_features(input_data)

    forecast_input = data_with_features[forecast_start:forecast_end]

    # Initialize model
    model = BaseCaseModel()
    logger.info("Making basecase forecast")
    # Make basecase forecast with the same model instance that later carries
    # the standard deviation used for the confidence interval.
    basecase_forecast = model.predict(forecast_input)

    # Check if input data is available
    if len(basecase_forecast) == 0:
        raise NoRealisedLoadError(pj["id"])

    # Estimate the stdev by using the stdev of the hour for historic (T-14d) load
    model.standard_deviation = generate_basecase_confidence_interval(forecast_input)
    logger.info("Postprocessing basecase forecast")
    # Apply confidence interval
    basecase_forecast = ConfidenceIntervalApplicator(
        model, forecast_input
    ).add_confidence_interval(basecase_forecast, pj)

    # Add basecase for the component forecasts
    basecase_forecast = add_components_base_case_forecast(basecase_forecast)

    # Do further postprocessing
    basecase_forecast = add_prediction_job_properties_to_forecast(
        pj=pj,
        forecast=basecase_forecast,
        algorithm_type="basecase_lastweek",
        forecast_quality="not_renewed",
    )

    return basecase_forecast
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def generate_basecase_confidence_interval(
    data_with_features: pd.DataFrame,
) -> pd.DataFrame:
    """Derive the per-hour standard deviation used as basecase confidence interval.

    Args:
        data_with_features: Dataframe with a datetime index and a "T-14d"
            column; this is the input that is used to make the basecase forecast.

    Returns:
        Dataframe indexed by hour of day with the columns "stdev", "hour"
        and "horizon".

    """
    hour_of_day = data_with_features.index.hour
    # Double brackets keep the selection a DataFrame instead of a Series
    lagged_load = data_with_features[["T-14d"]]

    # Standard deviation of the T-14d load within every hour of the day
    hourly_spread = lagged_load.groupby(hour_of_day).std()
    hourly_spread = hourly_spread.rename(columns={"T-14d": "stdev"})

    # Expose the grouping key as a regular column next to a fixed horizon value
    hourly_spread["hour"] = hourly_spread.index
    hourly_spread["horizon"] = 48
    return hourly_spread
|
|
@@ -1,168 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
|
|
5
|
-
import logging
|
|
6
|
-
|
|
7
|
-
import joblib
|
|
8
|
-
import numpy as np
|
|
9
|
-
import pandas as pd
|
|
10
|
-
import structlog
|
|
11
|
-
|
|
12
|
-
import openstef.postprocessing.postprocessing as postprocessing
|
|
13
|
-
from openstef import PROJECT_ROOT
|
|
14
|
-
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
|
15
|
-
from openstef.enums import ForecastType
|
|
16
|
-
from openstef.model.regressors.dazls import Dazls
|
|
17
|
-
from openstef.settings import Settings
|
|
18
|
-
|
|
19
|
-
# Set the path for the Dazls stored model.
# This is a path *prefix*, not a complete file name: the loading code appends
# a file-specific suffix such as "baseline_model.z" to it.
DAZLS_STORED = str(
    PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def create_input(
    pj: PredictionJobDataClass, input_data: pd.DataFrame, weather_data: pd.DataFrame
) -> pd.DataFrame:
    """Assemble the feature table that is fed to the Dazls model.

    The weather columns are inner-joined on the index with the forecast
    (renamed to "total_load"), rows containing missing values are dropped and
    location, time and summary-statistic features are appended.

    Args:
        pj: Prediction job; its "lat" and "lon" entries are used.
        input_data: Input forecast for the components forecast (needs a
            "forecast" column).
        weather_data: Weather data with 'radiation' and 'windspeed_100m' columns

    Returns:
        Dataframe which will be used for the Dazls prediction function.

    """
    # Raw inputs: weather joined with the total load on matching timestamps
    weather = weather_data[["radiation", "windspeed_100m"]]
    total_load = input_data[["forecast"]].rename(columns={"forecast": "total_load"})
    features = weather.merge(
        total_load, how="inner", left_index=True, right_index=True
    ).dropna()

    # Location of the prediction job, repeated on every row
    features["lat"] = pj["lat"]
    features["lon"] = pj["lon"]

    features["solar_on"] = 1
    features["wind_on"] = 1

    timestamps = features.index
    features["hour"] = timestamps.hour
    features["minute"] = timestamps.minute

    # Variance and standard error of the mean of each signal over the whole
    # table, broadcast as constant columns (var0..var2 first, then sem0..sem2)
    signals = ("total_load", "radiation", "windspeed_100m")
    for position, signal in enumerate(signals):
        features[f"var{position}"] = features[signal].var()
    for position, signal in enumerate(signals):
        features[f"sem{position}"] = features[signal].sem()

    # Features for the new model
    # Periodic Month feature
    month_scale = (1 / 11) * np.pi - (1 / 365)
    month_number = np.array(timestamps.month, dtype=float)
    features["month_ff"] = np.sin(month_scale * (month_number - 1))

    return features
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def create_components_forecast_pipeline(
    pj: PredictionJobDataClass, input_data: pd.DataFrame, weather_data: pd.DataFrame
) -> pd.DataFrame:
    """Split a load forecast into wind, solar and other components with the Dazls model.

    Any exception raised while building the component forecasts is logged as a
    warning and replaced by an all-zero dataframe, so this pipeline itself
    always proceeds to the final post-processing step.

    Args:
        pj: Prediction job
        input_data: Input forecast for the components forecast.
        weather_data: Weather data with 'radiation' and 'windspeed_100m' columns

    Returns:
        DataFrame with component forecasts. The dataframe contains these columns;
        "forecast_wind_on_shore",
        "forecast_solar",
        "forecast_other",
        "pid",
        "customer",
        "description",
        "type",
        "algtype"

    """
    log_level = logging.getLevelName(Settings.log_level)
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(log_level)
    )
    logger = structlog.get_logger(__name__)
    logger.info("Make components prediction", pid=pj["id"])

    try:
        dazls_features = create_input(pj, input_data, weather_data)

        # The stored model is a joblib dump (.z file) shipped with the package.
        # For the code contact: korte.termijn.prognoses@alliander.com
        dazls_model = Dazls()
        dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")

        logger.info("DAZLS model loaded", dazls_model=str(dazls_model))

        # Predict on the prepared features and label the two output columns
        raw_prediction = dazls_model.predict(x=dazls_features)
        component_forecasts = pd.DataFrame(
            raw_prediction,
            columns=["forecast_wind_on_shore", "forecast_solar"],
            index=dazls_features.index,
        )

        # Post-process the solar and the wind component (solar first)
        solar = postprocessing.post_process_wind_solar(
            component_forecasts["forecast_solar"], forecast_type=ForecastType.SOLAR
        )
        component_forecasts["forecast_solar"] = solar
        wind = postprocessing.post_process_wind_solar(
            component_forecasts["forecast_wind_on_shore"],
            forecast_type=ForecastType.WIND,
        )
        component_forecasts["forecast_wind_on_shore"] = wind

        # Whatever is neither solar nor wind ends up in "forecast_other"
        component_forecasts["forecast_other"] = (
            dazls_features["total_load"]
            - component_forecasts["forecast_solar"]
            - component_forecasts["forecast_wind_on_shore"]
        )

        # Align with the input data's index; timestamps without a forecast get 0
        component_forecasts = component_forecasts.reindex(
            index=input_data.index, fill_value=0
        )
    except Exception as exc:
        # Deliberate best-effort: fall back on a zero-filled dataframe
        logger.warning(
            f"Could not make component forecasts: {exc}, falling back on series of"
            " zeros!",
            exc_info=exc,
        )
        component_forecasts = pd.DataFrame(
            data=0,
            index=input_data.index,
            columns=["forecast_wind_on_shore", "forecast_solar", "forecast_other"],
        )

    # Prepare for output: add the prediction job properties
    # ("pid", "customer", "description", "type", "algtype")
    return postprocessing.add_prediction_job_properties_to_forecast(
        pj, component_forecasts, algorithm_type="component"
    )
|
|
@@ -1,171 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
import logging
|
|
5
|
-
|
|
6
|
-
import pandas as pd
|
|
7
|
-
import structlog
|
|
8
|
-
|
|
9
|
-
from openstef.data_classes.model_specifications import ModelSpecificationDataClass
|
|
10
|
-
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
|
11
|
-
from openstef.feature_engineering.feature_applicator import (
|
|
12
|
-
OperationalPredictFeatureApplicator,
|
|
13
|
-
)
|
|
14
|
-
from openstef.model.confidence_interval_applicator import ConfidenceIntervalApplicator
|
|
15
|
-
from openstef.model.fallback import generate_fallback
|
|
16
|
-
from openstef.model.regressors.regressor import OpenstfRegressor
|
|
17
|
-
from openstef.model.serializer import MLflowSerializer
|
|
18
|
-
from openstef.pipeline.utils import generate_forecast_datetime_range
|
|
19
|
-
from openstef.postprocessing.postprocessing import (
|
|
20
|
-
add_prediction_job_properties_to_forecast,
|
|
21
|
-
sort_quantiles,
|
|
22
|
-
)
|
|
23
|
-
from openstef.settings import Settings
|
|
24
|
-
from openstef.validation import validation
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def create_forecast_pipeline(
    pj: PredictionJobDataClass,
    input_data: pd.DataFrame,
    mlflow_tracking_uri: str,
) -> pd.DataFrame:
    """Load the most recent model for a prediction job and create a forecast with it.

    This is the top-level pipeline: it fetches the model and its
    specifications from MLflow and hands them to
    ``create_forecast_pipeline_core``.

    Expected prediction job keys: "id",

    Args:
        pj: Prediction job
        input_data: Training input data (without features)
        mlflow_tracking_uri: MlFlow tracking URI

    Returns:
        DataFrame with the forecast

    Raises:
        InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
        LookupError: When no model is found for the given prediction job in MLflow.

    """
    # The job's own id names the experiment, unless the prediction job
    # specifies an alternative forecast model.
    own_pid = pj["id"]
    alternative_pid = pj.alternative_forecast_model_pid
    prediction_model_pid = alternative_pid if alternative_pid else own_pid

    # Load most recent model for the selected pid
    serializer = MLflowSerializer(mlflow_tracking_uri=mlflow_tracking_uri)
    model, model_specs = serializer.load_model(
        experiment_name=str(prediction_model_pid)
    )
    return create_forecast_pipeline_core(pj, input_data, model, model_specs)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def create_forecast_pipeline_core(
    pj: PredictionJobDataClass,
    input_data: pd.DataFrame,
    model: OpenstfRegressor,
    model_specs: ModelSpecificationDataClass,
) -> pd.DataFrame:
    """Create a forecast with confidence intervals from an already loaded model.

    Validates the input, prepares the features (through the prediction job's
    custom data prep class when one is set), and then either predicts with the
    model or, when too little data is left, produces a fallback forecast.
    This pipeline has no database or persistent storage dependencies.

    Expected prediction job keys: "resolution_minutes", "id", "type",
        "name", "quantiles"

    Args:
        pj: Prediction job.
        input_data: Input data for the prediction.
        model: Model to use for this prediction.
        model_specs: Model specifications.

    Returns:
        Forecast

    Raises:
        InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.

    """
    log_level = logging.getLevelName(Settings.log_level)
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(log_level)
    )
    logger = structlog.get_logger(__name__)

    fallback_strategy = "extreme_day"  # this can later be expanded

    # Validate and clean data
    validated_data = validation.validate(
        pj["id"],
        input_data,
        pj["flatliner_threshold_minutes"],
        pj["resolution_minutes"],
    )

    if pj.data_prep_class:
        # Custom data prep supplied by the prediction job
        prep_cls, prep_kwargs = pj.data_prep_class.load()
        preparer = prep_cls(
            pj=pj,
            model_specs=model_specs,
            model=model,
            **prep_kwargs,
        )
        forecast_input_data, data_with_features = preparer.prepare_forecast_data(
            validated_data
        )
    else:
        # Legacy behavior: add the features the model was trained on
        feature_applicator = OperationalPredictFeatureApplicator(
            horizons=[pj["resolution_minutes"] / 60.0],
            feature_names=model.feature_names,
            feature_modules=model_specs.feature_modules,
        )
        data_with_features = feature_applicator.add_features(validated_data)

        # Keep only the forecast datetime interval (much smaller than the
        # input range) and drop the load column
        forecast_start, forecast_end = generate_forecast_datetime_range(
            data_with_features
        )
        forecast_window = data_with_features[forecast_start:forecast_end]
        forecast_input_data = forecast_window.drop(columns="load")

    # Check if sufficient data is left after cleaning
    data_is_sufficient = validation.is_data_sufficient(
        data_with_features,
        pj["completeness_threshold"],
        pj["minimal_table_length"],
        model,
    )

    if data_is_sufficient:
        # Predict
        point_forecast = model.predict(forecast_input_data)
        forecast = pd.DataFrame(
            index=forecast_input_data.index, data={"forecast": point_forecast}
        )

        # Add confidence
        interval_applicator = ConfidenceIntervalApplicator(model, forecast_input_data)
        forecast = interval_applicator.add_confidence_interval(forecast, pj)

        # Sort quantiles - prevents crossing and is statistically sound
        forecast = sort_quantiles(forecast)
    else:
        logger.warning(
            "Using fallback forecast",
            forecast_type="fallback",
            pid=pj["id"],
            fallback_strategy=fallback_strategy,
        )
        forecast = generate_fallback(data_with_features, input_data[["load"]])

    # Prepare for output
    return add_prediction_job_properties_to_forecast(
        pj,
        forecast,
        algorithm_type=str(model.path),
    )
|