openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/app_settings.py +19 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
- openstef/data/dutch_holidays.csv +1759 -0
- openstef/data_classes/data_prep.py +1 -1
- openstef/data_classes/prediction_job.py +15 -9
- openstef/enums.py +108 -9
- openstef/exceptions.py +1 -1
- openstef/feature_engineering/apply_features.py +25 -6
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
- openstef/feature_engineering/cyclic_features.py +102 -0
- openstef/feature_engineering/data_preparation.py +12 -5
- openstef/feature_engineering/feature_applicator.py +1 -5
- openstef/feature_engineering/general.py +14 -0
- openstef/feature_engineering/holiday_features.py +35 -26
- openstef/feature_engineering/missing_values_transformer.py +141 -0
- openstef/feature_engineering/weather_features.py +7 -0
- openstef/metrics/figure.py +3 -0
- openstef/metrics/metrics.py +58 -1
- openstef/metrics/reporter.py +7 -0
- openstef/model/confidence_interval_applicator.py +28 -3
- openstef/model/model_creator.py +54 -41
- openstef/model/objective.py +17 -34
- openstef/model/objective_creator.py +13 -12
- openstef/model/regressors/arima.py +1 -1
- openstef/model/regressors/dazls.py +35 -96
- openstef/model/regressors/flatliner.py +95 -0
- openstef/model/regressors/linear_quantile.py +296 -0
- openstef/model/regressors/xgb.py +23 -0
- openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
- openstef/model/regressors/xgb_quantile.py +3 -0
- openstef/model/serializer.py +10 -0
- openstef/model_selection/model_selection.py +4 -1
- openstef/monitoring/performance_meter.py +1 -2
- openstef/monitoring/teams.py +11 -0
- openstef/pipeline/create_basecase_forecast.py +11 -1
- openstef/pipeline/create_component_forecast.py +24 -28
- openstef/pipeline/create_forecast.py +20 -1
- openstef/pipeline/optimize_hyperparameters.py +18 -16
- openstef/pipeline/train_create_forecast_backtest.py +11 -1
- openstef/pipeline/train_model.py +31 -12
- openstef/pipeline/utils.py +3 -0
- openstef/postprocessing/postprocessing.py +29 -0
- openstef/settings.py +15 -0
- openstef/tasks/calculate_kpi.py +23 -20
- openstef/tasks/create_basecase_forecast.py +15 -7
- openstef/tasks/create_components_forecast.py +24 -8
- openstef/tasks/create_forecast.py +9 -6
- openstef/tasks/create_solar_forecast.py +4 -4
- openstef/tasks/optimize_hyperparameters.py +2 -2
- openstef/tasks/split_forecast.py +9 -2
- openstef/tasks/train_model.py +9 -7
- openstef/tasks/utils/taskcontext.py +7 -0
- openstef/validation/validation.py +28 -3
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
- openstef-3.4.44.dist-info/RECORD +97 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
- openstef/data/dutch_holidays_2020-2022.csv +0 -831
- openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
- openstef/feature_engineering/historic_features.py +0 -40
- openstef/model/regressors/proloaf.py +0 -281
- openstef/tasks/run_tracy.py +0 -145
- openstef-3.4.10.dist-info/RECORD +0 -104
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
@@ -10,13 +10,11 @@ import pandas as pd
|
|
10
10
|
|
11
11
|
from openstef import PROJECT_ROOT
|
12
12
|
|
13
|
-
HOLIDAY_CSV_PATH: str =
|
14
|
-
PROJECT_ROOT / "openstef" / "data" / "dutch_holidays_2020-2022.csv"
|
15
|
-
)
|
13
|
+
HOLIDAY_CSV_PATH: str = PROJECT_ROOT / "openstef" / "data" / "dutch_holidays.csv"
|
16
14
|
|
17
15
|
|
18
16
|
def generate_holiday_feature_functions(
|
19
|
-
|
17
|
+
country_code: str = "NL",
|
20
18
|
years: list = None,
|
21
19
|
path_to_school_holidays_csv: str = HOLIDAY_CSV_PATH,
|
22
20
|
) -> dict:
|
@@ -46,12 +44,14 @@ def generate_holiday_feature_functions(
|
|
46
44
|
- Pinksteren
|
47
45
|
- Kerst
|
48
46
|
|
47
|
+
|
49
48
|
The 'Brugdagen' are updated untill dec 2020. (Generated using agenda)
|
50
49
|
|
51
50
|
Args:
|
52
51
|
country: Country for which to create holiday features.
|
53
52
|
years: years for which to create holiday features.
|
54
53
|
path_to_school_holidays_csv: Filepath to csv with school holidays.
|
54
|
+
NOTE: Dutch holidays csv file is only until January 2026.
|
55
55
|
|
56
56
|
Returns:
|
57
57
|
Dictionary with functions that check if a given date is a holiday, keys
|
@@ -69,7 +69,7 @@ def generate_holiday_feature_functions(
|
|
69
69
|
now.year + 1,
|
70
70
|
]
|
71
71
|
|
72
|
-
country_holidays = holidays.country_holidays(
|
72
|
+
country_holidays = holidays.country_holidays(country_code, years=years)
|
73
73
|
|
74
74
|
# Make holiday function dict
|
75
75
|
holiday_functions = {}
|
@@ -96,7 +96,7 @@ def generate_holiday_feature_functions(
|
|
96
96
|
|
97
97
|
# Check for bridge day
|
98
98
|
holiday_functions, bridge_days = check_for_bridge_day(
|
99
|
-
date, holiday_name,
|
99
|
+
date, holiday_name, country_code, years, holiday_functions, bridge_days
|
100
100
|
)
|
101
101
|
|
102
102
|
# Add feature function that includes all bridgedays
|
@@ -104,33 +104,42 @@ def generate_holiday_feature_functions(
|
|
104
104
|
{"is_bridgeday": lambda x: np.isin(x.index.date, np.array(list(bridge_days)))}
|
105
105
|
)
|
106
106
|
|
107
|
-
#
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
)
|
115
|
-
|
116
|
-
# Loop over list of holidays names
|
117
|
-
for holiday_name in list(set(df_holidays.name)):
|
118
|
-
# Define function explicitely to mitigate 'late binding' problem
|
119
|
-
def make_holiday_func(holidayname=holiday_name):
|
120
|
-
return lambda x: np.isin(
|
121
|
-
x.index.date, df_holidays.datum[df_holidays.name == holidayname].values
|
122
|
-
)
|
107
|
+
# Add school holidays if country is NL
|
108
|
+
if country_code == "NL":
|
109
|
+
# Manully generated csv including all dutch schoolholidays for different regions
|
110
|
+
df_holidays = pd.read_csv(path_to_school_holidays_csv, index_col=None)
|
111
|
+
df_holidays["datum"] = pd.to_datetime(df_holidays.datum).apply(
|
112
|
+
lambda x: x.date()
|
113
|
+
)
|
123
114
|
|
124
|
-
#
|
115
|
+
# Add check function that includes all holidays of the provided csv
|
125
116
|
holiday_functions.update(
|
126
117
|
{
|
127
|
-
"
|
128
|
-
|
129
|
-
holidayname=holiday_name
|
118
|
+
"is_schoolholiday": lambda x: np.isin(
|
119
|
+
x.index.date, df_holidays.datum.values
|
130
120
|
)
|
131
121
|
}
|
132
122
|
)
|
133
123
|
|
124
|
+
# Loop over list of holidays names
|
125
|
+
for holiday_name in list(set(df_holidays.name)):
|
126
|
+
# Define function explicitely to mitigate 'late binding' problem
|
127
|
+
def make_holiday_func(holidayname=holiday_name):
|
128
|
+
return lambda x: np.isin(
|
129
|
+
x.index.date,
|
130
|
+
df_holidays.datum[df_holidays.name == holidayname].values,
|
131
|
+
)
|
132
|
+
|
133
|
+
# Create lag function for each holiday
|
134
|
+
holiday_functions.update(
|
135
|
+
{
|
136
|
+
"is_"
|
137
|
+
+ holiday_name.replace(" ", "_").lower(): make_holiday_func(
|
138
|
+
holidayname=holiday_name
|
139
|
+
)
|
140
|
+
}
|
141
|
+
)
|
142
|
+
|
134
143
|
return holiday_functions
|
135
144
|
|
136
145
|
|
@@ -0,0 +1,141 @@
|
|
1
|
+
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
from typing import Union, List, Optional
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
import pandas as pd
|
8
|
+
from sklearn.impute import SimpleImputer
|
9
|
+
from sklearn.preprocessing import FunctionTransformer
|
10
|
+
from sklearn.utils.validation import check_array, check_is_fitted
|
11
|
+
|
12
|
+
|
13
|
+
class MissingValuesTransformer:
|
14
|
+
"""MissingValuesTransformer handles missing values in data by imputing them with a given strategy.
|
15
|
+
|
16
|
+
It also removes columns that are always null from the data.
|
17
|
+
|
18
|
+
"""
|
19
|
+
|
20
|
+
in_feature_names: Optional[List[str]] = None
|
21
|
+
_n_in_features: Optional[int] = None
|
22
|
+
|
23
|
+
non_null_feature_names: List[str] = None
|
24
|
+
|
25
|
+
def __init__(
|
26
|
+
self,
|
27
|
+
missing_values: Union[int, float, str, None] = np.nan,
|
28
|
+
imputation_strategy: str = None,
|
29
|
+
fill_value: Union[str, int, float] = None,
|
30
|
+
no_fill_future_values_features: List[str] = None,
|
31
|
+
):
|
32
|
+
"""Initialize missing values handler.
|
33
|
+
|
34
|
+
Args:
|
35
|
+
missing_values: The placeholder for the missing values. All occurrences of
|
36
|
+
`missing_values` will be imputed.
|
37
|
+
imputation_strategy: The imputation strategy to use
|
38
|
+
Can be one of "mean", "median", "most_frequent", "constant" or None.
|
39
|
+
fill_value: When strategy == "constant", fill_value is used to replace all
|
40
|
+
occurrences of missing_values.
|
41
|
+
no_fill_future_values_features: The features for which it does not make sense
|
42
|
+
to fill future values. Rows that contain trailing null values for these
|
43
|
+
features will be removed from the data.
|
44
|
+
|
45
|
+
"""
|
46
|
+
self.missing_values = missing_values
|
47
|
+
self.imputation_strategy = imputation_strategy
|
48
|
+
self.fill_value = fill_value
|
49
|
+
self.no_fill_future_values_features = no_fill_future_values_features or []
|
50
|
+
self.is_fitted_ = False
|
51
|
+
|
52
|
+
# Build the proper imputation transformer
|
53
|
+
# - Identity function if strategy is None
|
54
|
+
# - SimpleImputer with the dedicated strategy
|
55
|
+
if self.imputation_strategy is None:
|
56
|
+
self.imputer_ = FunctionTransformer(func=self._identity)
|
57
|
+
else:
|
58
|
+
self.imputer_ = SimpleImputer(
|
59
|
+
missing_values=self.missing_values,
|
60
|
+
strategy=self.imputation_strategy,
|
61
|
+
fill_value=self.fill_value,
|
62
|
+
).set_output(transform="pandas")
|
63
|
+
self.imputer_._validate_params()
|
64
|
+
|
65
|
+
@staticmethod
|
66
|
+
def _determine_trailing_null_rows(x: pd.DataFrame) -> pd.Series:
|
67
|
+
"""Determine rows with trailing null values in a DataFrame."""
|
68
|
+
return ~x.bfill().isnull().any(axis="columns")
|
69
|
+
|
70
|
+
def fit(self, x, y=None):
|
71
|
+
"""Fit the imputer on the input data."""
|
72
|
+
_ = check_array(x, force_all_finite="allow-nan")
|
73
|
+
if not isinstance(x, pd.DataFrame):
|
74
|
+
x = pd.DataFrame(np.asarray(x))
|
75
|
+
|
76
|
+
self.in_feature_names = list(x.columns)
|
77
|
+
self._n_in_features = x.shape[1]
|
78
|
+
|
79
|
+
# Remove always null columns
|
80
|
+
is_column_null = x.isnull().all(axis="index")
|
81
|
+
self.non_null_feature_names = list(x.columns[~is_column_null])
|
82
|
+
x = x[self.non_null_feature_names]
|
83
|
+
|
84
|
+
# Remove trailing null rows for features that should
|
85
|
+
# not be imputed in the future
|
86
|
+
trailing_null_rows = self._determine_trailing_null_rows(
|
87
|
+
x[self.no_fill_future_values_features]
|
88
|
+
)
|
89
|
+
x = x.loc[trailing_null_rows]
|
90
|
+
|
91
|
+
# Imputers do not support labels
|
92
|
+
self.imputer_.fit(X=x, y=None)
|
93
|
+
self.is_fitted_ = True
|
94
|
+
|
95
|
+
def transform(self, x) -> pd.DataFrame:
|
96
|
+
"""Transform the input data by imputing missing values."""
|
97
|
+
check_is_fitted(self)
|
98
|
+
_ = check_array(x, force_all_finite="allow-nan")
|
99
|
+
if not isinstance(x, pd.DataFrame):
|
100
|
+
x = pd.DataFrame(np.asarray(x))
|
101
|
+
|
102
|
+
x = x[self.non_null_feature_names]
|
103
|
+
|
104
|
+
transformed = self.imputer_.transform(x)
|
105
|
+
|
106
|
+
return transformed
|
107
|
+
|
108
|
+
def fit_transform(self, x, y=None) -> tuple[pd.DataFrame, Optional[pd.Series]]:
|
109
|
+
"""Fit the imputer on the input data and transform it.
|
110
|
+
|
111
|
+
Returns:
|
112
|
+
The data with missing values imputed.
|
113
|
+
|
114
|
+
"""
|
115
|
+
self.fit(x, y)
|
116
|
+
|
117
|
+
if not isinstance(x, pd.DataFrame):
|
118
|
+
x = pd.DataFrame(np.asarray(x))
|
119
|
+
|
120
|
+
x = x[self.non_null_feature_names]
|
121
|
+
|
122
|
+
# Remove trailing null rows for features that should
|
123
|
+
# not be imputed in the future
|
124
|
+
non_trailing_null_rows = self._determine_trailing_null_rows(
|
125
|
+
x[self.no_fill_future_values_features]
|
126
|
+
)
|
127
|
+
x = x.loc[non_trailing_null_rows]
|
128
|
+
|
129
|
+
x = self.transform(x)
|
130
|
+
|
131
|
+
if y is not None:
|
132
|
+
y = y.loc[non_trailing_null_rows]
|
133
|
+
|
134
|
+
return x, y
|
135
|
+
|
136
|
+
@classmethod
|
137
|
+
def _identity(cls, x):
|
138
|
+
return x
|
139
|
+
|
140
|
+
def __sklearn_is_fitted__(self) -> bool:
|
141
|
+
return self.in_feature_names is not None
|
@@ -3,6 +3,7 @@
|
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
4
|
|
5
5
|
"""This module contains all wheather related functions used for feature engineering."""
|
6
|
+
import logging
|
6
7
|
from typing import Union
|
7
8
|
|
8
9
|
import numpy as np
|
@@ -12,7 +13,13 @@ import structlog
|
|
12
13
|
from pvlib.location import Location
|
13
14
|
|
14
15
|
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
16
|
+
from openstef.settings import Settings
|
15
17
|
|
18
|
+
structlog.configure(
|
19
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
20
|
+
logging.getLevelName(Settings.log_level)
|
21
|
+
)
|
22
|
+
)
|
16
23
|
logger = structlog.get_logger(__name__)
|
17
24
|
|
18
25
|
|
openstef/metrics/figure.py
CHANGED
openstef/metrics/metrics.py
CHANGED
@@ -25,6 +25,9 @@ def get_eval_metric_function(metric_name: str) -> Callable:
|
|
25
25
|
Returns:
|
26
26
|
Function to calculate the metric.
|
27
27
|
|
28
|
+
Raises:
|
29
|
+
KeyError: If the metric is not available.
|
30
|
+
|
28
31
|
"""
|
29
32
|
evaluation_function = {
|
30
33
|
"rmse": rmse,
|
@@ -130,6 +133,9 @@ def r_mae_highest(
|
|
130
133
|
|
131
134
|
The range is based on the load range of the previous two weeks.
|
132
135
|
|
136
|
+
Raises:
|
137
|
+
ValueError: If the length of the realised and forecast arrays are not equal.
|
138
|
+
|
133
139
|
"""
|
134
140
|
# Check if length of both arrays is equal
|
135
141
|
if len(np.array(realised)) != len(np.array(forecast)):
|
@@ -395,7 +401,7 @@ def xgb_quantile_obj(
|
|
395
401
|
Args:
|
396
402
|
preds: numpy.ndarray
|
397
403
|
dmatrix: xgboost.DMatrix
|
398
|
-
quantile: float
|
404
|
+
quantile: float between 0 and 1
|
399
405
|
|
400
406
|
Returns:
|
401
407
|
Gradient and Hessian
|
@@ -425,3 +431,54 @@ def xgb_quantile_obj(
|
|
425
431
|
hess = np.ones_like(preds)
|
426
432
|
|
427
433
|
return grad, hess
|
434
|
+
|
435
|
+
|
436
|
+
def arctan_loss(y_true, y_pred, taus, s=0.1):
|
437
|
+
"""Compute the arctan pinball loss.
|
438
|
+
|
439
|
+
Note that XGBoost outputs the predictions in a slightly peculiar manner.
|
440
|
+
Suppose we have 100 data points and we predict 10 quantiles. The predictions
|
441
|
+
will be an array of size (1000 x 1). We first resize this to a (100x10) array
|
442
|
+
where each row corresponds to the 10 predicted quantile for a single data
|
443
|
+
point. We then use a for-loop (over the 10 columns) to calculate the gradients
|
444
|
+
and second derivatives. Legibility was chosen over efficiency. This part
|
445
|
+
can be made more efficient.
|
446
|
+
|
447
|
+
Args:
|
448
|
+
y_true: An array containing the true observations.
|
449
|
+
y_pred: An array containing the predicted quantiles.
|
450
|
+
taus: A list containing the true desired coverage of the quantiles.
|
451
|
+
s: A smoothing parameter.
|
452
|
+
|
453
|
+
Returns:
|
454
|
+
grad: An array containing the (negative) gradients with respect to y_pred.
|
455
|
+
hess: An array containing the second derivative with respect to y_pred.
|
456
|
+
|
457
|
+
"""
|
458
|
+
size = len(y_true)
|
459
|
+
n_dim = len(taus) # The number of columns
|
460
|
+
n_rows = size // n_dim
|
461
|
+
|
462
|
+
# Resize the predictions and targets.
|
463
|
+
# Each column corresponds to a quantile, each row to a data point.
|
464
|
+
y_pred = np.reshape(y_pred, (n_rows, n_dim))
|
465
|
+
y_true = np.reshape(y_true, (n_rows, n_dim))
|
466
|
+
|
467
|
+
# Calculate the differences
|
468
|
+
u = y_true - y_pred
|
469
|
+
|
470
|
+
# Calculate the gradient and second derivatives
|
471
|
+
grad = np.zeros_like(y_pred)
|
472
|
+
hess = np.zeros_like(y_pred)
|
473
|
+
z = u / s
|
474
|
+
for i, tau in enumerate(taus):
|
475
|
+
x = 1 + z[:, i] ** 2
|
476
|
+
grad[:, i] = (
|
477
|
+
tau - 0.5 + 1 / np.pi * np.arctan(z[:, i]) + z[:, i] / (np.pi) * x**-1
|
478
|
+
)
|
479
|
+
hess[:, i] = 2 / (np.pi * s) * x ** (-2)
|
480
|
+
|
481
|
+
# Reshape back to the original shape.
|
482
|
+
grad = grad.reshape(size)
|
483
|
+
hess = hess.reshape(size)
|
484
|
+
return -grad / n_dim, hess / n_dim
|
openstef/metrics/reporter.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
4
|
"""Defines reporter class."""
|
5
|
+
import logging
|
5
6
|
import os
|
6
7
|
import warnings
|
7
8
|
from dataclasses import dataclass
|
@@ -16,6 +17,7 @@ from plotly.graph_objects import Figure
|
|
16
17
|
from openstef.metrics import figure
|
17
18
|
from openstef.metrics.metrics import bias, mae, nsme, r_mae, rmse
|
18
19
|
from openstef.model.regressors.regressor import OpenstfRegressor
|
20
|
+
from openstef.settings import Settings
|
19
21
|
|
20
22
|
|
21
23
|
@dataclass
|
@@ -167,6 +169,11 @@ class Reporter:
|
|
167
169
|
def write_report_to_disk(report: Report, report_folder: str):
|
168
170
|
"""Write report to disk; e.g. for viewing report of latest models using grafana."""
|
169
171
|
# Initialize logger and serializer
|
172
|
+
structlog.configure(
|
173
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
174
|
+
logging.getLevelName(Settings.log_level)
|
175
|
+
)
|
176
|
+
)
|
170
177
|
logger = structlog.get_logger(__name__)
|
171
178
|
if report_folder:
|
172
179
|
# create path if does not exist
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
import logging
|
4
5
|
from datetime import datetime
|
5
6
|
|
6
7
|
import numpy as np
|
@@ -11,12 +12,18 @@ from sklearn.base import RegressorMixin
|
|
11
12
|
|
12
13
|
from openstef.data_classes.prediction_job import PredictionJobDataClass
|
13
14
|
from openstef.exceptions import ModelWithoutStDev
|
15
|
+
from openstef.settings import Settings
|
14
16
|
|
15
17
|
|
16
18
|
class ConfidenceIntervalApplicator:
|
17
19
|
def __init__(self, model: RegressorMixin, forecast_input_data: pd.DataFrame):
|
18
20
|
self.model = model
|
19
21
|
self.forecast_input_data = forecast_input_data
|
22
|
+
structlog.configure(
|
23
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
24
|
+
logging.getLevelName(Settings.log_level)
|
25
|
+
)
|
26
|
+
)
|
20
27
|
self.logger = structlog.get_logger(self.__class__.__name__)
|
21
28
|
|
22
29
|
def add_confidence_interval(
|
@@ -54,9 +61,24 @@ class ConfidenceIntervalApplicator:
|
|
54
61
|
temp_forecast = self._add_standard_deviation_to_forecast(forecast)
|
55
62
|
|
56
63
|
if self.model.can_predict_quantiles:
|
57
|
-
|
58
|
-
|
59
|
-
|
64
|
+
# Try to generate the quantiles that were requested
|
65
|
+
try:
|
66
|
+
result = self._add_quantiles_to_forecast_quantile_regression(
|
67
|
+
temp_forecast, pj["quantiles"]
|
68
|
+
)
|
69
|
+
return result
|
70
|
+
except Exception:
|
71
|
+
# Fallback on quantiles of the model if the requested quantiles cant be generated by the model.
|
72
|
+
# Can happen when the model was trained on different quantiles than are requested
|
73
|
+
result = self._add_quantiles_to_forecast_quantile_regression(
|
74
|
+
temp_forecast, self.model.quantiles
|
75
|
+
)
|
76
|
+
self.logger.warning(
|
77
|
+
"Quantiles are requested the model was not trained on. Using the quantiles the model was trained on",
|
78
|
+
requested_quantiles=pj["quantiles"],
|
79
|
+
trained_quantiles=self.model.quantiles,
|
80
|
+
)
|
81
|
+
return result
|
60
82
|
|
61
83
|
return self._add_quantiles_to_forecast_default(temp_forecast, pj["quantiles"])
|
62
84
|
|
@@ -74,6 +96,9 @@ class ConfidenceIntervalApplicator:
|
|
74
96
|
Forecast with added standard deviation. DataFrame with columns:
|
75
97
|
"forecast", "stdev"
|
76
98
|
|
99
|
+
Raises:
|
100
|
+
ModelWithoutStDev: If the model does not have a valid standard deviation.
|
101
|
+
|
77
102
|
"""
|
78
103
|
minimal_resolution: int = 15 # Minimal time resolution in minutes
|
79
104
|
standard_deviation = self.model.standard_deviation
|
openstef/model/model_creator.py
CHANGED
@@ -1,28 +1,35 @@
|
|
1
1
|
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
import logging
|
4
5
|
from typing import Union
|
5
6
|
|
6
7
|
import structlog
|
7
8
|
|
8
|
-
from openstef.enums import
|
9
|
+
from openstef.enums import ModelType
|
10
|
+
from openstef.model.regressors.arima import ARIMAOpenstfRegressor
|
9
11
|
from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
|
10
12
|
from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
|
11
13
|
from openstef.model.regressors.linear import LinearOpenstfRegressor
|
14
|
+
from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
|
12
15
|
from openstef.model.regressors.regressor import OpenstfRegressor
|
16
|
+
from openstef.model.regressors.flatliner import FlatlinerRegressor
|
13
17
|
from openstef.model.regressors.xgb import XGBOpenstfRegressor
|
14
18
|
from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
|
15
|
-
from openstef.model.regressors.
|
19
|
+
from openstef.model.regressors.xgb_multioutput_quantile import (
|
20
|
+
XGBMultiOutputQuantileOpenstfRegressor,
|
21
|
+
)
|
22
|
+
from openstef.settings import Settings
|
16
23
|
|
24
|
+
structlog.configure(
|
25
|
+
wrapper_class=structlog.make_filtering_bound_logger(
|
26
|
+
logging.getLevelName(Settings.log_level)
|
27
|
+
)
|
28
|
+
)
|
17
29
|
logger = structlog.get_logger(__name__)
|
18
|
-
try:
|
19
|
-
from openstef.model.regressors.proloaf import OpenstfProloafRegressor
|
20
|
-
except ImportError:
|
21
|
-
logger.info("Proloaf not available, setting constructor to None")
|
22
|
-
OpenstfProloafRegressor = None
|
23
30
|
|
24
31
|
valid_model_kwargs = {
|
25
|
-
|
32
|
+
ModelType.XGB: [
|
26
33
|
"n_estimators",
|
27
34
|
"objective",
|
28
35
|
"max_depth",
|
@@ -53,7 +60,7 @@ valid_model_kwargs = {
|
|
53
60
|
"validate_parameters",
|
54
61
|
"early_stopping_rounds",
|
55
62
|
],
|
56
|
-
|
63
|
+
ModelType.LGB: [
|
57
64
|
"boosting_type",
|
58
65
|
"objective",
|
59
66
|
"num_leaves",
|
@@ -75,7 +82,7 @@ valid_model_kwargs = {
|
|
75
82
|
"importance_type",
|
76
83
|
"early_stopping_rounds",
|
77
84
|
],
|
78
|
-
|
85
|
+
ModelType.XGB_QUANTILE: [
|
79
86
|
"quantiles",
|
80
87
|
"gamma",
|
81
88
|
"colsample_bytree",
|
@@ -84,33 +91,37 @@ valid_model_kwargs = {
|
|
84
91
|
"max_depth",
|
85
92
|
"early_stopping_rounds",
|
86
93
|
],
|
87
|
-
|
88
|
-
"
|
89
|
-
"
|
90
|
-
"
|
91
|
-
"
|
92
|
-
"
|
93
|
-
"
|
94
|
-
"
|
95
|
-
"
|
96
|
-
|
97
|
-
|
98
|
-
"
|
99
|
-
"
|
100
|
-
"
|
101
|
-
|
102
|
-
|
103
|
-
"
|
104
|
-
"batch_size",
|
105
|
-
"history_horizon",
|
106
|
-
"horizon_minutes",
|
94
|
+
ModelType.XGB_MULTIOUTPUT_QUANTILE: [
|
95
|
+
"quantiles",
|
96
|
+
"gamma",
|
97
|
+
"colsample_bytree",
|
98
|
+
"subsample",
|
99
|
+
"min_child_weight",
|
100
|
+
"max_depth",
|
101
|
+
"early_stopping_rounds",
|
102
|
+
"arctan_smoothing",
|
103
|
+
],
|
104
|
+
ModelType.LINEAR: [
|
105
|
+
"missing_values",
|
106
|
+
"imputation_strategy",
|
107
|
+
"fill_value",
|
108
|
+
],
|
109
|
+
ModelType.FLATLINER: [
|
110
|
+
"quantiles",
|
107
111
|
],
|
108
|
-
|
112
|
+
ModelType.LINEAR_QUANTILE: [
|
113
|
+
"alpha",
|
114
|
+
"quantiles",
|
115
|
+
"solver",
|
109
116
|
"missing_values",
|
110
117
|
"imputation_strategy",
|
111
118
|
"fill_value",
|
119
|
+
"weight_scale_percentile",
|
120
|
+
"weight_exponent",
|
121
|
+
"weight_floor",
|
122
|
+
"no_fill_future_values_features",
|
112
123
|
],
|
113
|
-
|
124
|
+
ModelType.ARIMA: [
|
114
125
|
"backtest_max_horizon",
|
115
126
|
"order",
|
116
127
|
"seasonal_order",
|
@@ -124,16 +135,18 @@ class ModelCreator:
|
|
124
135
|
|
125
136
|
# Set object mapping
|
126
137
|
MODEL_CONSTRUCTORS = {
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
138
|
+
ModelType.XGB: XGBOpenstfRegressor,
|
139
|
+
ModelType.LGB: LGBMOpenstfRegressor,
|
140
|
+
ModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
|
141
|
+
ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
|
142
|
+
ModelType.LINEAR: LinearOpenstfRegressor,
|
143
|
+
ModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
|
144
|
+
ModelType.ARIMA: ARIMAOpenstfRegressor,
|
145
|
+
ModelType.FLATLINER: FlatlinerRegressor,
|
133
146
|
}
|
134
147
|
|
135
148
|
@staticmethod
|
136
|
-
def create_model(model_type: Union[
|
149
|
+
def create_model(model_type: Union[ModelType, str], **kwargs) -> OpenstfRegressor:
|
137
150
|
"""Create a machine learning model based on model type.
|
138
151
|
|
139
152
|
Args:
|
@@ -154,7 +167,7 @@ class ModelCreator:
|
|
154
167
|
model_class = load_custom_model(model_type)
|
155
168
|
valid_kwargs = model_class.valid_kwargs()
|
156
169
|
else:
|
157
|
-
model_type =
|
170
|
+
model_type = ModelType(model_type)
|
158
171
|
model_class = ModelCreator.MODEL_CONSTRUCTORS[model_type]
|
159
172
|
valid_kwargs = valid_model_kwargs[model_type]
|
160
173
|
# Check if model as imported
|
@@ -165,7 +178,7 @@ class ModelCreator:
|
|
165
178
|
"Please refer to the ReadMe for instructions"
|
166
179
|
)
|
167
180
|
except ValueError as e:
|
168
|
-
valid_types = [t.value for t in
|
181
|
+
valid_types = [t.value for t in ModelType]
|
169
182
|
raise NotImplementedError(
|
170
183
|
f"No constructor for '{model_type}', "
|
171
184
|
f"valid model_types are: {valid_types} "
|