openstef 3.4.52__tar.gz → 3.4.54__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openstef-3.4.52 → openstef-3.4.54}/PKG-INFO +1 -1
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data_classes/prediction_job.py +1 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/enums.py +1 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/feature_applicator.py +4 -2
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/model_creator.py +21 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/objective_creator.py +1 -0
- openstef-3.4.54/openstef/model/regressors/gblinear_quantile.py +334 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/linear_quantile.py +1 -1
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/create_components_forecast.py +1 -1
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/create_forecast.py +7 -2
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/train_model.py +1 -1
- {openstef-3.4.52 → openstef-3.4.54}/openstef/validation/validation.py +3 -3
- {openstef-3.4.52 → openstef-3.4.54}/openstef.egg-info/PKG-INFO +1 -1
- {openstef-3.4.52 → openstef-3.4.54}/openstef.egg-info/SOURCES.txt +1 -0
- {openstef-3.4.52 → openstef-3.4.54}/setup.py +1 -1
- {openstef-3.4.52 → openstef-3.4.54}/LICENSE +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/README.md +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/__main__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/app_settings.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/NL_terrestrial_radiation.csv +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/NL_terrestrial_radiation.csv.license +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/dutch_holidays.csv +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/dutch_holidays.csv.license +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/pv_single_coefs.csv +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data/pv_single_coefs.csv.license +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data_classes/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data_classes/data_prep.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data_classes/model_specifications.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/data_classes/split_function.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/exceptions.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/apply_features.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/cyclic_features.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/data_preparation.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/feature_adder.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/general.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/holiday_features.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/lag_features.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/missing_values_transformer.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/weather_features.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/metrics/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/metrics/figure.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/metrics/metrics.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/metrics/reporter.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/basecase.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/confidence_interval_applicator.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/fallback.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/metamodels/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/metamodels/feature_clipper.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/metamodels/grouped_regressor.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/metamodels/missing_values_handler.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/objective.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/arima.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/custom_regressor.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/dazls.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/flatliner.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/lgbm.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/linear.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/regressor.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/xgb.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/xgb_multioutput_quantile.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/regressors/xgb_quantile.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/serializer.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model/standard_deviation_generator.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model_selection/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/model_selection/model_selection.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/monitoring/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/monitoring/performance_meter.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/monitoring/teams.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/pipeline/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/pipeline/create_basecase_forecast.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/pipeline/create_component_forecast.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/pipeline/create_forecast.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/pipeline/optimize_hyperparameters.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/pipeline/train_create_forecast_backtest.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/pipeline/train_model.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/pipeline/utils.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/postprocessing/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/postprocessing/postprocessing.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/preprocessing/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/preprocessing/preprocessing.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/settings.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/calculate_kpi.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/create_basecase_forecast.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/create_solar_forecast.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/create_wind_forecast.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/optimize_hyperparameters.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/split_forecast.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/utils/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/utils/dependencies.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/utils/predictionjobloop.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/utils/taskcontext.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef/validation/__init__.py +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef.egg-info/dependency_links.txt +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef.egg-info/requires.txt +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/openstef.egg-info/top_level.txt +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/pyproject.toml +0 -0
- {openstef-3.4.52 → openstef-3.4.54}/setup.cfg +0 -0
{openstef-3.4.52 → openstef-3.4.54}/openstef/feature_engineering/feature_applicator.py

@@ -24,7 +24,9 @@ from openstef.feature_engineering.general import (
     remove_non_requested_feature_columns,
 )
 
-LATENCY_CONFIG = {"APX": 24}  # A specific latency is part of a specific feature.
+LATENCY_CONFIG = {
+    "day_ahead_electricity_price": 24
+}  # A specific latency is part of a specific feature.
 
 
 class AbstractFeatureApplicator(ABC):

@@ -94,7 +96,7 @@ class TrainFeatureApplicator(AbstractFeatureApplicator):
                 if not specified a default location is used
             latency_config: (Optional) Invalidate certain features that are not
                 available for a specific horizon due to data latency. Defaults to
-                ``{"APX": 24}``.
+                ``{"day_ahead_electricity_price": 24}``.
 
         Returns:
             Input DataFrame with an extra column for every added feature and sorted on the datetime index.
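Within TrainFeatureApplicator, the latency config invalidates a feature for any training horizon beyond which that feature would not actually be available at prediction time. A minimal sketch of the idea, not the library's actual implementation:

import numpy as np
import pandas as pd

LATENCY_CONFIG = {"day_ahead_electricity_price": 24}

def invalidate_latent_features(df: pd.DataFrame, horizon_hours: float) -> pd.DataFrame:
    # The day-ahead price is only known ~24 h in advance, so for horizons
    # beyond its latency the feature is blanked out to prevent leakage.
    out = df.copy()
    for feature, latency_hours in LATENCY_CONFIG.items():
        if feature in out.columns and horizon_hours > latency_hours:
            out[feature] = np.nan
    return out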
{openstef-3.4.52 → openstef-3.4.54}/openstef/model/model_creator.py

@@ -9,6 +9,7 @@ import structlog
 from openstef.enums import ModelType
 from openstef.model.regressors.arima import ARIMAOpenstfRegressor
 from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
+from openstef.model.regressors.gblinear_quantile import GBLinearQuantileOpenstfRegressor
 from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
 from openstef.model.regressors.linear import LinearOpenstfRegressor
 from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor

@@ -121,6 +122,25 @@ valid_model_kwargs = {
         "weight_floor",
         "no_fill_future_values_features",
     ],
+    ModelType.GBLINEAR_QUANTILE: [
+        "quantiles",
+        "missing_values",
+        "imputation_strategy",
+        "fill_value",
+        "weight_scale_percentile",
+        "weight_exponent",
+        "weight_floor",
+        "no_fill_future_values_features",
+        "clipped_features",
+        "learning_rate",
+        "num_boost_round",
+        "early_stopping_rounds",
+        "reg_alpha",
+        "reg_lambda",
+        "updater",
+        "feature_selector",
+        "top_k",
+    ],
     ModelType.ARIMA: [
         "backtest_max_horizon",
         "order",

@@ -141,6 +161,7 @@ class ModelCreator:
         ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
         ModelType.LINEAR: LinearOpenstfRegressor,
         ModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
+        ModelType.GBLINEAR_QUANTILE: GBLinearQuantileOpenstfRegressor,
         ModelType.ARIMA: ARIMAOpenstfRegressor,
         ModelType.FLATLINER: FlatlinerRegressor,
     }
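With these registrations, the new model type flows through the existing factory. A hedged sketch, assuming the ModelCreator.create_model signature that the openstef training pipeline uses for the other model types:

from openstef.enums import ModelType
from openstef.model.model_creator import ModelCreator

# Quantile 0.5 is mandatory; the regressor raises a ValueError without it.
model = ModelCreator.create_model(
    ModelType.GBLINEAR_QUANTILE,
    quantiles=(0.1, 0.5, 0.9),
    learning_rate=0.15,
    num_boost_round=500,
)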
{openstef-3.4.52 → openstef-3.4.54}/openstef/model/objective_creator.py

@@ -28,6 +28,7 @@ class ObjectiveCreator:
         ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
         ModelType.LINEAR: LinearRegressorObjective,
         ModelType.LINEAR_QUANTILE: LinearRegressorObjective,
+        ModelType.GBLINEAR_QUANTILE: LinearRegressorObjective,
         ModelType.ARIMA: ARIMARegressorObjective,
     }
 
openstef-3.4.54/openstef/model/regressors/gblinear_quantile.py (new file)

@@ -0,0 +1,334 @@
+# SPDX-FileCopyrightText: 2017-2025 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
+#
+# SPDX-License-Identifier: MPL-2.0
+import math
+import re
+from typing import Union, Optional, List
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+import xgboost as xgb
+from sklearn.preprocessing import StandardScaler
+from sklearn.utils.validation import check_is_fitted
+
+from openstef.feature_engineering.missing_values_transformer import (
+    MissingValuesTransformer,
+)
+from openstef.model.metamodels.feature_clipper import FeatureClipper
+from openstef.model.regressors.regressor import OpenstfRegressor
+
+DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
+
+
+class GBLinearQuantileOpenstfRegressor(OpenstfRegressor):
+    is_fitted_: bool = False
+
+    TO_KEEP_FEATURES: List[str] = [
+        "T-7d",
+        "T-1d",
+    ]
+    TO_IGNORE_FEATURES: List[str] = [
+        "Month",
+        "Quarter",
+    ]
+
+    def __init__(
+        self,
+        quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
+        missing_values: Union[int, float, str, None] = np.nan,
+        imputation_strategy: Optional[str] = "mean",
+        fill_value: Union[str, int, float] = None,
+        weight_scale_percentile: int = 95,
+        weight_exponent: float = 1,
+        weight_floor: float = 0.1,
+        validation_fraction: float = 0.2,
+        no_fill_future_values_features: List[str] = None,
+        clipped_features: List[str] = None,
+        learning_rate: float = 0.15,
+        num_boost_round: int = 500,
+        early_stopping_rounds: int = 10,
+        reg_alpha: float = 0.0001,
+        reg_lambda: float = 0.1,
+        updater: str = "shotgun",
+        feature_selector: str = "shuffle",
+        top_k: int = 0,
+    ):
+        super().__init__()
+
+        # Check if quantile 0.5 is present. This is required.
+        if 0.5 not in quantiles:
+            raise ValueError(
+                "Cannot train quantile model as 0.5 is not in requested quantiles!"
+            )
+
+        if clipped_features is None:
+            clipped_features = ["APX"]
+
+        self.quantiles = quantiles
+        self.weight_scale_percentile = weight_scale_percentile
+        self.weight_exponent = weight_exponent
+        self.weight_floor = weight_floor
+        self.imputer_ = MissingValuesTransformer(
+            missing_values=missing_values,
+            imputation_strategy=imputation_strategy,
+            fill_value=fill_value,
+            no_fill_future_values_features=no_fill_future_values_features,
+        )
+        self.x_scaler_ = StandardScaler()
+        self.y_scaler_ = StandardScaler()
+        self.validation_fraction = validation_fraction
+        self.model_: xgb.Booster = None
+        self.feature_clipper_ = FeatureClipper(columns=clipped_features)
+
+        self.learning_rate = learning_rate
+        self.num_boost_round = num_boost_round
+        self.early_stopping_rounds = early_stopping_rounds
+        self.reg_alpha = reg_alpha
+        self.reg_labmda = reg_lambda
+        self.updater = updater
+        self.feature_selector = feature_selector
+        self.top_k = top_k
+
+    @property
+    def feature_names(self) -> list:
+        """The names of the features used to train the model."""
+        check_is_fitted(self)
+        return self.imputer_.non_null_feature_names
+
+    @staticmethod
+    def _get_importance_names():
+        return {
+            "gain_importance_name": "total_gain",
+            "weight_importance_name": "weight",
+        }
+
+    @property
+    def can_predict_quantiles(self) -> bool:
+        """Attribute that indicates if the model predict particular quantiles."""
+        return True
+
+    def _is_feature_ignored(self, feature_name: str) -> bool:
+        """Check if a feature is ignored by the model.
+
+        Args:
+            feature_name: Feature name
+
+        Returns:
+            True if the feature is ignored, False otherwise
+
+        """
+
+        if feature_name in self.TO_KEEP_FEATURES:
+            return False
+
+        return (
+            # Ignore named features
+            feature_name in self.TO_IGNORE_FEATURES
+            or
+            # Ignore holiday features
+            re.match(r"is_", feature_name) is not None
+            or
+            # Ignore lag features
+            re.match(r"T-", feature_name) is not None
+            or
+            # Ignore infeed MFFBAS profiles
+            re.match(r"E\d.*_I", feature_name) is not None
+        )
+
+    def _remove_ignored_features(self, x: pd.DataFrame) -> pd.DataFrame:
+        """Remove ignored features from the input data.
+
+        Args:
+            x: Input data
+
+        Returns:
+            Data without ignored features
+
+        """
+        return x.drop(columns=[c for c in x.columns if self._is_feature_ignored(c)])
+
+    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> OpenstfRegressor:
+        if not isinstance(y, pd.Series):
+            y = pd.Series(np.asarray(y), name="load")
+
+        x = self._remove_ignored_features(x)
+        self.feature_clipper_.fit(x)
+
+        # Fix nan columns
+        x, y = self.imputer_.fit_transform(x, y)
+        if x.isna().any().any():
+            raise ValueError(
+                "There are nan values in the input data. Set "
+                "imputation_strategy to solve them."
+            )
+
+        # Apply feature scaling
+        x_scaled = self.x_scaler_.fit_transform(x)
+        y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]
+
+        # Add more focus on extreme / peak values
+        sample_weight = self._calculate_sample_weights(y.values.squeeze())
+
+        # Split the data into training and validation sets
+        x_train, x_val, y_train, y_val, weight_train, weight_val = train_test_split(
+            x_scaled,
+            y_scaled,
+            sample_weight,
+            test_size=self.validation_fraction,
+            random_state=42,
+        )
+
+        # Preserve feature names
+        x_train = pd.DataFrame(x_train, columns=x.columns)
+        x_val = pd.DataFrame(x_val, columns=x.columns)
+
+        dtrain = xgb.DMatrix(x_train, label=y_train, weight=weight_train)
+        dval = xgb.DMatrix(x_val, label=y_val, weight=weight_val)
+
+        xgb_params = {
+            # Use the quantile objective function.
+            "objective": "reg:quantileerror",  # This is pinball loss
+            "booster": "gblinear",
+            "updater": self.updater,
+            "alpha": self.reg_alpha,
+            "lambda": self.reg_labmda,
+            "feature_selector": self.feature_selector,
+            "quantile_alpha": np.array(self.quantiles),
+            "learning_rate": self.learning_rate,
+        }
+
+        if self.top_k > 0:
+            xgb_params["top_k"] = self.top_k
+
+        self.model_ = xgb.train(
+            params=xgb_params,
+            dtrain=dtrain,
+            num_boost_round=self.num_boost_round,
+            early_stopping_rounds=self.early_stopping_rounds,
+            evals=[(dtrain, "train"), (dval, "val")],
+        )
+
+        self._Booster = self.model_
+
+        self.is_fitted_ = True
+
+        self.feature_importances_ = self._get_feature_importances_from_booster(
+            self.model_
+        )
+
+        return self
+
+    def _calculate_sample_weights(self, y: np.array):
+        """Calculate sample weights based on the y values of arbitrary scale.
+
+        The resulting weights are in the range [0,1] and are used to put more emphasis
+        on certain samples. The sample weighting function does:
+
+        * Rescale data to a [-1, 1] range using quantile scaling. 90% of the data will
+          be within this range. Rest is outside.
+        * Calculate the weight by taking the exponent of scaled data.
+            * exponent=0: Results in uniform weights for all samples.
+            * exponent=1: Results in linearly increasing weights for samples that are
+              closer to the extremes.
+            * exponent>1: Results in exponentially increasing weights for samples that are
+              closer to the extremes.
+        * Clip the data to [0, 1] range with weight_floor as the minimum weight.
+            * Weight floor is used to make sure that all the samples are considered.
+
+        """
+        return np.clip(
+            _weight_exp(
+                _scale_percentile(y, percentile=self.weight_scale_percentile),
+                exponent=self.weight_exponent,
+            ),
+            a_min=self.weight_floor,
+            a_max=1,
+        )
+
+    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
+        check_is_fitted(self)
+
+        # Preprocess input data
+        x = self._remove_ignored_features(x)
+        x = self.feature_clipper_.transform(x)
+        x = self.imputer_.transform(x)
+        x_scaled = self.x_scaler_.transform(x)
+
+        # Preserve feature names
+        x_scaled = pd.DataFrame(x_scaled, columns=x.columns)
+
+        d_x_scaled = xgb.DMatrix(x_scaled)
+
+        # Make prediction
+        y_pred = self.model_.predict(d_x_scaled)
+
+        # When multiple quantiles are trained,
+        # we need to select the requested quantile
+        if len(self.quantiles) > 1:
+            # Get index of the quantile value in the quantiles list
+            quantile_index = self.quantiles.index(quantile)
+
+            # Get the quantile prediction
+            y_pred = y_pred[:, quantile_index]
+
+        # Inverse scaling
+        y_pred = self.y_scaler_.inverse_transform(y_pred.reshape(-1, 1))[:, 0]
+
+        return y_pred
+
+    @classmethod
+    def _get_feature_importances_from_booster(cls, booster: xgb.Booster) -> np.ndarray:
+        """Gets feature importances from a XGB booster.
+
+        This is based on the feature_importance_ property defined in:
+        https://github.com/dmlc/xgboost/blob/master/python-package/xgboost/sklearn.py.
+
+        Args:
+            booster: Booster object,
+                most of the times the median model (quantile=0.5) is preferred
+
+        Returns:
+            Ndarray with normalized feature importances.
+
+        """
+        # Get score
+        score = booster.get_score(importance_type="weight")
+
+        if type(next(iter(score.values()))) is list:
+            num_quantiles = len(next(iter(score.values())))
+
+            # Select middle quantile, assuming odd number of quantiles
+            quantile_index = num_quantiles // 2
+
+            score = {f: score[f][quantile_index] for f in score}
+
+        # Get feature names from booster
+        feature_names = booster.feature_names
+
+        # Get importance
+        feature_importance = [score.get(f, 0.0) for f in feature_names]
+        # Convert to array
+        features_importance_array = np.array(feature_importance, dtype=np.float32)
+
+        total = features_importance_array.sum()  # For normalizing
+        if total == 0:
+            return features_importance_array
+        return features_importance_array / total  # Normalize
+
+    @classmethod
+    def _get_param_names(cls):
+        return [
+            "quantiles",
+        ]
+
+    def __sklearn_is_fitted__(self) -> bool:
+        return self.is_fitted_
+
+
+def _scale_percentile(x: np.ndarray, percentile: int = 95):
+    return np.abs(x / np.percentile(np.abs(x), percentile))
+
+
+def _weight_exp(x: np.ndarray, exponent: float = 1):
+    return np.abs(x) ** exponent
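A minimal usage sketch of the new regressor on synthetic data (feature names and values here are illustrative; assumes an xgboost release with multi-quantile "reg:quantileerror" support, i.e. xgboost >= 2.0):

import numpy as np
import pandas as pd

from openstef.model.regressors.gblinear_quantile import (
    GBLinearQuantileOpenstfRegressor,
    _scale_percentile,
    _weight_exp,
)

rng = np.random.default_rng(42)
x = pd.DataFrame({
    "radiation": rng.normal(size=500),
    "windspeed_100m": rng.normal(size=500),
})
y = pd.Series(
    2.0 * x["radiation"] - x["windspeed_100m"] + rng.normal(scale=0.1, size=500),
    name="load",
)

model = GBLinearQuantileOpenstfRegressor(quantiles=(0.1, 0.5, 0.9))
model.fit(x, y)
p10 = model.predict(x, quantile=0.1)  # lower band
p50 = model.predict(x)  # median, the default quantile

# The module-level helpers drive the sample weighting: values near the 95th
# percentile of |y| approach weight 1, values near zero hit the 0.1 floor.
weights = np.clip(_weight_exp(_scale_percentile(y.values, percentile=95)), 0.1, 1.0)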
{openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/create_components_forecast.py

@@ -6,7 +6,7 @@
 This code assumes trained models are available from the persistent storage.
 If these are not available run model_train.py to train all models.
 To provide the prognoses the following steps are carried out:
-  1. Get historic training data (TDCV, Load, Weather and APX price data)
+  1. Get historic training data (TDCV, Load, Weather and day_ahead_electricity_price price data)
   2. Apply features
   3. Load model
   4. Make component prediction
{openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/create_forecast.py

@@ -6,7 +6,7 @@
 This code assumes trained models are available from the persistent storage. If these
 are not available run model_train.py to train all models.
 To provide the prognoses the folowing steps are carried out:
-  1. Get historic training data (TDCV, Load, Weather and APX price data)
+  1. Get historic training data (TDCV, Load, Weather and day_ahead_electricity_price price data)
   2. Apply features
   3. Load model
   4. Make prediction

@@ -24,7 +24,7 @@ from datetime import datetime, timedelta
 from pathlib import Path
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import ModelType, PipelineType
+from openstef.enums import BiddingZone, ModelType, PipelineType
 from openstef.exceptions import InputDataOngoingZeroFlatlinerError
 from openstef.pipeline.create_forecast import create_forecast_pipeline
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop

@@ -82,8 +82,13 @@ def create_forecast_task(
         location=[pj["lat"], pj["lon"]],
         datetime_start=datetime_start,
         datetime_end=datetime_end,
+        market_price=pj.electricity_bidding_zone.value,
     )
 
+    # Add APX price to the input data for backward compatibility,remove this line when all models are retrained
+    if pj.electricity_bidding_zone == BiddingZone.NL:
+        input_data["APX"] = input_data["day_ahead_electricity_price"]
+
     try:
         # Make forecast with the forecast pipeline
         forecast = create_forecast_pipeline(
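The backward-compatibility shim simply republishes the renamed column under its old name, so NL models trained on the old "APX" feature keep working until they are retrained. In isolation:

import pandas as pd

# Hypothetical input frame as returned by the data fetch above
input_data = pd.DataFrame({"day_ahead_electricity_price": [30.0, 35.5, 28.0]})

# Alias the renamed feature under its legacy name for older NL models
input_data["APX"] = input_data["day_ahead_electricity_price"]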
{openstef-3.4.52 → openstef-3.4.54}/openstef/tasks/train_model.py

@@ -4,7 +4,7 @@
 """This module contains the CRON job that is periodically executed to retrain the prognosis models.
 
 For this the folowing steps are caried out:
-  1. Get historic training data (TDCV, Load, Weather and APX price data)
+  1. Get historic training data (TDCV, Load, Weather and day_ahead_electricity_price price data)
   2. Apply features
   3. Train and Test the new model
   4. Check if new model performs better than the old model
{openstef-3.4.52 → openstef-3.4.54}/openstef/validation/validation.py

@@ -313,9 +313,9 @@ def calc_completeness_dataframe(
             expected_numbers_timedelayed=value,
         )
 
-    # Correct for APX being only expected to be available up to 24h
-    if "APX" in non_na_count.index:
-        non_na_count["APX"] += max([len(df) - 96, 0])
+    # Correct for day_ahead_electricity_price being only expected to be available up to 24h
+    if "day_ahead_electricity_price" in non_na_count.index:
+        non_na_count["day_ahead_electricity_price"] += max([len(df) - 96, 0])
 
     completeness_per_column_dataframe = non_na_count / (len(df))
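The hard-coded 96 encodes one day at 15-minute resolution (4 rows per hour x 24 h), under the assumption that the day-ahead price is only expected for the first 24 hours of the frame; every later row may legitimately be NaN and is therefore counted as present. A worked example under that assumption:

# A 48-hour window at 15-minute resolution has 192 rows; the price column
# is only filled for the first 96 of them (the first 24 hours).
df_len = 192
non_na = 96
corrected = non_na + max(df_len - 96, 0)  # 96 + 96 = 192
completeness = corrected / df_len  # 1.0 instead of the raw 0.5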
{openstef-3.4.52 → openstef-3.4.54}/openstef.egg-info/SOURCES.txt

@@ -63,6 +63,7 @@ openstef/model/regressors/arima.py
 openstef/model/regressors/custom_regressor.py
 openstef/model/regressors/dazls.py
 openstef/model/regressors/flatliner.py
+openstef/model/regressors/gblinear_quantile.py
 openstef/model/regressors/lgbm.py
 openstef/model/regressors/linear.py
 openstef/model/regressors/linear_quantile.py
{openstef-3.4.52 → openstef-3.4.54}/setup.py

@@ -29,7 +29,7 @@ def read_long_description_from_readme():
 
 setup(
     name="openstef",
-    version="3.4.52",
+    version="3.4.54",
     packages=find_packages(include=["openstef", "openstef.*"]),
     description="Open short term energy forecaster",
     long_description=read_long_description_from_readme(),