openstef 3.4.23__tar.gz → 3.4.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openstef-3.4.23 → openstef-3.4.25}/PKG-INFO +2 -2
- {openstef-3.4.23 → openstef-3.4.25}/README.md +1 -1
- {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/prediction_job.py +1 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/enums.py +1 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/metrics/metrics.py +51 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/model_creator.py +14 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/objective.py +30 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/objective_creator.py +3 -0
- openstef-3.4.25/openstef/model/regressors/dazls.py +112 -0
- openstef-3.4.25/openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/create_component_forecast.py +2 -19
- {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/PKG-INFO +2 -2
- {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/SOURCES.txt +1 -18
- {openstef-3.4.23 → openstef-3.4.25}/setup.py +1 -1
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z +0 -0
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z.license +0 -3
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z +0 -0
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z.license +0 -3
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z +0 -0
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z.license +0 -3
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z +0 -0
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z.license +0 -3
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z +0 -2
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z.license +0 -3
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z +0 -0
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z.license +0 -3
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md +0 -14
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md.license +0 -3
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z +0 -0
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z.license +0 -3
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z +0 -0
- openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z.license +0 -3
- openstef-3.4.23/openstef/model/regressors/dazls.py +0 -191
- {openstef-3.4.23 → openstef-3.4.25}/LICENSE +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/__main__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/app_settings.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/data/dutch_holidays_2020-2022.csv +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/data/dutch_holidays_2020-2022.csv.license +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/data/pv_single_coefs.csv +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/data/pv_single_coefs.csv.license +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/data_prep.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/model_specifications.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/split_function.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/exceptions.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/apply_features.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/data_preparation.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/feature_adder.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/feature_applicator.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/general.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/holiday_features.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/lag_features.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/missing_values_transformer.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/weather_features.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/metrics/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/metrics/figure.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/metrics/reporter.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/basecase.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/confidence_interval_applicator.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/fallback.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/metamodels/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/metamodels/grouped_regressor.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/metamodels/missing_values_handler.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/arima.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/custom_regressor.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/lgbm.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/linear.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/linear_quantile.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/regressor.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/xgb.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/xgb_quantile.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/serializer.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model/standard_deviation_generator.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model_selection/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/model_selection/model_selection.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/monitoring/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/monitoring/performance_meter.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/monitoring/teams.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/create_basecase_forecast.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/create_forecast.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/optimize_hyperparameters.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/train_create_forecast_backtest.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/train_model.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/utils.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/postprocessing/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/postprocessing/postprocessing.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/preprocessing/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/preprocessing/preprocessing.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/settings.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/calculate_kpi.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_basecase_forecast.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_components_forecast.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_forecast.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_solar_forecast.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_wind_forecast.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/optimize_hyperparameters.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/split_forecast.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/train_model.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/utils/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/utils/dependencies.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/utils/predictionjobloop.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/utils/taskcontext.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/validation/__init__.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef/validation/validation.py +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/dependency_links.txt +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/requires.txt +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/top_level.txt +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/pyproject.toml +0 -0
- {openstef-3.4.23 → openstef-3.4.25}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: openstef
|
3
|
-
Version: 3.4.23
|
3
|
+
Version: 3.4.25
|
4
4
|
Summary: Open short term energy forecaster
|
5
5
|
Home-page: https://github.com/OpenSTEF/openstef
|
6
6
|
Author: Alliander N.V
|
@@ -128,4 +128,4 @@ Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/C
|
|
128
128
|
|
129
129
|
# Contact
|
130
130
|
Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
|
131
|
-
|
131
|
+
|
@@ -431,3 +431,54 @@ def xgb_quantile_obj(
|
|
431
431
|
hess = np.ones_like(preds)
|
432
432
|
|
433
433
|
return grad, hess
|
434
|
+
|
435
|
+
|
436
|
+
def arctan_loss(y_true, y_pred, taus, s=0.1):
|
437
|
+
"""Compute the arctan pinball loss.
|
438
|
+
|
439
|
+
Note that XGBoost outputs the predictions in a slightly peculiar manner.
|
440
|
+
Suppose we have 100 data points and we predict 10 quantiles. The predictions
|
441
|
+
will be an array of size (1000 x 1). We first resize this to a (100x10) array
|
442
|
+
where each row corresponds to the 10 predicted quantile for a single data
|
443
|
+
point. We then use a for-loop (over the 10 columns) to calculate the gradients
|
444
|
+
and second derivatives. Legibility was chosen over efficiency. This part
|
445
|
+
can be made more efficient.
|
446
|
+
|
447
|
+
Args:
|
448
|
+
y_true: An array containing the true observations.
|
449
|
+
y_pred: An array containing the predicted quantiles.
|
450
|
+
taus: A list containing the true desired coverage of the quantiles.
|
451
|
+
s: A smoothing parameter.
|
452
|
+
|
453
|
+
Returns:
|
454
|
+
grad: An array containing the (negative) gradients with respect to y_pred.
|
455
|
+
hess: An array containing the second derivative with respect to y_pred.
|
456
|
+
|
457
|
+
"""
|
458
|
+
size = len(y_true)
|
459
|
+
n_dim = len(taus) # The number of columns
|
460
|
+
n_rows = size // n_dim
|
461
|
+
|
462
|
+
# Resize the predictions and targets.
|
463
|
+
# Each column corresponds to a quantile, each row to a data point.
|
464
|
+
y_pred = np.reshape(y_pred, (n_rows, n_dim))
|
465
|
+
y_true = np.reshape(y_true, (n_rows, n_dim))
|
466
|
+
|
467
|
+
# Calculate the differences
|
468
|
+
u = y_true - y_pred
|
469
|
+
|
470
|
+
# Calculate the gradient and second derivatives
|
471
|
+
grad = np.zeros_like(y_pred)
|
472
|
+
hess = np.zeros_like(y_pred)
|
473
|
+
z = u / s
|
474
|
+
for i, tau in enumerate(taus):
|
475
|
+
x = 1 + z[:, i] ** 2
|
476
|
+
grad[:, i] = (
|
477
|
+
tau - 0.5 + 1 / np.pi * np.arctan(z[:, i]) + z[:, i] / (np.pi) * x**-1
|
478
|
+
)
|
479
|
+
hess[:, i] = 2 / (np.pi * s) * x ** (-2)
|
480
|
+
|
481
|
+
# Reshape back to the original shape.
|
482
|
+
grad = grad.reshape(size)
|
483
|
+
hess = hess.reshape(size)
|
484
|
+
return -grad / n_dim, hess / n_dim
|
@@ -15,6 +15,9 @@ from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegre
|
|
15
15
|
from openstef.model.regressors.regressor import OpenstfRegressor
|
16
16
|
from openstef.model.regressors.xgb import XGBOpenstfRegressor
|
17
17
|
from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
|
18
|
+
from openstef.model.regressors.xgb_multioutput_quantile import (
|
19
|
+
XGBMultiOutputQuantileOpenstfRegressor,
|
20
|
+
)
|
18
21
|
from openstef.settings import Settings
|
19
22
|
|
20
23
|
structlog.configure(
|
@@ -87,6 +90,16 @@ valid_model_kwargs = {
|
|
87
90
|
"max_depth",
|
88
91
|
"early_stopping_rounds",
|
89
92
|
],
|
93
|
+
MLModelType.XGB_MULTIOUTPUT_QUANTILE: [
|
94
|
+
"quantiles",
|
95
|
+
"gamma",
|
96
|
+
"colsample_bytree",
|
97
|
+
"subsample",
|
98
|
+
"min_child_weight",
|
99
|
+
"max_depth",
|
100
|
+
"early_stopping_rounds",
|
101
|
+
"arctan_smoothing",
|
102
|
+
],
|
90
103
|
MLModelType.LINEAR: [
|
91
104
|
"missing_values",
|
92
105
|
"imputation_strategy",
|
@@ -117,6 +130,7 @@ class ModelCreator:
|
|
117
130
|
MLModelType.XGB: XGBOpenstfRegressor,
|
118
131
|
MLModelType.LGB: LGBMOpenstfRegressor,
|
119
132
|
MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
|
133
|
+
MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
|
120
134
|
MLModelType.LINEAR: LinearOpenstfRegressor,
|
121
135
|
MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
|
122
136
|
MLModelType.ARIMA: ARIMAOpenstfRegressor,
|
@@ -349,6 +349,36 @@ class XGBQuantileRegressorObjective(RegressorObjective):
|
|
349
349
|
)
|
350
350
|
|
351
351
|
|
352
|
+
class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
|
353
|
+
def __init__(self, *args, **kwargs):
|
354
|
+
super().__init__(*args, **kwargs)
|
355
|
+
self.model_type = MLModelType.XGB_QUANTILE
|
356
|
+
|
357
|
+
def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
|
358
|
+
"""Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.
|
359
|
+
|
360
|
+
Args: trial
|
361
|
+
|
362
|
+
Returns:
|
363
|
+
Dictionary with hyperparameter name as key and hyperparamer value as value.
|
364
|
+
|
365
|
+
"""
|
366
|
+
# Filtered default parameters
|
367
|
+
model_params = super().get_params(trial)
|
368
|
+
|
369
|
+
# XGB specific parameters
|
370
|
+
params = {
|
371
|
+
"gamma": trial.suggest_float("gamma", 1e-8, 1.0),
|
372
|
+
"arctan_smoothing": trial.suggest_float("arctan_smoothing", 0.025, 0.15),
|
373
|
+
}
|
374
|
+
return {**model_params, **params}
|
375
|
+
|
376
|
+
def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
|
377
|
+
return optuna.integration.XGBoostPruningCallback(
|
378
|
+
trial, observation_key=f"validation_1-{self.eval_metric}"
|
379
|
+
)
|
380
|
+
|
381
|
+
|
352
382
|
class LinearRegressorObjective(RegressorObjective):
|
353
383
|
def __init__(self, *args, **kwargs):
|
354
384
|
super().__init__(*args, **kwargs)
|
@@ -12,6 +12,7 @@ from openstef.model.objective import (
|
|
12
12
|
RegressorObjective,
|
13
13
|
XGBQuantileRegressorObjective,
|
14
14
|
XGBRegressorObjective,
|
15
|
+
XGBMultioutputQuantileRegressorObjective,
|
15
16
|
)
|
16
17
|
from openstef.model.regressors.custom_regressor import (
|
17
18
|
create_custom_objective,
|
@@ -24,7 +25,9 @@ class ObjectiveCreator:
|
|
24
25
|
MLModelType.XGB: XGBRegressorObjective,
|
25
26
|
MLModelType.LGB: LGBRegressorObjective,
|
26
27
|
MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
|
28
|
+
MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
|
27
29
|
MLModelType.LINEAR: LinearRegressorObjective,
|
30
|
+
MLModelType.LINEAR_QUANTILE: LinearRegressorObjective,
|
28
31
|
MLModelType.ARIMA: ARIMARegressorObjective,
|
29
32
|
}
|
30
33
|
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
"""This module defines the DAZL model."""
|
5
|
+
import numpy as np
|
6
|
+
from sklearn.base import BaseEstimator
|
7
|
+
from sklearn.compose import TransformedTargetRegressor
|
8
|
+
from sklearn.linear_model import LinearRegression
|
9
|
+
from sklearn.metrics import mean_squared_error, r2_score
|
10
|
+
from sklearn.pipeline import Pipeline
|
11
|
+
from sklearn.preprocessing import MinMaxScaler
|
12
|
+
|
13
|
+
|
14
|
+
class Dazls(BaseEstimator):
|
15
|
+
"""DAZLS model.
|
16
|
+
|
17
|
+
The model carries out wind and solar power prediction for unseen target substations using training data from other
|
18
|
+
substations with known components.
|
19
|
+
|
20
|
+
"""
|
21
|
+
|
22
|
+
model_: Pipeline
|
23
|
+
|
24
|
+
def __init__(self):
|
25
|
+
"""Initialize DAZL model."""
|
26
|
+
self.__name__ = "DAZLS"
|
27
|
+
|
28
|
+
regressor = TransformedTargetRegressor(
|
29
|
+
regressor=LinearRegression(),
|
30
|
+
transformer=MinMaxScaler(clip=True),
|
31
|
+
)
|
32
|
+
|
33
|
+
self.model_ = Pipeline(
|
34
|
+
[("scaler", MinMaxScaler(clip=True)), ("regressor", regressor)]
|
35
|
+
)
|
36
|
+
|
37
|
+
# The input columns for the domain and adaptation models (with description)
|
38
|
+
self.baseline_input_columns = [
|
39
|
+
"radiation", # Weather parameter
|
40
|
+
"windspeed_100m", # Weather parameter
|
41
|
+
"total_load",
|
42
|
+
]
|
43
|
+
self.target_columns = ["total_wind_part", "total_solar_part"]
|
44
|
+
|
45
|
+
def fit(self, features, target):
|
46
|
+
"""Fit the model.
|
47
|
+
|
48
|
+
In this function we scale the input of the domain and adaptation models of the DAZLS MODEL. Then we fit the
|
49
|
+
two models. We separate the features into domain_model_input, adaptation_model_input and target, and we use them
|
50
|
+
for the fitting and the training of the models.
|
51
|
+
|
52
|
+
Args:
|
53
|
+
features: inputs for domain and adaptation model (domain_model_input, adaptation_model_input)
|
54
|
+
target: the expected output (y_train)
|
55
|
+
"""
|
56
|
+
x, y = (
|
57
|
+
features.loc[:, self.baseline_input_columns],
|
58
|
+
target.loc[:, self.target_columns],
|
59
|
+
)
|
60
|
+
|
61
|
+
self.model_.fit(x, y)
|
62
|
+
|
63
|
+
def predict(self, x: np.array):
|
64
|
+
"""Make a prediction.
|
65
|
+
|
66
|
+
For the prediction we use the test data x. We use domain_model_input_columns and
|
67
|
+
adaptation_model_input_columns to separate x in test data for domain model and adaptation model respectively.
|
68
|
+
|
69
|
+
There is an option available to return the domain model and adaptation model predictions separately to more
|
70
|
+
easily investigate the effectiveness of the models.
|
71
|
+
|
72
|
+
Args:
|
73
|
+
x: domain_model_test_data, adaptation_model_test_data
|
74
|
+
return_sub_preds : a flag value indicating to return the predictions of the domain model and adaptation
|
75
|
+
model separately. (Default: False.)
|
76
|
+
|
77
|
+
Returns:
|
78
|
+
prediction: The output prediction after both models.
|
79
|
+
"""
|
80
|
+
model_test_data = x.loc[:, self.baseline_input_columns]
|
81
|
+
|
82
|
+
return self.model_.predict(model_test_data)
|
83
|
+
|
84
|
+
def score(self, truth, prediction):
|
85
|
+
"""Evaluation of the prediction's output.
|
86
|
+
|
87
|
+
Args:
|
88
|
+
truth: real values
|
89
|
+
prediction: predicted values
|
90
|
+
|
91
|
+
Returns:
|
92
|
+
RMSE and R2 scores
|
93
|
+
"""
|
94
|
+
rmse = (mean_squared_error(truth, prediction)) ** 0.5
|
95
|
+
r2_score_value = r2_score(truth, prediction)
|
96
|
+
return rmse, r2_score_value
|
97
|
+
|
98
|
+
def __str__(self):
|
99
|
+
"""String method of the DAZLs model, provides a summary of the model for easy inspection.
|
100
|
+
|
101
|
+
Returns:
|
102
|
+
Summary represented by a string
|
103
|
+
"""
|
104
|
+
summary_str = (
|
105
|
+
f"{self.__name__} model summary:\n\n"
|
106
|
+
f"Model: {self.model_} \n"
|
107
|
+
f"\tInput columns: {self.baseline_input_columns} \n"
|
108
|
+
f"\tScaler: {self.model_['scaler']} \n\n"
|
109
|
+
f"\tRegressor: {self.model_['regressor']} \n\n"
|
110
|
+
)
|
111
|
+
|
112
|
+
return summary_str
|
@@ -0,0 +1,261 @@
|
|
1
|
+
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
|
+
#
|
3
|
+
# SPDX-License-Identifier: MPL-2.0
|
4
|
+
from functools import partial
|
5
|
+
from typing import Dict, Optional, Sequence, Tuple, Union
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import pandas as pd
|
9
|
+
import sklearn.base
|
10
|
+
import xgboost as xgb
|
11
|
+
from sklearn.compose import TransformedTargetRegressor
|
12
|
+
from sklearn.preprocessing import StandardScaler
|
13
|
+
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
|
14
|
+
from xgboost import Booster
|
15
|
+
|
16
|
+
import openstef.metrics.metrics as metrics
|
17
|
+
from openstef.model.regressors.regressor import OpenstfRegressor
|
18
|
+
|
19
|
+
DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
|
20
|
+
|
21
|
+
|
22
|
+
class XGBMultiOutputQuantileOpenstfRegressor(OpenstfRegressor):
|
23
|
+
r"""Model that provides multioutput quantile regression with XGBoost by default using the arctan loss function.
|
24
|
+
|
25
|
+
Arctan loss:
|
26
|
+
Refence: https://github.com/LaurensSluyterman/XGBoost_quantile_regression/tree/master
|
27
|
+
The key idea is to use a smooth approximation of the pinball loss, the arctan
|
28
|
+
pinball loss, that has a relatively large second derivative.
|
29
|
+
|
30
|
+
The approximation is given by:
|
31
|
+
$$L^{(\text{arctan})}_{\tau, s}(u) = (\tau - 0.5 + \frac{\arctan (u/s)}{\pi})u + \frac{s}{\pi}$$. # noqa E501
|
32
|
+
|
33
|
+
Some important settings:
|
34
|
+
|
35
|
+
* The parameter in the loss function determines the amount of smoothing. A
|
36
|
+
smaller values gives a closer approximation but also a much smaller second
|
37
|
+
derivative. A larger value gives more conservative quantiles when
|
38
|
+
is larger than 0.5, the quantile becomes larger and vice versa.
|
39
|
+
Values between 0.05 and 0.1 appear to work well. It may be a good idea to
|
40
|
+
optimize this parameter.
|
41
|
+
* Set min-child-weight to zero. The second derivatives can be a lot smaller
|
42
|
+
than 1 and this parameter may prevent any splits.
|
43
|
+
* Use a relatively small max-delta-step. We used a default of 0.5.
|
44
|
+
This prevents excessive steps that could happen due to the relatively
|
45
|
+
small second derivative.
|
46
|
+
* For the same reason, use a slightly lower learning rate of 0.05.
|
47
|
+
|
48
|
+
"""
|
49
|
+
|
50
|
+
estimator_: TransformedTargetRegressor
|
51
|
+
quantile_indices_: Dict[float, int]
|
52
|
+
|
53
|
+
@staticmethod
|
54
|
+
def _get_importance_names():
|
55
|
+
return {
|
56
|
+
"gain_importance_name": "total_gain",
|
57
|
+
"weight_importance_name": "weight",
|
58
|
+
}
|
59
|
+
|
60
|
+
def __init__(
|
61
|
+
self,
|
62
|
+
quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
|
63
|
+
gamma: float = 0.0,
|
64
|
+
colsample_bytree: float = 1.0,
|
65
|
+
subsample: float = 1.0,
|
66
|
+
min_child_weight: int = 0,
|
67
|
+
max_depth: int = 6,
|
68
|
+
learning_rate: float = 0.22,
|
69
|
+
alpha: float = 0.0,
|
70
|
+
max_delta_step: int = 0.5,
|
71
|
+
arctan_smoothing: float = 0.055,
|
72
|
+
early_stopping_rounds: Optional[int] = None,
|
73
|
+
):
|
74
|
+
"""Initialize XGBMultiQuantileRegressor.
|
75
|
+
|
76
|
+
Model that provides quantile regression with XGBoost.
|
77
|
+
For each desired quantile an XGBoost model is trained,
|
78
|
+
these can later be used to predict quantiles.
|
79
|
+
|
80
|
+
Args:
|
81
|
+
quantiles: Tuple with desired quantiles, quantile 0.5 is required.
|
82
|
+
For example: (0.1, 0.5, 0.9)
|
83
|
+
gamma: Gamma.
|
84
|
+
colsample_bytree: Colsample by tree.
|
85
|
+
subsample: Subsample.
|
86
|
+
min_child_weight: Minimum child weight.
|
87
|
+
max_depth: Maximum depth.
|
88
|
+
learning_rate: Learning rate.
|
89
|
+
alpha: Alpha.
|
90
|
+
max_delta_step: Maximum delta step.
|
91
|
+
arctan_smoothing: smoothing parameter of the arctan loss function.
|
92
|
+
early_stopping_rounds: Number of rounds to stop training if no improvement
|
93
|
+
is made.
|
94
|
+
|
95
|
+
Raises:
|
96
|
+
ValueError in case quantile 0.5 is not in the requested quantiles.
|
97
|
+
|
98
|
+
"""
|
99
|
+
super().__init__()
|
100
|
+
if 0.5 not in quantiles:
|
101
|
+
raise ValueError(
|
102
|
+
"Cannot train quantile model as 0.5 is not in requested quantiles!"
|
103
|
+
)
|
104
|
+
|
105
|
+
self.quantiles = quantiles
|
106
|
+
|
107
|
+
# Set attributes for hyper parameters
|
108
|
+
self.subsample = subsample
|
109
|
+
self.min_child_weight = min_child_weight
|
110
|
+
self.max_depth = max_depth
|
111
|
+
self.gamma = gamma
|
112
|
+
self.alpha = alpha
|
113
|
+
self.max_delta_step = max_delta_step
|
114
|
+
self.colsample_bytree = colsample_bytree
|
115
|
+
self.learning_rate = learning_rate
|
116
|
+
self.early_stopping_rounds = early_stopping_rounds
|
117
|
+
self.arctan_smoothing = arctan_smoothing
|
118
|
+
|
119
|
+
# Get fitting parameters - only those required for xgbooster's
|
120
|
+
xgb_regressor_params = {
|
121
|
+
key: value
|
122
|
+
for key, value in self.get_params().items()
|
123
|
+
if key in xgb.XGBRegressor().get_params().keys()
|
124
|
+
}
|
125
|
+
|
126
|
+
# Define the model
|
127
|
+
objective = partial(
|
128
|
+
metrics.arctan_loss, taus=self.quantiles, s=arctan_smoothing
|
129
|
+
)
|
130
|
+
xgb_model: xgb.XGBRegressor = xgb.XGBRegressor(
|
131
|
+
objective=objective,
|
132
|
+
base_score=0,
|
133
|
+
multi_strategy="one_output_per_tree",
|
134
|
+
**xgb_regressor_params,
|
135
|
+
)
|
136
|
+
self.estimator_ = TransformedTargetRegressor(
|
137
|
+
regressor=xgb_model, transformer=StandardScaler()
|
138
|
+
)
|
139
|
+
|
140
|
+
# Set quantile indices to remap multioutput predictions
|
141
|
+
self.quantile_indices_ = {
|
142
|
+
quantile: i for i, quantile in enumerate(self.quantiles)
|
143
|
+
}
|
144
|
+
|
145
|
+
def fit(
|
146
|
+
self,
|
147
|
+
x: np.array,
|
148
|
+
y: np.array,
|
149
|
+
eval_set: Optional[Sequence[Tuple[np.array, np.array]]] = None,
|
150
|
+
verbose: Optional[Union[bool, int]] = 0,
|
151
|
+
**kwargs
|
152
|
+
) -> OpenstfRegressor:
|
153
|
+
"""Fits xgb quantile model.
|
154
|
+
|
155
|
+
Args:
|
156
|
+
x: Feature matrix.
|
157
|
+
y: Labels.
|
158
|
+
eval_set: Evaluation set to monitor training performance.
|
159
|
+
verbose: Verbosity level (disabled by default).
|
160
|
+
|
161
|
+
Returns:
|
162
|
+
Fitted XGBQuantile model.
|
163
|
+
|
164
|
+
"""
|
165
|
+
if isinstance(y, pd.Series):
|
166
|
+
y = y.to_numpy()
|
167
|
+
|
168
|
+
if not isinstance(x, pd.DataFrame):
|
169
|
+
x = pd.DataFrame(np.asarray(x))
|
170
|
+
|
171
|
+
# Check/validate input
|
172
|
+
check_X_y(x, y, force_all_finite="allow-nan")
|
173
|
+
|
174
|
+
# Prepare inputs
|
175
|
+
y_multioutput = replicate_for_multioutput(y, len(self.quantiles))
|
176
|
+
|
177
|
+
# Define watchlist if eval_set is defined
|
178
|
+
eval_set_multioutput = []
|
179
|
+
if eval_set:
|
180
|
+
for x_eval, y_eval in eval_set:
|
181
|
+
if isinstance(y_eval, pd.Series):
|
182
|
+
y_eval = y_eval.to_numpy()
|
183
|
+
|
184
|
+
y_eval_multioutput = replicate_for_multioutput(
|
185
|
+
y=y_eval, num_quantiles=len(self.quantiles)
|
186
|
+
)
|
187
|
+
eval_set_multioutput.append((x_eval, y_eval_multioutput))
|
188
|
+
|
189
|
+
eval_set_multioutput.append((x, y_multioutput))
|
190
|
+
|
191
|
+
self.estimator_.fit(
|
192
|
+
X=x.copy(deep=True),
|
193
|
+
y=y_multioutput,
|
194
|
+
eval_set=eval_set_multioutput,
|
195
|
+
verbose=verbose,
|
196
|
+
)
|
197
|
+
|
198
|
+
# Update state of the estimator
|
199
|
+
self.feature_importances_ = self.estimator_.regressor_.feature_importances_
|
200
|
+
self.is_fitted_ = True
|
201
|
+
|
202
|
+
return self
|
203
|
+
|
204
|
+
def predict(self, x: np.array, quantile: float = 0.5) -> np.array:
|
205
|
+
"""Makes a prediction for a desired quantile.
|
206
|
+
|
207
|
+
Args:
|
208
|
+
x: Feature matrix.
|
209
|
+
quantile: Quantile for which a prediciton is desired,
|
210
|
+
note that only quantile are available for which a model is trained,
|
211
|
+
and that this is a quantile-model specific keyword.
|
212
|
+
|
213
|
+
Returns:
|
214
|
+
Prediction
|
215
|
+
|
216
|
+
Raises:
|
217
|
+
ValueError in case no model is trained for the requested quantile.
|
218
|
+
|
219
|
+
"""
|
220
|
+
# Check if model is trained for this quantile
|
221
|
+
if quantile not in self.quantiles:
|
222
|
+
raise ValueError("No model trained for requested quantile!")
|
223
|
+
|
224
|
+
# Check/validate input
|
225
|
+
check_array(x, force_all_finite="allow-nan")
|
226
|
+
check_is_fitted(self)
|
227
|
+
|
228
|
+
# best_iteration is only available if early stopping was used during training
|
229
|
+
prediction: np.array
|
230
|
+
if hasattr(self.estimator_, "best_iteration"):
|
231
|
+
prediction = self.estimator_.predict(
|
232
|
+
X=x,
|
233
|
+
iteration_range=(0, self.estimator_.best_iteration + 1),
|
234
|
+
)
|
235
|
+
else:
|
236
|
+
prediction = self.estimator_.predict(X=x)
|
237
|
+
|
238
|
+
quantile_index = self.quantile_indices_[quantile]
|
239
|
+
return prediction[:, quantile_index]
|
240
|
+
|
241
|
+
@property
|
242
|
+
def feature_names(self):
|
243
|
+
return self.estimator_.feature_names_in_
|
244
|
+
|
245
|
+
@property
|
246
|
+
def can_predict_quantiles(self):
|
247
|
+
return True
|
248
|
+
|
249
|
+
|
250
|
+
def replicate_for_multioutput(y: np.array, num_quantiles: int) -> np.array:
|
251
|
+
"""Replicates a 1D array to a 2D array for multioutput regression.
|
252
|
+
|
253
|
+
Args:
|
254
|
+
y: 1D array.
|
255
|
+
num_quantiles: Number of columns in the output array.
|
256
|
+
|
257
|
+
Returns:
|
258
|
+
2D array with shape (len(y), num_quantiles)
|
259
|
+
|
260
|
+
"""
|
261
|
+
return np.repeat(y[:, None], num_quantiles, axis=1)
|
@@ -18,7 +18,7 @@ from openstef.settings import Settings
|
|
18
18
|
|
19
19
|
# Set the path for the Dazls stored model
|
20
20
|
DAZLS_STORED = str(
|
21
|
-
PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.7" / "dazls_stored_3.4.7_"
|
21
|
+
PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
|
22
22
|
)
|
23
23
|
|
24
24
|
|
@@ -113,24 +113,7 @@ def create_components_forecast_pipeline(
|
|
113
113
|
# Save and load the model as .sav file (or as .z file)
|
114
114
|
# For the code contact: korte.termijn.prognoses@alliander.com
|
115
115
|
dazls_model = Dazls()
|
116
|
-
dazls_model.domain_model = joblib.load(DAZLS_STORED + "domain_model.z")
|
117
|
-
dazls_model.domain_model_scaler = joblib.load(
|
118
|
-
DAZLS_STORED + "domain_model_scaler.z"
|
119
|
-
)
|
120
|
-
dazls_model.domain_model_input_columns = joblib.load(
|
121
|
-
DAZLS_STORED + "domain_model_features.z"
|
122
|
-
)
|
123
|
-
|
124
|
-
dazls_model.adaptation_model = joblib.load(DAZLS_STORED + "adaptation_model.z")
|
125
|
-
dazls_model.adaptation_model_scaler = joblib.load(
|
126
|
-
DAZLS_STORED + "adaptation_model_scaler.z"
|
127
|
-
)
|
128
|
-
dazls_model.adaptation_model_input_columns = joblib.load(
|
129
|
-
DAZLS_STORED + "adaptation_model_features.z"
|
130
|
-
)
|
131
|
-
|
132
|
-
dazls_model.target_columns = joblib.load(DAZLS_STORED + "target.z")
|
133
|
-
dazls_model.target_scaler = joblib.load(DAZLS_STORED + "target_scaler.z")
|
116
|
+
dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")
|
134
117
|
|
135
118
|
logger.info("DAZLS model loaded", dazls_model=str(dazls_model))
|
136
119
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: openstef
|
3
|
-
Version: 3.4.23
|
3
|
+
Version: 3.4.25
|
4
4
|
Summary: Open short term energy forecaster
|
5
5
|
Home-page: https://github.com/OpenSTEF/openstef
|
6
6
|
Author: Alliander N.V
|
@@ -128,4 +128,4 @@ Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/C
|
|
128
128
|
|
129
129
|
# Contact
|
130
130
|
Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
|
131
|
-
|
131
|
+
|
@@ -18,24 +18,6 @@ openstef/data/dutch_holidays_2020-2022.csv
|
|
18
18
|
openstef/data/dutch_holidays_2020-2022.csv.license
|
19
19
|
openstef/data/pv_single_coefs.csv
|
20
20
|
openstef/data/pv_single_coefs.csv.license
|
21
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z
|
22
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z.license
|
23
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z
|
24
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z.license
|
25
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z
|
26
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z.license
|
27
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z
|
28
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z.license
|
29
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z
|
30
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z.license
|
31
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z
|
32
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z.license
|
33
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md
|
34
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md.license
|
35
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z
|
36
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z.license
|
37
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z
|
38
|
-
openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z.license
|
39
21
|
openstef/data_classes/__init__.py
|
40
22
|
openstef/data_classes/data_prep.py
|
41
23
|
openstef/data_classes/model_specifications.py
|
@@ -76,6 +58,7 @@ openstef/model/regressors/linear.py
|
|
76
58
|
openstef/model/regressors/linear_quantile.py
|
77
59
|
openstef/model/regressors/regressor.py
|
78
60
|
openstef/model/regressors/xgb.py
|
61
|
+
openstef/model/regressors/xgb_multioutput_quantile.py
|
79
62
|
openstef/model/regressors/xgb_quantile.py
|
80
63
|
openstef/model_selection/__init__.py
|
81
64
|
openstef/model_selection/model_selection.py
|
@@ -29,7 +29,7 @@ def read_long_description_from_readme():
|
|
29
29
|
|
30
30
|
setup(
|
31
31
|
name="openstef",
|
32
|
-
version="3.4.23",
|
32
|
+
version="3.4.25",
|
33
33
|
packages=find_packages(include=["openstef", "openstef.*"]),
|
34
34
|
description="Open short term energy forecaster",
|
35
35
|
long_description=read_long_description_from_readme(),
|
Binary file
|
openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z
DELETED
Binary file
|
openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z
DELETED
Binary file
|