openstef 3.4.23__tar.gz → 3.4.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. {openstef-3.4.23 → openstef-3.4.25}/PKG-INFO +2 -2
  2. {openstef-3.4.23 → openstef-3.4.25}/README.md +1 -1
  3. {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/prediction_job.py +1 -0
  4. {openstef-3.4.23 → openstef-3.4.25}/openstef/enums.py +1 -0
  5. {openstef-3.4.23 → openstef-3.4.25}/openstef/metrics/metrics.py +51 -0
  6. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/model_creator.py +14 -0
  7. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/objective.py +30 -0
  8. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/objective_creator.py +3 -0
  9. openstef-3.4.25/openstef/model/regressors/dazls.py +112 -0
  10. openstef-3.4.25/openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  11. {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/create_component_forecast.py +2 -19
  12. {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/PKG-INFO +2 -2
  13. {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/SOURCES.txt +1 -18
  14. {openstef-3.4.23 → openstef-3.4.25}/setup.py +1 -1
  15. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z +0 -0
  16. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z.license +0 -3
  17. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z +0 -0
  18. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z.license +0 -3
  19. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z +0 -0
  20. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z.license +0 -3
  21. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z +0 -0
  22. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z.license +0 -3
  23. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z +0 -2
  24. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z.license +0 -3
  25. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z +0 -0
  26. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z.license +0 -3
  27. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md +0 -14
  28. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md.license +0 -3
  29. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z +0 -0
  30. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z.license +0 -3
  31. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z +0 -0
  32. openstef-3.4.23/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z.license +0 -3
  33. openstef-3.4.23/openstef/model/regressors/dazls.py +0 -191
  34. {openstef-3.4.23 → openstef-3.4.25}/LICENSE +0 -0
  35. {openstef-3.4.23 → openstef-3.4.25}/openstef/__init__.py +0 -0
  36. {openstef-3.4.23 → openstef-3.4.25}/openstef/__main__.py +0 -0
  37. {openstef-3.4.23 → openstef-3.4.25}/openstef/app_settings.py +0 -0
  38. {openstef-3.4.23 → openstef-3.4.25}/openstef/data/dutch_holidays_2020-2022.csv +0 -0
  39. {openstef-3.4.23 → openstef-3.4.25}/openstef/data/dutch_holidays_2020-2022.csv.license +0 -0
  40. {openstef-3.4.23 → openstef-3.4.25}/openstef/data/pv_single_coefs.csv +0 -0
  41. {openstef-3.4.23 → openstef-3.4.25}/openstef/data/pv_single_coefs.csv.license +0 -0
  42. {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/__init__.py +0 -0
  43. {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/data_prep.py +0 -0
  44. {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/model_specifications.py +0 -0
  45. {openstef-3.4.23 → openstef-3.4.25}/openstef/data_classes/split_function.py +0 -0
  46. {openstef-3.4.23 → openstef-3.4.25}/openstef/exceptions.py +0 -0
  47. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/__init__.py +0 -0
  48. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/apply_features.py +0 -0
  49. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/data_preparation.py +0 -0
  50. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/feature_adder.py +0 -0
  51. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/feature_applicator.py +0 -0
  52. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/general.py +0 -0
  53. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/holiday_features.py +0 -0
  54. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/lag_features.py +0 -0
  55. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/missing_values_transformer.py +0 -0
  56. {openstef-3.4.23 → openstef-3.4.25}/openstef/feature_engineering/weather_features.py +0 -0
  57. {openstef-3.4.23 → openstef-3.4.25}/openstef/metrics/__init__.py +0 -0
  58. {openstef-3.4.23 → openstef-3.4.25}/openstef/metrics/figure.py +0 -0
  59. {openstef-3.4.23 → openstef-3.4.25}/openstef/metrics/reporter.py +0 -0
  60. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/__init__.py +0 -0
  61. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/basecase.py +0 -0
  62. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/confidence_interval_applicator.py +0 -0
  63. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/fallback.py +0 -0
  64. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/metamodels/__init__.py +0 -0
  65. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/metamodels/grouped_regressor.py +0 -0
  66. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/metamodels/missing_values_handler.py +0 -0
  67. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/__init__.py +0 -0
  68. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/arima.py +0 -0
  69. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/custom_regressor.py +0 -0
  70. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/lgbm.py +0 -0
  71. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/linear.py +0 -0
  72. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/linear_quantile.py +0 -0
  73. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/regressor.py +0 -0
  74. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/xgb.py +0 -0
  75. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/regressors/xgb_quantile.py +0 -0
  76. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/serializer.py +0 -0
  77. {openstef-3.4.23 → openstef-3.4.25}/openstef/model/standard_deviation_generator.py +0 -0
  78. {openstef-3.4.23 → openstef-3.4.25}/openstef/model_selection/__init__.py +0 -0
  79. {openstef-3.4.23 → openstef-3.4.25}/openstef/model_selection/model_selection.py +0 -0
  80. {openstef-3.4.23 → openstef-3.4.25}/openstef/monitoring/__init__.py +0 -0
  81. {openstef-3.4.23 → openstef-3.4.25}/openstef/monitoring/performance_meter.py +0 -0
  82. {openstef-3.4.23 → openstef-3.4.25}/openstef/monitoring/teams.py +0 -0
  83. {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/__init__.py +0 -0
  84. {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/create_basecase_forecast.py +0 -0
  85. {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/create_forecast.py +0 -0
  86. {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/optimize_hyperparameters.py +0 -0
  87. {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/train_create_forecast_backtest.py +0 -0
  88. {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/train_model.py +0 -0
  89. {openstef-3.4.23 → openstef-3.4.25}/openstef/pipeline/utils.py +0 -0
  90. {openstef-3.4.23 → openstef-3.4.25}/openstef/postprocessing/__init__.py +0 -0
  91. {openstef-3.4.23 → openstef-3.4.25}/openstef/postprocessing/postprocessing.py +0 -0
  92. {openstef-3.4.23 → openstef-3.4.25}/openstef/preprocessing/__init__.py +0 -0
  93. {openstef-3.4.23 → openstef-3.4.25}/openstef/preprocessing/preprocessing.py +0 -0
  94. {openstef-3.4.23 → openstef-3.4.25}/openstef/settings.py +0 -0
  95. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/__init__.py +0 -0
  96. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/calculate_kpi.py +0 -0
  97. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_basecase_forecast.py +0 -0
  98. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_components_forecast.py +0 -0
  99. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_forecast.py +0 -0
  100. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_solar_forecast.py +0 -0
  101. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/create_wind_forecast.py +0 -0
  102. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/optimize_hyperparameters.py +0 -0
  103. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/split_forecast.py +0 -0
  104. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/train_model.py +0 -0
  105. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/utils/__init__.py +0 -0
  106. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/utils/dependencies.py +0 -0
  107. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/utils/predictionjobloop.py +0 -0
  108. {openstef-3.4.23 → openstef-3.4.25}/openstef/tasks/utils/taskcontext.py +0 -0
  109. {openstef-3.4.23 → openstef-3.4.25}/openstef/validation/__init__.py +0 -0
  110. {openstef-3.4.23 → openstef-3.4.25}/openstef/validation/validation.py +0 -0
  111. {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/dependency_links.txt +0 -0
  112. {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/requires.txt +0 -0
  113. {openstef-3.4.23 → openstef-3.4.25}/openstef.egg-info/top_level.txt +0 -0
  114. {openstef-3.4.23 → openstef-3.4.25}/pyproject.toml +0 -0
  115. {openstef-3.4.23 → openstef-3.4.25}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openstef
3
- Version: 3.4.23
3
+ Version: 3.4.25
4
4
  Summary: Open short term energy forecaster
5
5
  Home-page: https://github.com/OpenSTEF/openstef
6
6
  Author: Alliander N.V
@@ -128,4 +128,4 @@ Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/C
128
128
 
129
129
  # Contact
130
130
  Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
131
-
131
+
@@ -91,4 +91,4 @@ Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/C
91
91
 
92
92
  # Contact
93
93
  Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
94
-
94
+
@@ -26,6 +26,7 @@ class PredictionJobDataClass(BaseModel):
26
26
  - ``"lgb"``
27
27
  - ``"linear"``
28
28
  - ``"linear_quantile"``
29
+ - ``"xgb_multioutput_quantile"``
29
30
 
30
31
  If unsure what to pick, choose ``"xgb"``.
31
32
 
@@ -8,6 +8,7 @@ from enum import Enum
8
8
  class MLModelType(Enum):
9
9
  XGB = "xgb"
10
10
  XGB_QUANTILE = "xgb_quantile"
11
+ XGB_MULTIOUTPUT_QUANTILE = "xgb_multioutput_quantile"
11
12
  LGB = "lgb"
12
13
  LINEAR = "linear"
13
14
  LINEAR_QUANTILE = "linear_quantile"
@@ -431,3 +431,54 @@ def xgb_quantile_obj(
431
431
  hess = np.ones_like(preds)
432
432
 
433
433
  return grad, hess
434
+
435
+
436
def arctan_loss(y_true, y_pred, taus, s=0.1):
    """Compute the gradient and hessian of the arctan pinball loss.

    Note that XGBoost outputs the predictions in a slightly peculiar manner.
    Suppose we have 100 data points and we predict 10 quantiles. The predictions
    will be an array of size (1000 x 1). We first resize this to a (100x10) array
    where each row corresponds to the 10 predicted quantiles for a single data
    point. We then use a for-loop (over the 10 columns) to calculate the gradients
    and second derivatives. Legibility was chosen over efficiency. This part
    can be made more efficient.

    Both flat inputs (length ``n_rows * len(taus)``) and inputs that are already
    shaped ``(n_rows, len(taus))`` are accepted; the outputs are always flat.

    Args:
        y_true: An array containing the true observations.
        y_pred: An array containing the predicted quantiles.
        taus: A list containing the true desired coverage of the quantiles.
        s: A smoothing parameter.

    Returns:
        grad: A flat array containing the (negative) gradients with respect to y_pred.
        hess: A flat array containing the second derivative with respect to y_pred.

    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Use the total element count rather than len(): len() of a 2D array is the
    # number of rows, which would make the reshape below fail.
    size = y_true.size
    n_dim = len(taus)  # The number of columns
    n_rows = size // n_dim

    # Resize the predictions and targets.
    # Each column corresponds to a quantile, each row to a data point.
    y_pred = np.reshape(y_pred, (n_rows, n_dim))
    y_true = np.reshape(y_true, (n_rows, n_dim))

    # Calculate the (smoothing-scaled) differences
    u = y_true - y_pred
    z = u / s

    # Calculate the gradient and second derivatives, one quantile column at a time
    grad = np.zeros_like(y_pred)
    hess = np.zeros_like(y_pred)
    for i, tau in enumerate(taus):
        x = 1 + z[:, i] ** 2
        grad[:, i] = (
            tau - 0.5 + 1 / np.pi * np.arctan(z[:, i]) + z[:, i] / (np.pi) * x**-1
        )
        hess[:, i] = 2 / (np.pi * s) * x ** (-2)

    # Reshape back to a flat array and average over the quantiles.
    grad = grad.reshape(size)
    hess = hess.reshape(size)
    return -grad / n_dim, hess / n_dim
@@ -15,6 +15,9 @@ from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegre
15
15
  from openstef.model.regressors.regressor import OpenstfRegressor
16
16
  from openstef.model.regressors.xgb import XGBOpenstfRegressor
17
17
  from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
18
+ from openstef.model.regressors.xgb_multioutput_quantile import (
19
+ XGBMultiOutputQuantileOpenstfRegressor,
20
+ )
18
21
  from openstef.settings import Settings
19
22
 
20
23
  structlog.configure(
@@ -87,6 +90,16 @@ valid_model_kwargs = {
87
90
  "max_depth",
88
91
  "early_stopping_rounds",
89
92
  ],
93
+ MLModelType.XGB_MULTIOUTPUT_QUANTILE: [
94
+ "quantiles",
95
+ "gamma",
96
+ "colsample_bytree",
97
+ "subsample",
98
+ "min_child_weight",
99
+ "max_depth",
100
+ "early_stopping_rounds",
101
+ "arctan_smoothing",
102
+ ],
90
103
  MLModelType.LINEAR: [
91
104
  "missing_values",
92
105
  "imputation_strategy",
@@ -117,6 +130,7 @@ class ModelCreator:
117
130
  MLModelType.XGB: XGBOpenstfRegressor,
118
131
  MLModelType.LGB: LGBMOpenstfRegressor,
119
132
  MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
133
+ MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
120
134
  MLModelType.LINEAR: LinearOpenstfRegressor,
121
135
  MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
122
136
  MLModelType.ARIMA: ARIMAOpenstfRegressor,
@@ -349,6 +349,36 @@ class XGBQuantileRegressorObjective(RegressorObjective):
349
349
  )
350
350
 
351
351
 
352
class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
    """Hyperparameter-optimization objective for the XGB multioutput quantile regressor."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Use the enum member that ObjectiveCreator maps to this class, not the
        # single-output XGB_QUANTILE type.
        self.model_type = MLModelType.XGB_MULTIOUTPUT_QUANTILE

    def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
        """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.

        Args:
            trial: The optuna trial used to suggest hyperparameter values.

        Returns:
            Dictionary with hyperparameter name as key and hyperparameter value as value.

        """
        # Filtered default parameters
        model_params = super().get_params(trial)

        # XGB multioutput quantile specific parameters; these take precedence
        # over defaults with the same name.
        params = {
            "gamma": trial.suggest_float("gamma", 1e-8, 1.0),
            "arctan_smoothing": trial.suggest_float("arctan_smoothing", 0.025, 0.15),
        }
        return {**model_params, **params}

    def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
        """Return an XGBoost pruning callback observing ``validation_1-<eval_metric>``."""
        return optuna.integration.XGBoostPruningCallback(
            trial, observation_key=f"validation_1-{self.eval_metric}"
        )
380
+
381
+
352
382
  class LinearRegressorObjective(RegressorObjective):
353
383
  def __init__(self, *args, **kwargs):
354
384
  super().__init__(*args, **kwargs)
@@ -12,6 +12,7 @@ from openstef.model.objective import (
12
12
  RegressorObjective,
13
13
  XGBQuantileRegressorObjective,
14
14
  XGBRegressorObjective,
15
+ XGBMultioutputQuantileRegressorObjective,
15
16
  )
16
17
  from openstef.model.regressors.custom_regressor import (
17
18
  create_custom_objective,
@@ -24,7 +25,9 @@ class ObjectiveCreator:
24
25
  MLModelType.XGB: XGBRegressorObjective,
25
26
  MLModelType.LGB: LGBRegressorObjective,
26
27
  MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
28
+ MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
27
29
  MLModelType.LINEAR: LinearRegressorObjective,
30
+ MLModelType.LINEAR_QUANTILE: LinearRegressorObjective,
28
31
  MLModelType.ARIMA: ARIMARegressorObjective,
29
32
  }
30
33
 
@@ -0,0 +1,112 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ """This module defines the DAZL model."""
5
+ import numpy as np
6
+ from sklearn.base import BaseEstimator
7
+ from sklearn.compose import TransformedTargetRegressor
8
+ from sklearn.linear_model import LinearRegression
9
+ from sklearn.metrics import mean_squared_error, r2_score
10
+ from sklearn.pipeline import Pipeline
11
+ from sklearn.preprocessing import MinMaxScaler
12
+
13
+
14
class Dazls(BaseEstimator):
    """DAZLS model.

    The model carries out wind and solar power prediction for unseen target substations using training data from other
    substations with known components.

    The estimator is a single scikit-learn pipeline: a clipping min-max scaler on the
    inputs followed by a linear regression whose targets are min-max scaled as well.

    """

    model_: Pipeline

    def __init__(self):
        """Initialize DAZL model."""
        self.__name__ = "DAZLS"

        regressor = TransformedTargetRegressor(
            regressor=LinearRegression(),
            transformer=MinMaxScaler(clip=True),
        )

        self.model_ = Pipeline(
            [("scaler", MinMaxScaler(clip=True)), ("regressor", regressor)]
        )

        # The input columns of the baseline model (with description)
        self.baseline_input_columns = [
            "radiation",  # Weather parameter
            "windspeed_100m",  # Weather parameter
            "total_load",
        ]
        self.target_columns = ["total_wind_part", "total_solar_part"]

    def fit(self, features, target):
        """Fit the model.

        Only the columns listed in ``baseline_input_columns`` are taken from
        ``features`` and only those in ``target_columns`` from ``target``; the
        scaling of inputs and targets is handled inside the pipeline.

        Args:
            features: Object supporting ``.loc`` column selection that contains the
                baseline input columns.
            target: Object supporting ``.loc`` column selection that contains the
                target columns (y_train).

        Returns:
            The fitted estimator itself.
        """
        x, y = (
            features.loc[:, self.baseline_input_columns],
            target.loc[:, self.target_columns],
        )

        self.model_.fit(x, y)

        # Follow the scikit-learn convention so calls can be chained.
        return self

    def predict(self, x: np.array):
        """Make a prediction.

        Only the columns listed in ``baseline_input_columns`` are selected from ``x``
        and passed to the pipeline.

        Args:
            x: Test data; must support ``.loc`` column selection and contain the
                baseline input columns.

        Returns:
            prediction: The output of the pipeline for the selected input columns.
        """
        model_test_data = x.loc[:, self.baseline_input_columns]

        return self.model_.predict(model_test_data)

    def score(self, truth, prediction):
        """Evaluation of the prediction's output.

        Args:
            truth: real values
            prediction: predicted values

        Returns:
            RMSE and R2 scores
        """
        rmse = (mean_squared_error(truth, prediction)) ** 0.5
        r2_score_value = r2_score(truth, prediction)
        return rmse, r2_score_value

    def __str__(self):
        """String method of the DAZLs model, provides a summary of the model for easy inspection.

        Returns:
            Summary represented by a string
        """
        summary_str = (
            f"{self.__name__} model summary:\n\n"
            f"Model: {self.model_} \n"
            f"\tInput columns: {self.baseline_input_columns} \n"
            f"\tScaler: {self.model_['scaler']} \n\n"
            f"\tRegressor: {self.model_['regressor']} \n\n"
        )

        return summary_str
@@ -0,0 +1,261 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ from functools import partial
5
+ from typing import Dict, Optional, Sequence, Tuple, Union
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import sklearn.base
10
+ import xgboost as xgb
11
+ from sklearn.compose import TransformedTargetRegressor
12
+ from sklearn.preprocessing import StandardScaler
13
+ from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
14
+ from xgboost import Booster
15
+
16
+ import openstef.metrics.metrics as metrics
17
+ from openstef.model.regressors.regressor import OpenstfRegressor
18
+
19
+ DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
20
+
21
+
22
class XGBMultiOutputQuantileOpenstfRegressor(OpenstfRegressor):
    r"""Model that provides multioutput quantile regression with XGBoost by default using the arctan loss function.

    Arctan loss:
        Reference: https://github.com/LaurensSluyterman/XGBoost_quantile_regression/tree/master
        The key idea is to use a smooth approximation of the pinball loss, the arctan
        pinball loss, that has a relatively large second derivative.

        The approximation is given by:
        $$L^{(\text{arctan})}_{\tau, s}(u) = (\tau - 0.5 + \frac{\arctan (u/s)}{\pi})u + \frac{s}{\pi}$$. # noqa E501

    Some important settings:

    * The parameter $s$ in the loss function determines the amount of smoothing. A
      smaller value gives a closer approximation but also a much smaller second
      derivative. A larger value gives more conservative quantiles: when $\tau$
      is larger than 0.5, the quantile becomes larger and vice versa.
      Values between 0.05 and 0.1 appear to work well. It may be a good idea to
      optimize this parameter.
    * Set min-child-weight to zero. The second derivatives can be a lot smaller
      than 1 and this parameter may prevent any splits.
    * Use a relatively small max-delta-step. We used a default of 0.5.
      This prevents excessive steps that could happen due to the relatively
      small second derivative.
    * For the same reason, the reference suggests a slightly lower learning rate
      of 0.05. Note that this class defaults to ``learning_rate=0.22``.

    """

    estimator_: TransformedTargetRegressor
    quantile_indices_: Dict[float, int]

    @staticmethod
    def _get_importance_names():
        return {
            "gain_importance_name": "total_gain",
            "weight_importance_name": "weight",
        }

    def __init__(
        self,
        quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
        gamma: float = 0.0,
        colsample_bytree: float = 1.0,
        subsample: float = 1.0,
        min_child_weight: int = 0,
        max_depth: int = 6,
        learning_rate: float = 0.22,
        alpha: float = 0.0,
        max_delta_step: float = 0.5,
        arctan_smoothing: float = 0.055,
        early_stopping_rounds: Optional[int] = None,
    ):
        """Initialize XGBMultiQuantileRegressor.

        Model that provides quantile regression with XGBoost.
        A single multi-output XGBoost regressor is created with one output per
        requested quantile; its targets are standardized by a
        ``TransformedTargetRegressor``.

        Args:
            quantiles: Tuple with desired quantiles, quantile 0.5 is required.
                For example: (0.1, 0.5, 0.9)
            gamma: Gamma.
            colsample_bytree: Colsample by tree.
            subsample: Subsample.
            min_child_weight: Minimum child weight.
            max_depth: Maximum depth.
            learning_rate: Learning rate.
            alpha: Alpha.
            max_delta_step: Maximum delta step.
            arctan_smoothing: smoothing parameter of the arctan loss function.
            early_stopping_rounds: Number of rounds to stop training if no improvement
                is made.

        Raises:
            ValueError in case quantile 0.5 is not in the requested quantiles.

        """
        super().__init__()
        if 0.5 not in quantiles:
            raise ValueError(
                "Cannot train quantile model as 0.5 is not in requested quantiles!"
            )

        self.quantiles = quantiles

        # Set attributes for hyper parameters
        self.subsample = subsample
        self.min_child_weight = min_child_weight
        self.max_depth = max_depth
        self.gamma = gamma
        self.alpha = alpha
        self.max_delta_step = max_delta_step
        self.colsample_bytree = colsample_bytree
        self.learning_rate = learning_rate
        self.early_stopping_rounds = early_stopping_rounds
        self.arctan_smoothing = arctan_smoothing

        # Get fitting parameters - only those that XGBRegressor itself accepts.
        # The accepted names are looked up once instead of once per parameter.
        xgb_param_names = set(xgb.XGBRegressor().get_params().keys())
        xgb_regressor_params = {
            key: value
            for key, value in self.get_params().items()
            if key in xgb_param_names
        }

        # Define the model
        objective = partial(
            metrics.arctan_loss, taus=self.quantiles, s=arctan_smoothing
        )
        xgb_model: xgb.XGBRegressor = xgb.XGBRegressor(
            objective=objective,
            base_score=0,
            multi_strategy="one_output_per_tree",
            **xgb_regressor_params,
        )
        self.estimator_ = TransformedTargetRegressor(
            regressor=xgb_model, transformer=StandardScaler()
        )

        # Set quantile indices to remap multioutput predictions
        self.quantile_indices_ = {
            quantile: i for i, quantile in enumerate(self.quantiles)
        }

    def fit(
        self,
        x: np.array,
        y: np.array,
        eval_set: Optional[Sequence[Tuple[np.array, np.array]]] = None,
        verbose: Optional[Union[bool, int]] = 0,
        **kwargs
    ) -> OpenstfRegressor:
        """Fits xgb quantile model.

        Args:
            x: Feature matrix.
            y: Labels.
            eval_set: Evaluation set to monitor training performance.
            verbose: Verbosity level (disabled by default).
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            Fitted XGBQuantile model.

        """
        if isinstance(y, pd.Series):
            y = y.to_numpy()

        if not isinstance(x, pd.DataFrame):
            x = pd.DataFrame(np.asarray(x))

        # Check/validate input
        check_X_y(x, y, force_all_finite="allow-nan")

        # Prepare inputs: every quantile output is trained against the same labels
        y_multioutput = replicate_for_multioutput(y, len(self.quantiles))

        # Define watchlist if eval_set is defined
        eval_set_multioutput = []
        if eval_set:
            for x_eval, y_eval in eval_set:
                if isinstance(y_eval, pd.Series):
                    y_eval = y_eval.to_numpy()

                y_eval_multioutput = replicate_for_multioutput(
                    y=y_eval, num_quantiles=len(self.quantiles)
                )
                eval_set_multioutput.append((x_eval, y_eval_multioutput))

        # The training data is always appended as the last evaluation set.
        eval_set_multioutput.append((x, y_multioutput))

        self.estimator_.fit(
            X=x.copy(deep=True),
            y=y_multioutput,
            eval_set=eval_set_multioutput,
            verbose=verbose,
        )

        # Update state of the estimator
        self.feature_importances_ = self.estimator_.regressor_.feature_importances_
        self.is_fitted_ = True

        return self

    def predict(self, x: np.array, quantile: float = 0.5) -> np.array:
        """Makes a prediction for a desired quantile.

        Args:
            x: Feature matrix.
            quantile: Quantile for which a prediction is desired,
                note that only quantiles are available for which a model is trained,
                and that this is a quantile-model specific keyword.

        Returns:
            Prediction

        Raises:
            ValueError in case no model is trained for the requested quantile.

        """
        # Check if model is trained for this quantile
        if quantile not in self.quantiles:
            raise ValueError("No model trained for requested quantile!")

        # Check/validate input
        check_array(x, force_all_finite="allow-nan")
        check_is_fitted(self)

        # best_iteration is only available if early stopping was used during training.
        # It lives on the fitted XGBoost regressor, not on the
        # TransformedTargetRegressor wrapper, so inspect the inner regressor.
        fitted_regressor = getattr(self.estimator_, "regressor_", None)
        prediction: np.array
        if hasattr(fitted_regressor, "best_iteration"):
            prediction = self.estimator_.predict(
                X=x,
                iteration_range=(0, fitted_regressor.best_iteration + 1),
            )
        else:
            prediction = self.estimator_.predict(X=x)

        # Each column of the multioutput prediction corresponds to one quantile
        quantile_index = self.quantile_indices_[quantile]
        return prediction[:, quantile_index]

    @property
    def feature_names(self):
        return self.estimator_.feature_names_in_

    @property
    def can_predict_quantiles(self):
        return True
248
+
249
+
250
def replicate_for_multioutput(y: np.array, num_quantiles: int) -> np.array:
    """Replicates a 1D array to a 2D array for multioutput regression.

    Args:
        y: 1D array or array-like (for example a list).
        num_quantiles: Number of columns in the output array.

    Returns:
        2D array with shape (len(y), num_quantiles)

    """
    # Coerce first so plain sequences, which do not support 2D indexing, work too.
    column = np.asarray(y)[:, None]
    return np.repeat(column, num_quantiles, axis=1)
@@ -18,7 +18,7 @@ from openstef.settings import Settings
18
18
 
19
19
  # Set the path for the Dazls stored model
20
20
  DAZLS_STORED = str(
21
- PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.7" / "dazls_stored_3.4.7_"
21
+ PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
22
22
  )
23
23
 
24
24
 
@@ -113,24 +113,7 @@ def create_components_forecast_pipeline(
113
113
  # Save and load the model as .sav file (or as .z file)
114
114
  # For the code contact: korte.termijn.prognoses@alliander.com
115
115
  dazls_model = Dazls()
116
- dazls_model.domain_model = joblib.load(DAZLS_STORED + "domain_model.z")
117
- dazls_model.domain_model_scaler = joblib.load(
118
- DAZLS_STORED + "domain_model_scaler.z"
119
- )
120
- dazls_model.domain_model_input_columns = joblib.load(
121
- DAZLS_STORED + "domain_model_features.z"
122
- )
123
-
124
- dazls_model.adaptation_model = joblib.load(DAZLS_STORED + "adaptation_model.z")
125
- dazls_model.adaptation_model_scaler = joblib.load(
126
- DAZLS_STORED + "adaptation_model_scaler.z"
127
- )
128
- dazls_model.adaptation_model_input_columns = joblib.load(
129
- DAZLS_STORED + "adaptation_model_features.z"
130
- )
131
-
132
- dazls_model.target_columns = joblib.load(DAZLS_STORED + "target.z")
133
- dazls_model.target_scaler = joblib.load(DAZLS_STORED + "target_scaler.z")
116
+ dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")
134
117
 
135
118
  logger.info("DAZLS model loaded", dazls_model=str(dazls_model))
136
119
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openstef
3
- Version: 3.4.23
3
+ Version: 3.4.25
4
4
  Summary: Open short term energy forecaster
5
5
  Home-page: https://github.com/OpenSTEF/openstef
6
6
  Author: Alliander N.V
@@ -128,4 +128,4 @@ Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/C
128
128
 
129
129
  # Contact
130
130
  Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
131
-
131
+
@@ -18,24 +18,6 @@ openstef/data/dutch_holidays_2020-2022.csv
18
18
  openstef/data/dutch_holidays_2020-2022.csv.license
19
19
  openstef/data/pv_single_coefs.csv
20
20
  openstef/data/pv_single_coefs.csv.license
21
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z
22
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z.license
23
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z
24
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z.license
25
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z
26
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z.license
27
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z
28
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z.license
29
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z
30
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z.license
31
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z
32
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z.license
33
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md
34
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md.license
35
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z
36
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z.license
37
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z
38
- openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z.license
39
21
  openstef/data_classes/__init__.py
40
22
  openstef/data_classes/data_prep.py
41
23
  openstef/data_classes/model_specifications.py
@@ -76,6 +58,7 @@ openstef/model/regressors/linear.py
76
58
  openstef/model/regressors/linear_quantile.py
77
59
  openstef/model/regressors/regressor.py
78
60
  openstef/model/regressors/xgb.py
61
+ openstef/model/regressors/xgb_multioutput_quantile.py
79
62
  openstef/model/regressors/xgb_quantile.py
80
63
  openstef/model_selection/__init__.py
81
64
  openstef/model_selection/model_selection.py
@@ -29,7 +29,7 @@ def read_long_description_from_readme():
29
29
 
30
30
  setup(
31
31
  name="openstef",
32
- version="3.4.23",
32
+ version="3.4.25",
33
33
  packages=find_packages(include=["openstef", "openstef.*"]),
34
34
  description="Open short term energy forecaster",
35
35
  long_description=read_long_description_from_readme(),
@@ -1,3 +0,0 @@
1
- SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com>
2
-
3
- SPDX-License-Identifier: MPL-2.0
@@ -1,3 +0,0 @@
1
- SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com>
2
-
3
- SPDX-License-Identifier: MPL-2.0
@@ -1,3 +0,0 @@
1
- SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com>
2
-
3
- SPDX-License-Identifier: MPL-2.0