openstef 3.4.22__tar.gz → 3.4.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. {openstef-3.4.22 → openstef-3.4.24}/PKG-INFO +2 -2
  2. {openstef-3.4.22 → openstef-3.4.24}/README.md +1 -1
  3. {openstef-3.4.22 → openstef-3.4.24}/openstef/data_classes/prediction_job.py +1 -0
  4. {openstef-3.4.22 → openstef-3.4.24}/openstef/enums.py +1 -0
  5. {openstef-3.4.22 → openstef-3.4.24}/openstef/metrics/metrics.py +51 -0
  6. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/model_creator.py +14 -0
  7. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/objective.py +30 -0
  8. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/objective_creator.py +3 -0
  9. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/linear_quantile.py +4 -0
  10. openstef-3.4.24/openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  11. {openstef-3.4.22 → openstef-3.4.24}/openstef.egg-info/PKG-INFO +2 -2
  12. {openstef-3.4.22 → openstef-3.4.24}/openstef.egg-info/SOURCES.txt +1 -0
  13. {openstef-3.4.22 → openstef-3.4.24}/setup.py +1 -1
  14. {openstef-3.4.22 → openstef-3.4.24}/LICENSE +0 -0
  15. {openstef-3.4.22 → openstef-3.4.24}/openstef/__init__.py +0 -0
  16. {openstef-3.4.22 → openstef-3.4.24}/openstef/__main__.py +0 -0
  17. {openstef-3.4.22 → openstef-3.4.24}/openstef/app_settings.py +0 -0
  18. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z +0 -0
  19. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model.z.license +0 -0
  20. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z +0 -0
  21. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_features.z.license +0 -0
  22. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z +0 -0
  23. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_adaptation_model_scaler.z.license +0 -0
  24. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z +0 -0
  25. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model.z.license +0 -0
  26. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z +0 -0
  27. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_features.z.license +0 -0
  28. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z +0 -0
  29. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_domain_model_scaler.z.license +0 -0
  30. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md +0 -0
  31. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_model_card.md.license +0 -0
  32. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z +0 -0
  33. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target.z.license +0 -0
  34. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z +0 -0
  35. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dazls_model_3.4.7/dazls_stored_3.4.7_target_scaler.z.license +0 -0
  36. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dutch_holidays_2020-2022.csv +0 -0
  37. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/dutch_holidays_2020-2022.csv.license +0 -0
  38. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/pv_single_coefs.csv +0 -0
  39. {openstef-3.4.22 → openstef-3.4.24}/openstef/data/pv_single_coefs.csv.license +0 -0
  40. {openstef-3.4.22 → openstef-3.4.24}/openstef/data_classes/__init__.py +0 -0
  41. {openstef-3.4.22 → openstef-3.4.24}/openstef/data_classes/data_prep.py +0 -0
  42. {openstef-3.4.22 → openstef-3.4.24}/openstef/data_classes/model_specifications.py +0 -0
  43. {openstef-3.4.22 → openstef-3.4.24}/openstef/data_classes/split_function.py +0 -0
  44. {openstef-3.4.22 → openstef-3.4.24}/openstef/exceptions.py +0 -0
  45. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/__init__.py +0 -0
  46. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/apply_features.py +0 -0
  47. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/data_preparation.py +0 -0
  48. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/feature_adder.py +0 -0
  49. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/feature_applicator.py +0 -0
  50. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/general.py +0 -0
  51. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/holiday_features.py +0 -0
  52. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/lag_features.py +0 -0
  53. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/missing_values_transformer.py +0 -0
  54. {openstef-3.4.22 → openstef-3.4.24}/openstef/feature_engineering/weather_features.py +0 -0
  55. {openstef-3.4.22 → openstef-3.4.24}/openstef/metrics/__init__.py +0 -0
  56. {openstef-3.4.22 → openstef-3.4.24}/openstef/metrics/figure.py +0 -0
  57. {openstef-3.4.22 → openstef-3.4.24}/openstef/metrics/reporter.py +0 -0
  58. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/__init__.py +0 -0
  59. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/basecase.py +0 -0
  60. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/confidence_interval_applicator.py +0 -0
  61. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/fallback.py +0 -0
  62. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/metamodels/__init__.py +0 -0
  63. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/metamodels/grouped_regressor.py +0 -0
  64. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/metamodels/missing_values_handler.py +0 -0
  65. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/__init__.py +0 -0
  66. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/arima.py +0 -0
  67. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/custom_regressor.py +0 -0
  68. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/dazls.py +0 -0
  69. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/lgbm.py +0 -0
  70. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/linear.py +0 -0
  71. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/regressor.py +0 -0
  72. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/xgb.py +0 -0
  73. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/regressors/xgb_quantile.py +0 -0
  74. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/serializer.py +0 -0
  75. {openstef-3.4.22 → openstef-3.4.24}/openstef/model/standard_deviation_generator.py +0 -0
  76. {openstef-3.4.22 → openstef-3.4.24}/openstef/model_selection/__init__.py +0 -0
  77. {openstef-3.4.22 → openstef-3.4.24}/openstef/model_selection/model_selection.py +0 -0
  78. {openstef-3.4.22 → openstef-3.4.24}/openstef/monitoring/__init__.py +0 -0
  79. {openstef-3.4.22 → openstef-3.4.24}/openstef/monitoring/performance_meter.py +0 -0
  80. {openstef-3.4.22 → openstef-3.4.24}/openstef/monitoring/teams.py +0 -0
  81. {openstef-3.4.22 → openstef-3.4.24}/openstef/pipeline/__init__.py +0 -0
  82. {openstef-3.4.22 → openstef-3.4.24}/openstef/pipeline/create_basecase_forecast.py +0 -0
  83. {openstef-3.4.22 → openstef-3.4.24}/openstef/pipeline/create_component_forecast.py +0 -0
  84. {openstef-3.4.22 → openstef-3.4.24}/openstef/pipeline/create_forecast.py +0 -0
  85. {openstef-3.4.22 → openstef-3.4.24}/openstef/pipeline/optimize_hyperparameters.py +0 -0
  86. {openstef-3.4.22 → openstef-3.4.24}/openstef/pipeline/train_create_forecast_backtest.py +0 -0
  87. {openstef-3.4.22 → openstef-3.4.24}/openstef/pipeline/train_model.py +0 -0
  88. {openstef-3.4.22 → openstef-3.4.24}/openstef/pipeline/utils.py +0 -0
  89. {openstef-3.4.22 → openstef-3.4.24}/openstef/postprocessing/__init__.py +0 -0
  90. {openstef-3.4.22 → openstef-3.4.24}/openstef/postprocessing/postprocessing.py +0 -0
  91. {openstef-3.4.22 → openstef-3.4.24}/openstef/preprocessing/__init__.py +0 -0
  92. {openstef-3.4.22 → openstef-3.4.24}/openstef/preprocessing/preprocessing.py +0 -0
  93. {openstef-3.4.22 → openstef-3.4.24}/openstef/settings.py +0 -0
  94. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/__init__.py +0 -0
  95. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/calculate_kpi.py +0 -0
  96. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/create_basecase_forecast.py +0 -0
  97. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/create_components_forecast.py +0 -0
  98. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/create_forecast.py +0 -0
  99. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/create_solar_forecast.py +0 -0
  100. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/create_wind_forecast.py +0 -0
  101. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/optimize_hyperparameters.py +0 -0
  102. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/split_forecast.py +0 -0
  103. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/train_model.py +0 -0
  104. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/utils/__init__.py +0 -0
  105. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/utils/dependencies.py +0 -0
  106. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/utils/predictionjobloop.py +0 -0
  107. {openstef-3.4.22 → openstef-3.4.24}/openstef/tasks/utils/taskcontext.py +0 -0
  108. {openstef-3.4.22 → openstef-3.4.24}/openstef/validation/__init__.py +0 -0
  109. {openstef-3.4.22 → openstef-3.4.24}/openstef/validation/validation.py +0 -0
  110. {openstef-3.4.22 → openstef-3.4.24}/openstef.egg-info/dependency_links.txt +0 -0
  111. {openstef-3.4.22 → openstef-3.4.24}/openstef.egg-info/requires.txt +0 -0
  112. {openstef-3.4.22 → openstef-3.4.24}/openstef.egg-info/top_level.txt +0 -0
  113. {openstef-3.4.22 → openstef-3.4.24}/pyproject.toml +0 -0
  114. {openstef-3.4.22 → openstef-3.4.24}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openstef
3
- Version: 3.4.22
3
+ Version: 3.4.24
4
4
  Summary: Open short term energy forecaster
5
5
  Home-page: https://github.com/OpenSTEF/openstef
6
6
  Author: Alliander N.V
@@ -128,4 +128,4 @@ Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/C
128
128
 
129
129
  # Contact
130
130
  Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
131
-
131
+
@@ -91,4 +91,4 @@ Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/C
91
91
 
92
92
  # Contact
93
93
  Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
94
-
94
+
@@ -26,6 +26,7 @@ class PredictionJobDataClass(BaseModel):
26
26
  - ``"lgb"``
27
27
  - ``"linear"``
28
28
  - ``"linear_quantile"``
29
+ - ``"xgb_multioutput_quantile"``
29
30
 
30
31
  If unsure what to pick, choose ``"xgb"``.
31
32
 
@@ -8,6 +8,7 @@ from enum import Enum
8
8
  class MLModelType(Enum):
9
9
  XGB = "xgb"
10
10
  XGB_QUANTILE = "xgb_quantile"
11
+ XGB_MULTIOUTPUT_QUANTILE = "xgb_multioutput_quantile"
11
12
  LGB = "lgb"
12
13
  LINEAR = "linear"
13
14
  LINEAR_QUANTILE = "linear_quantile"
@@ -431,3 +431,54 @@ def xgb_quantile_obj(
431
431
  hess = np.ones_like(preds)
432
432
 
433
433
  return grad, hess
434
+
435
+
436
+ def arctan_loss(y_true, y_pred, taus, s=0.1):
437
+ """Compute the arctan pinball loss.
438
+
439
+ Note that XGBoost outputs the predictions in a slightly peculiar manner.
440
+ Suppose we have 100 data points and we predict 10 quantiles. The predictions
441
+ will be an array of size (1000 x 1). We first resize this to a (100x10) array
442
+ where each row corresponds to the 10 predicted quantiles for a single data
443
+ point. We then use a for-loop (over the 10 columns) to calculate the gradients
444
+ and second derivatives. Legibility was chosen over efficiency. This part
445
+ can be made more efficient.
446
+
447
+ Args:
448
+ y_true: An array containing the true observations.
449
+ y_pred: An array containing the predicted quantiles.
450
+ taus: A list containing the true desired coverage of the quantiles.
451
+ s: A smoothing parameter.
452
+
453
+ Returns:
454
+ grad: An array containing the (negative) gradients with respect to y_pred.
455
+ hess: An array containing the second derivative with respect to y_pred.
456
+
457
+ """
458
+ size = len(y_true)
459
+ n_dim = len(taus) # The number of columns
460
+ n_rows = size // n_dim
461
+
462
+ # Resize the predictions and targets.
463
+ # Each column corresponds to a quantile, each row to a data point.
464
+ y_pred = np.reshape(y_pred, (n_rows, n_dim))
465
+ y_true = np.reshape(y_true, (n_rows, n_dim))
466
+
467
+ # Calculate the differences
468
+ u = y_true - y_pred
469
+
470
+ # Calculate the gradient and second derivatives
471
+ grad = np.zeros_like(y_pred)
472
+ hess = np.zeros_like(y_pred)
473
+ z = u / s
474
+ for i, tau in enumerate(taus):
475
+ x = 1 + z[:, i] ** 2
476
+ grad[:, i] = (
477
+ tau - 0.5 + 1 / np.pi * np.arctan(z[:, i]) + z[:, i] / (np.pi) * x**-1
478
+ )
479
+ hess[:, i] = 2 / (np.pi * s) * x ** (-2)
480
+
481
+ # Reshape back to the original shape.
482
+ grad = grad.reshape(size)
483
+ hess = hess.reshape(size)
484
+ return -grad / n_dim, hess / n_dim
@@ -15,6 +15,9 @@ from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegre
15
15
  from openstef.model.regressors.regressor import OpenstfRegressor
16
16
  from openstef.model.regressors.xgb import XGBOpenstfRegressor
17
17
  from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
18
+ from openstef.model.regressors.xgb_multioutput_quantile import (
19
+ XGBMultiOutputQuantileOpenstfRegressor,
20
+ )
18
21
  from openstef.settings import Settings
19
22
 
20
23
  structlog.configure(
@@ -87,6 +90,16 @@ valid_model_kwargs = {
87
90
  "max_depth",
88
91
  "early_stopping_rounds",
89
92
  ],
93
+ MLModelType.XGB_MULTIOUTPUT_QUANTILE: [
94
+ "quantiles",
95
+ "gamma",
96
+ "colsample_bytree",
97
+ "subsample",
98
+ "min_child_weight",
99
+ "max_depth",
100
+ "early_stopping_rounds",
101
+ "arctan_smoothing",
102
+ ],
90
103
  MLModelType.LINEAR: [
91
104
  "missing_values",
92
105
  "imputation_strategy",
@@ -117,6 +130,7 @@ class ModelCreator:
117
130
  MLModelType.XGB: XGBOpenstfRegressor,
118
131
  MLModelType.LGB: LGBMOpenstfRegressor,
119
132
  MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
133
+ MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
120
134
  MLModelType.LINEAR: LinearOpenstfRegressor,
121
135
  MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
122
136
  MLModelType.ARIMA: ARIMAOpenstfRegressor,
@@ -349,6 +349,36 @@ class XGBQuantileRegressorObjective(RegressorObjective):
349
349
  )
350
350
 
351
351
 
352
+ class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
353
+ def __init__(self, *args, **kwargs):
354
+ super().__init__(*args, **kwargs)
355
+ self.model_type = MLModelType.XGB_QUANTILE
356
+
357
+ def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
358
+ """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.
359
+
360
+ Args: trial
361
+
362
+ Returns:
363
+ Dictionary with hyperparameter name as key and hyperparameter value as value.
364
+
365
+ """
366
+ # Filtered default parameters
367
+ model_params = super().get_params(trial)
368
+
369
+ # XGB specific parameters
370
+ params = {
371
+ "gamma": trial.suggest_float("gamma", 1e-8, 1.0),
372
+ "arctan_smoothing": trial.suggest_float("arctan_smoothing", 0.025, 0.15),
373
+ }
374
+ return {**model_params, **params}
375
+
376
+ def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
377
+ return optuna.integration.XGBoostPruningCallback(
378
+ trial, observation_key=f"validation_1-{self.eval_metric}"
379
+ )
380
+
381
+
352
382
  class LinearRegressorObjective(RegressorObjective):
353
383
  def __init__(self, *args, **kwargs):
354
384
  super().__init__(*args, **kwargs)
@@ -12,6 +12,7 @@ from openstef.model.objective import (
12
12
  RegressorObjective,
13
13
  XGBQuantileRegressorObjective,
14
14
  XGBRegressorObjective,
15
+ XGBMultioutputQuantileRegressorObjective,
15
16
  )
16
17
  from openstef.model.regressors.custom_regressor import (
17
18
  create_custom_objective,
@@ -24,7 +25,9 @@ class ObjectiveCreator:
24
25
  MLModelType.XGB: XGBRegressorObjective,
25
26
  MLModelType.LGB: LGBRegressorObjective,
26
27
  MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
28
+ MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
27
29
  MLModelType.LINEAR: LinearRegressorObjective,
30
+ MLModelType.LINEAR_QUANTILE: LinearRegressorObjective,
28
31
  MLModelType.ARIMA: ARIMARegressorObjective,
29
32
  }
30
33
 
@@ -59,6 +59,7 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
59
59
  - Holiday features (is_christmas, is_*)
60
60
  - Lagged features (T-1d, T-*)
61
61
  - Point in time features (IsWeekendDay, IsWeekDay, IsSunday, Month, Quarter)
62
+ - Infeed MFFBAS profiles (E*_I)
62
63
 
63
64
  Args:
64
65
  quantiles: Tuple with desired quantiles, quantile 0.5 is required.
@@ -130,6 +131,9 @@ class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
130
131
  or
131
132
  # Ignore lag features
132
133
  re.match(r"T-", feature_name) is not None
134
+ or
135
+ # Ignore infeed MFFBAS profiles
136
+ re.match(r"E\d.*_I", feature_name) is not None
133
137
  )
134
138
 
135
139
  def _remove_ignored_features(self, x: pd.DataFrame) -> pd.DataFrame:
@@ -0,0 +1,261 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ from functools import partial
5
+ from typing import Dict, Optional, Sequence, Tuple, Union
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import sklearn.base
10
+ import xgboost as xgb
11
+ from sklearn.compose import TransformedTargetRegressor
12
+ from sklearn.preprocessing import StandardScaler
13
+ from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
14
+ from xgboost import Booster
15
+
16
+ import openstef.metrics.metrics as metrics
17
+ from openstef.model.regressors.regressor import OpenstfRegressor
18
+
19
+ DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
20
+
21
+
22
+ class XGBMultiOutputQuantileOpenstfRegressor(OpenstfRegressor):
23
+ r"""Model that provides multioutput quantile regression with XGBoost by default using the arctan loss function.
24
+
25
+ Arctan loss:
26
+ Reference: https://github.com/LaurensSluyterman/XGBoost_quantile_regression/tree/master
27
+ The key idea is to use a smooth approximation of the pinball loss, the arctan
28
+ pinball loss, that has a relatively large second derivative.
29
+
30
+ The approximation is given by:
31
+ $$L^{(\text{arctan})}_{\tau, s}(u) = (\tau - 0.5 + \frac{\arctan (u/s)}{\pi})u + \frac{s}{\pi}$$. # noqa E501
32
+
33
+ Some important settings:
34
+
35
+ * The parameter $s$ in the loss function determines the amount of smoothing. A
36
+ smaller value gives a closer approximation but also a much smaller second
37
+ derivative. A larger value gives more conservative quantiles when
38
+ $\tau$ is larger than 0.5, the quantile becomes larger and vice versa.
39
+ Values between 0.05 and 0.1 appear to work well. It may be a good idea to
40
+ optimize this parameter.
41
+ * Set min-child-weight to zero. The second derivatives can be a lot smaller
42
+ than 1 and this parameter may prevent any splits.
43
+ * Use a relatively small max-delta-step. We used a default of 0.5.
44
+ This prevents excessive steps that could happen due to the relatively
45
+ small second derivative.
46
+ * For the same reason, use a slightly lower learning rate of 0.05.
47
+
48
+ """
49
+
50
+ estimator_: TransformedTargetRegressor
51
+ quantile_indices_: Dict[float, int]
52
+
53
+ @staticmethod
54
+ def _get_importance_names():
55
+ return {
56
+ "gain_importance_name": "total_gain",
57
+ "weight_importance_name": "weight",
58
+ }
59
+
60
+ def __init__(
61
+ self,
62
+ quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
63
+ gamma: float = 0.0,
64
+ colsample_bytree: float = 1.0,
65
+ subsample: float = 1.0,
66
+ min_child_weight: int = 0,
67
+ max_depth: int = 6,
68
+ learning_rate: float = 0.22,
69
+ alpha: float = 0.0,
70
+ max_delta_step: int = 0.5,
71
+ arctan_smoothing: float = 0.055,
72
+ early_stopping_rounds: Optional[int] = None,
73
+ ):
74
+ """Initialize XGBMultiOutputQuantileOpenstfRegressor.
75
+
76
+ Model that provides quantile regression with XGBoost.
77
+ A single multi-output XGBoost model is trained with one output per
78
+ desired quantile; these outputs can later be used to predict quantiles.
79
+
80
+ Args:
81
+ quantiles: Tuple with desired quantiles, quantile 0.5 is required.
82
+ For example: (0.1, 0.5, 0.9)
83
+ gamma: Gamma.
84
+ colsample_bytree: Colsample by tree.
85
+ subsample: Subsample.
86
+ min_child_weight: Minimum child weight.
87
+ max_depth: Maximum depth.
88
+ learning_rate: Learning rate.
89
+ alpha: Alpha.
90
+ max_delta_step: Maximum delta step.
91
+ arctan_smoothing: smoothing parameter of the arctan loss function.
92
+ early_stopping_rounds: Number of rounds to stop training if no improvement
93
+ is made.
94
+
95
+ Raises:
96
+ ValueError in case quantile 0.5 is not in the requested quantiles.
97
+
98
+ """
99
+ super().__init__()
100
+ if 0.5 not in quantiles:
101
+ raise ValueError(
102
+ "Cannot train quantile model as 0.5 is not in requested quantiles!"
103
+ )
104
+
105
+ self.quantiles = quantiles
106
+
107
+ # Set attributes for hyper parameters
108
+ self.subsample = subsample
109
+ self.min_child_weight = min_child_weight
110
+ self.max_depth = max_depth
111
+ self.gamma = gamma
112
+ self.alpha = alpha
113
+ self.max_delta_step = max_delta_step
114
+ self.colsample_bytree = colsample_bytree
115
+ self.learning_rate = learning_rate
116
+ self.early_stopping_rounds = early_stopping_rounds
117
+ self.arctan_smoothing = arctan_smoothing
118
+
119
+ # Get fitting parameters - only those required for xgbooster's
120
+ xgb_regressor_params = {
121
+ key: value
122
+ for key, value in self.get_params().items()
123
+ if key in xgb.XGBRegressor().get_params().keys()
124
+ }
125
+
126
+ # Define the model
127
+ objective = partial(
128
+ metrics.arctan_loss, taus=self.quantiles, s=arctan_smoothing
129
+ )
130
+ xgb_model: xgb.XGBRegressor = xgb.XGBRegressor(
131
+ objective=objective,
132
+ base_score=0,
133
+ multi_strategy="one_output_per_tree",
134
+ **xgb_regressor_params,
135
+ )
136
+ self.estimator_ = TransformedTargetRegressor(
137
+ regressor=xgb_model, transformer=StandardScaler()
138
+ )
139
+
140
+ # Set quantile indices to remap multioutput predictions
141
+ self.quantile_indices_ = {
142
+ quantile: i for i, quantile in enumerate(self.quantiles)
143
+ }
144
+
145
+ def fit(
146
+ self,
147
+ x: np.array,
148
+ y: np.array,
149
+ eval_set: Optional[Sequence[Tuple[np.array, np.array]]] = None,
150
+ verbose: Optional[Union[bool, int]] = 0,
151
+ **kwargs
152
+ ) -> OpenstfRegressor:
153
+ """Fits xgb quantile model.
154
+
155
+ Args:
156
+ x: Feature matrix.
157
+ y: Labels.
158
+ eval_set: Evaluation set to monitor training performance.
159
+ verbose: Verbosity level (disabled by default).
160
+
161
+ Returns:
162
+ Fitted XGBQuantile model.
163
+
164
+ """
165
+ if isinstance(y, pd.Series):
166
+ y = y.to_numpy()
167
+
168
+ if not isinstance(x, pd.DataFrame):
169
+ x = pd.DataFrame(np.asarray(x))
170
+
171
+ # Check/validate input
172
+ check_X_y(x, y, force_all_finite="allow-nan")
173
+
174
+ # Prepare inputs
175
+ y_multioutput = replicate_for_multioutput(y, len(self.quantiles))
176
+
177
+ # Define watchlist if eval_set is defined
178
+ eval_set_multioutput = []
179
+ if eval_set:
180
+ for x_eval, y_eval in eval_set:
181
+ if isinstance(y_eval, pd.Series):
182
+ y_eval = y_eval.to_numpy()
183
+
184
+ y_eval_multioutput = replicate_for_multioutput(
185
+ y=y_eval, num_quantiles=len(self.quantiles)
186
+ )
187
+ eval_set_multioutput.append((x_eval, y_eval_multioutput))
188
+
189
+ eval_set_multioutput.append((x, y_multioutput))
190
+
191
+ self.estimator_.fit(
192
+ X=x.copy(deep=True),
193
+ y=y_multioutput,
194
+ eval_set=eval_set_multioutput,
195
+ verbose=verbose,
196
+ )
197
+
198
+ # Update state of the estimator
199
+ self.feature_importances_ = self.estimator_.regressor_.feature_importances_
200
+ self.is_fitted_ = True
201
+
202
+ return self
203
+
204
+ def predict(self, x: np.array, quantile: float = 0.5) -> np.array:
205
+ """Makes a prediction for a desired quantile.
206
+
207
+ Args:
208
+ x: Feature matrix.
209
+ quantile: Quantile for which a prediction is desired,
210
+ note that only quantiles for which a model is trained are available,
211
+ and that this is a quantile-model specific keyword.
212
+
213
+ Returns:
214
+ Prediction
215
+
216
+ Raises:
217
+ ValueError in case no model is trained for the requested quantile.
218
+
219
+ """
220
+ # Check if model is trained for this quantile
221
+ if quantile not in self.quantiles:
222
+ raise ValueError("No model trained for requested quantile!")
223
+
224
+ # Check/validate input
225
+ check_array(x, force_all_finite="allow-nan")
226
+ check_is_fitted(self)
227
+
228
+ # best_iteration is only available if early stopping was used during training
229
+ prediction: np.array
230
+ if hasattr(self.estimator_, "best_iteration"):
231
+ prediction = self.estimator_.predict(
232
+ X=x,
233
+ iteration_range=(0, self.estimator_.best_iteration + 1),
234
+ )
235
+ else:
236
+ prediction = self.estimator_.predict(X=x)
237
+
238
+ quantile_index = self.quantile_indices_[quantile]
239
+ return prediction[:, quantile_index]
240
+
241
+ @property
242
+ def feature_names(self):
243
+ return self.estimator_.feature_names_in_
244
+
245
+ @property
246
+ def can_predict_quantiles(self):
247
+ return True
248
+
249
+
250
+ def replicate_for_multioutput(y: np.array, num_quantiles: int) -> np.array:
251
+ """Replicates a 1D array to a 2D array for multioutput regression.
252
+
253
+ Args:
254
+ y: 1D array.
255
+ num_quantiles: Number of columns in the output array.
256
+
257
+ Returns:
258
+ 2D array with shape (len(y), num_quantiles)
259
+
260
+ """
261
+ return np.repeat(y[:, None], num_quantiles, axis=1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openstef
3
- Version: 3.4.22
3
+ Version: 3.4.24
4
4
  Summary: Open short term energy forecaster
5
5
  Home-page: https://github.com/OpenSTEF/openstef
6
6
  Author: Alliander N.V
@@ -128,4 +128,4 @@ Please read [CODE_OF_CONDUCT.md](https://github.com/OpenSTEF/.github/blob/main/C
128
128
 
129
129
  # Contact
130
130
  Please read [SUPPORT.md](https://github.com/OpenSTEF/.github/blob/main/SUPPORT.md) for how to connect and get into contact with the OpenSTEF project
131
-
131
+
@@ -76,6 +76,7 @@ openstef/model/regressors/linear.py
76
76
  openstef/model/regressors/linear_quantile.py
77
77
  openstef/model/regressors/regressor.py
78
78
  openstef/model/regressors/xgb.py
79
+ openstef/model/regressors/xgb_multioutput_quantile.py
79
80
  openstef/model/regressors/xgb_quantile.py
80
81
  openstef/model_selection/__init__.py
81
82
  openstef/model_selection/model_selection.py
@@ -29,7 +29,7 @@ def read_long_description_from_readme():
29
29
 
30
30
  setup(
31
31
  name="openstef",
32
- version="3.4.22",
32
+ version="3.4.24",
33
33
  packages=find_packages(include=["openstef", "openstef.*"]),
34
34
  description="Open short term energy forecaster",
35
35
  long_description=read_long_description_from_readme(),
File without changes
File without changes
File without changes