openstef 3.4.10__py3-none-any.whl → 3.4.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. openstef/app_settings.py +19 -0
  2. openstef/data_classes/data_prep.py +1 -1
  3. openstef/data_classes/prediction_job.py +12 -8
  4. openstef/enums.py +3 -7
  5. openstef/exceptions.py +1 -1
  6. openstef/feature_engineering/apply_features.py +0 -6
  7. openstef/feature_engineering/data_preparation.py +12 -5
  8. openstef/feature_engineering/feature_applicator.py +1 -5
  9. openstef/feature_engineering/general.py +14 -0
  10. openstef/feature_engineering/missing_values_transformer.py +99 -0
  11. openstef/feature_engineering/weather_features.py +7 -0
  12. openstef/metrics/figure.py +3 -0
  13. openstef/metrics/metrics.py +58 -1
  14. openstef/metrics/reporter.py +7 -0
  15. openstef/model/confidence_interval_applicator.py +28 -3
  16. openstef/model/model_creator.py +36 -27
  17. openstef/model/objective.py +11 -28
  18. openstef/model/objective_creator.py +4 -3
  19. openstef/model/regressors/arima.py +1 -1
  20. openstef/model/regressors/dazls.py +35 -96
  21. openstef/model/regressors/flatliner.py +100 -0
  22. openstef/model/regressors/linear_quantile.py +247 -0
  23. openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  24. openstef/model/regressors/xgb_quantile.py +3 -0
  25. openstef/model/serializer.py +10 -0
  26. openstef/model_selection/model_selection.py +3 -0
  27. openstef/monitoring/performance_meter.py +1 -2
  28. openstef/monitoring/teams.py +11 -0
  29. openstef/pipeline/create_basecase_forecast.py +11 -1
  30. openstef/pipeline/create_component_forecast.py +11 -22
  31. openstef/pipeline/create_forecast.py +20 -1
  32. openstef/pipeline/optimize_hyperparameters.py +18 -16
  33. openstef/pipeline/train_create_forecast_backtest.py +11 -1
  34. openstef/pipeline/train_model.py +23 -7
  35. openstef/pipeline/utils.py +3 -0
  36. openstef/postprocessing/postprocessing.py +29 -0
  37. openstef/settings.py +15 -0
  38. openstef/tasks/calculate_kpi.py +20 -17
  39. openstef/tasks/create_basecase_forecast.py +13 -5
  40. openstef/tasks/create_components_forecast.py +20 -4
  41. openstef/tasks/create_forecast.py +5 -2
  42. openstef/tasks/split_forecast.py +7 -0
  43. openstef/tasks/train_model.py +7 -5
  44. openstef/tasks/utils/taskcontext.py +7 -0
  45. openstef/validation/validation.py +27 -2
  46. {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/METADATA +34 -38
  47. openstef-3.4.29.dist-info/RECORD +91 -0
  48. {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/WHEEL +1 -1
  49. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
  50. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license +0 -3
  51. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
  52. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license +0 -3
  53. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
  54. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license +0 -3
  55. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
  56. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
  57. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
  58. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
  59. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
  60. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
  61. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
  62. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
  63. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
  64. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
  65. openstef/feature_engineering/historic_features.py +0 -40
  66. openstef/model/regressors/proloaf.py +0 -281
  67. openstef/tasks/run_tracy.py +0 -145
  68. openstef-3.4.10.dist-info/RECORD +0 -104
  69. {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/LICENSE +0 -0
  70. {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,19 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+
5
+ from pydantic import Field
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+
8
+
9
+ class AppSettings(BaseSettings):
10
+ """Global app settings."""
11
+
12
+ model_config = SettingsConfigDict(
13
+ env_prefix="openstef_", env_file=".env", extra="ignore"
14
+ )
15
+
16
+ post_teams_messages: bool = True
17
+
18
+ # Logging settings.
19
+ log_level: str = Field("INFO", description="Log level used for logging statements.")
@@ -5,7 +5,7 @@
5
5
  import inspect
6
6
  import json
7
7
  from importlib import import_module
8
- from typing import Any, Sequence, Union, TypeVar
8
+ from typing import Any, Sequence, TypeVar, Union
9
9
 
10
10
  from pydantic.v1 import BaseModel
11
11
 
@@ -6,9 +6,9 @@ from typing import Optional, Union
6
6
 
7
7
  from pydantic.v1 import BaseModel
8
8
 
9
+ from openstef.data_classes.data_prep import DataPrepDataClass
9
10
  from openstef.data_classes.model_specifications import ModelSpecificationDataClass
10
11
  from openstef.data_classes.split_function import SplitFuncDataClass
11
- from openstef.data_classes.data_prep import DataPrepDataClass
12
12
  from openstef.enums import PipelineType
13
13
 
14
14
 
@@ -25,11 +25,15 @@ class PredictionJobDataClass(BaseModel):
25
25
  - ``"xgb_quantile"``
26
26
  - ``"lgb"``
27
27
  - ``"linear"``
28
- - ``"proloaf"`` (extra dependencies requiered, see README)
28
+ - ``"linear_quantile"``
29
+ - ``"xgb_multioutput_quantile"``
30
+ - ``"flatliner"``
29
31
 
30
32
  If unsure what to pick, choose ``"xgb"``.
31
33
 
32
34
  """
35
+ model_kwargs: Optional[dict]
36
+ """The model parameters that should be used."""
33
37
  forecast_type: str
34
38
  """The type of forecasts that should be made.
35
39
 
@@ -41,14 +45,14 @@ class PredictionJobDataClass(BaseModel):
41
45
  If unsure what to pick, choose ``"demand"``.
42
46
 
43
47
  """
44
- horizon_minutes: int = 2880
45
- """The horizon of the desired forecast in minutes. Defaults to 2880 minutes (i.e. 2 days)."""
48
+ horizon_minutes: Optional[int] = 2880
49
+ """The horizon of the desired forecast in minutes used in tasks. Defaults to 2880 minutes (i.e. 2 days)."""
46
50
  resolution_minutes: int
47
51
  """The resolution of the desired forecast in minutes."""
48
- lat: float
49
- """Latitude of the forecasted location in degrees."""
50
- lon: float
51
- """Longitude of the forecasted location in degrees."""
52
+ lat: Optional[float] = 52.132633
53
+ """Latitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derived features and component splitting."""
54
+ lon: Optional[float] = 5.291266
55
+ """Longitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derived features and component splitting."""
52
56
  name: str
53
57
  """Name of the forecast, e.g. the location name."""
54
58
  train_components: Optional[bool]
openstef/enums.py CHANGED
@@ -8,10 +8,12 @@ from enum import Enum
8
8
  class MLModelType(Enum):
9
9
  XGB = "xgb"
10
10
  XGB_QUANTILE = "xgb_quantile"
11
+ XGB_MULTIOUTPUT_QUANTILE = "xgb_multioutput_quantile"
11
12
  LGB = "lgb"
12
13
  LINEAR = "linear"
13
- ProLoaf = "proloaf"
14
+ LINEAR_QUANTILE = "linear_quantile"
14
15
  ARIMA = "arima"
16
+ FLATLINER = "flatliner"
15
17
 
16
18
 
17
19
  class ForecastType(Enum):
@@ -21,12 +23,6 @@ class ForecastType(Enum):
21
23
  BASECASE = "basecase"
22
24
 
23
25
 
24
- class TracyJobResult(Enum):
25
- SUCCESS = "success"
26
- FAILED = "failed"
27
- UNKNOWN = "unknown"
28
-
29
-
30
26
  class PipelineType(Enum):
31
27
  FORECAST = "forecast"
32
28
  TRAIN = "train"
openstef/exceptions.py CHANGED
@@ -5,7 +5,7 @@
5
5
  """Openstef custom exceptions."""
6
6
 
7
7
 
8
- # Define custom exception
8
+ # Define custom exceptions
9
9
  class NoPredictedLoadError(Exception):
10
10
  """No predicted load for given datatime range."""
11
11
 
@@ -14,9 +14,6 @@ Examples of features that are added:
14
14
  import pandas as pd
15
15
 
16
16
  from openstef.data_classes.prediction_job import PredictionJobDataClass
17
- from openstef.feature_engineering.historic_features import (
18
- add_historic_load_as_a_feature,
19
- )
20
17
  from openstef.feature_engineering.holiday_features import (
21
18
  generate_holiday_feature_functions,
22
19
  )
@@ -69,9 +66,6 @@ def apply_features(
69
66
  np.random.uniform(0.7,1.7, 200)))
70
67
 
71
68
  """
72
- # Add if needed the proloaf feature (historic_load)
73
- data = add_historic_load_as_a_feature(data, pj)
74
-
75
69
  # Get lag feature functions
76
70
  feature_functions = generate_lag_feature_functions(feature_names, horizon)
77
71
 
@@ -1,25 +1,27 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
- import structlog
5
-
4
+ import logging
6
5
  from abc import ABC, abstractmethod
6
+ from datetime import timedelta
7
7
  from typing import Optional
8
8
 
9
9
  import pandas as pd
10
- from datetime import timedelta
10
+ import structlog
11
+
11
12
  from openstef.data_classes.model_specifications import ModelSpecificationDataClass
12
13
  from openstef.data_classes.prediction_job import PredictionJobDataClass
13
- from openstef.model.regressors.regressor import OpenstfRegressor
14
14
  from openstef.feature_engineering.feature_applicator import (
15
- TrainFeatureApplicator,
16
15
  OperationalPredictFeatureApplicator,
16
+ TrainFeatureApplicator,
17
17
  )
18
18
  from openstef.feature_engineering.general import (
19
19
  enforce_feature_order,
20
20
  remove_non_requested_feature_columns,
21
21
  )
22
+ from openstef.model.regressors.regressor import OpenstfRegressor
22
23
  from openstef.pipeline.utils import generate_forecast_datetime_range
24
+ from openstef.settings import Settings
23
25
 
24
26
 
25
27
  class AbstractDataPreparation(ABC):
@@ -120,6 +122,11 @@ class ARDataPreparation(AbstractDataPreparation):
120
122
  def prepare_forecast_data(
121
123
  self, data: pd.DataFrame
122
124
  ) -> tuple[pd.DataFrame, pd.DataFrame]:
125
+ structlog.configure(
126
+ wrapper_class=structlog.make_filtering_bound_logger(
127
+ logging.getLevelName(Settings.log_level)
128
+ )
129
+ )
123
130
  logger = structlog.get_logger(__name__)
124
131
  self.check_model()
125
132
  # Prep forecast input by selecting only the forecast datetime interval (this is much smaller than the input range)
@@ -149,11 +149,7 @@ class TrainFeatureApplicator(AbstractFeatureApplicator):
149
149
 
150
150
  # NOTE this is required since apply_features could add additional features
151
151
  if self.feature_names is not None:
152
- # Add horizon to requested features else it is removed, and if needed the proloaf feature (historic_load)
153
- if pj.get("model") == "proloaf":
154
- features = self.feature_names + ["historic_load"] + ["horizon"]
155
- else:
156
- features = self.feature_names + ["horizon"]
152
+ features = self.feature_names + ["horizon"]
157
153
  result = remove_non_requested_feature_columns(result, features)
158
154
 
159
155
  # Sort all features except for the (first) load and (last) horizon columns
@@ -3,10 +3,14 @@
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  """This modelu contains various helper functions."""
5
5
 
6
+ import logging
7
+
6
8
  import numpy as np
7
9
  import pandas as pd
8
10
  import structlog
9
11
 
12
+ from openstef.settings import Settings
13
+
10
14
 
11
15
  def add_missing_feature_columns(
12
16
  input_data: pd.DataFrame, features: list[str]
@@ -30,6 +34,11 @@ def add_missing_feature_columns(
30
34
  Input dataframe with missing columns filled with ``np.nan``.
31
35
 
32
36
  """
37
+ structlog.configure(
38
+ wrapper_class=structlog.make_filtering_bound_logger(
39
+ logging.getLevelName(Settings.log_level)
40
+ )
41
+ )
33
42
  logger = structlog.get_logger(__name__)
34
43
 
35
44
  if features is None:
@@ -61,6 +70,11 @@ def remove_non_requested_feature_columns(
61
70
  Model input data with features.
62
71
 
63
72
  """
73
+ structlog.configure(
74
+ wrapper_class=structlog.make_filtering_bound_logger(
75
+ logging.getLevelName(Settings.log_level)
76
+ )
77
+ )
64
78
  logger = structlog.get_logger(__name__)
65
79
 
66
80
  if requested_features is None:
@@ -0,0 +1,99 @@
1
+ # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ from typing import Union, List, Optional
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from sklearn.impute import SimpleImputer
9
+ from sklearn.preprocessing import FunctionTransformer
10
+ from sklearn.utils.validation import check_array
11
+
12
+
13
+ class MissingValuesTransformer:
14
+ """MissingValuesTransformer handles missing values in data by imputing them with a given strategy.
15
+
16
+ It also removes columns that are always null from the data.
17
+
18
+ """
19
+
20
+ in_feature_names: Optional[List[str]] = None
21
+ _n_in_features: Optional[int] = None
22
+
23
+ non_null_feature_names: List[str] = None
24
+
25
+ def __init__(
26
+ self,
27
+ missing_values: Union[int, float, str, None] = np.nan,
28
+ imputation_strategy: str = None,
29
+ fill_value: Union[str, int, float] = None,
30
+ ):
31
+ """Initialize missing values handler.
32
+
33
+ Args:
34
+ missing_values: The placeholder for the missing values. All occurrences of
35
+ `missing_values` will be imputed.
36
+ imputation_strategy: The imputation strategy to use
37
+ Can be one of "mean", "median", "most_frequent", "constant" or None.
38
+ fill_value: When strategy == "constant", fill_value is used to replace all
39
+ occurrences of missing_values.
40
+
41
+ """
42
+ self.missing_values = missing_values
43
+ self.imputation_strategy = imputation_strategy
44
+ self.fill_value = fill_value
45
+
46
+ def fit(self, x, y=None):
47
+ """Fit the imputer on the input data."""
48
+ _ = check_array(x, force_all_finite="allow-nan")
49
+ if not isinstance(x, pd.DataFrame):
50
+ x = pd.DataFrame(np.asarray(x))
51
+
52
+ self.in_feature_names = list(x.columns)
53
+ self._n_in_features = x.shape[1]
54
+
55
+ # Remove always null columns
56
+ is_column_null = x.isnull().all(axis="index")
57
+ self.non_null_feature_names = list(x.columns[~is_column_null])
58
+
59
+ # Build the proper imputation transformer
60
+ # - Identity function if strategy is None
61
+ # - SimpleImputer with the dedicated strategy
62
+ if self.imputation_strategy is None:
63
+ self.imputer_ = FunctionTransformer(func=self._identity)
64
+ else:
65
+ self.imputer_ = SimpleImputer(
66
+ missing_values=self.missing_values,
67
+ strategy=self.imputation_strategy,
68
+ fill_value=self.fill_value,
69
+ ).set_output(transform="pandas")
70
+
71
+ # Imputers do not support labels
72
+ self.imputer_.fit(X=x, y=None)
73
+
74
+ def transform(self, x) -> pd.DataFrame:
75
+ """Transform the input data by imputing missing values."""
76
+ _ = check_array(x, force_all_finite="allow-nan")
77
+ if not isinstance(x, pd.DataFrame):
78
+ x = pd.DataFrame(np.asarray(x))
79
+
80
+ x = x[self.non_null_feature_names]
81
+
82
+ return self.imputer_.transform(x)
83
+
84
+ def fit_transform(self, x, y=None):
85
+ """Fit the imputer on the input data and transform it.
86
+
87
+ Returns:
88
+ The data with missing values imputed.
89
+
90
+ """
91
+ self.fit(x, y)
92
+ return self.transform(x)
93
+
94
+ @classmethod
95
+ def _identity(cls, x):
96
+ return x
97
+
98
+ def __sklearn_is_fitted__(self) -> bool:
99
+ return self.in_feature_names is not None
@@ -3,6 +3,7 @@
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
 
5
5
  """This module contains all wheather related functions used for feature engineering."""
6
+ import logging
6
7
  from typing import Union
7
8
 
8
9
  import numpy as np
@@ -12,7 +13,13 @@ import structlog
12
13
  from pvlib.location import Location
13
14
 
14
15
  from openstef.data_classes.prediction_job import PredictionJobDataClass
16
+ from openstef.settings import Settings
15
17
 
18
+ structlog.configure(
19
+ wrapper_class=structlog.make_filtering_bound_logger(
20
+ logging.getLevelName(Settings.log_level)
21
+ )
22
+ )
16
23
  logger = structlog.get_logger(__name__)
17
24
 
18
25
 
@@ -71,6 +71,9 @@ def plot_data_series(
71
71
  Returns:
72
72
  A line plot of each passed data series.
73
73
 
74
+ Raises:
75
+ ValueError: If names is None and the number of series is greater than 3.
76
+
74
77
  """
75
78
  series_names = {
76
79
  1: ("series",),
@@ -25,6 +25,9 @@ def get_eval_metric_function(metric_name: str) -> Callable:
25
25
  Returns:
26
26
  Function to calculate the metric.
27
27
 
28
+ Raises:
29
+ KeyError: If the metric is not available.
30
+
28
31
  """
29
32
  evaluation_function = {
30
33
  "rmse": rmse,
@@ -130,6 +133,9 @@ def r_mae_highest(
130
133
 
131
134
  The range is based on the load range of the previous two weeks.
132
135
 
136
+ Raises:
137
+ ValueError: If the lengths of the realised and forecast arrays are not equal.
138
+
133
139
  """
134
140
  # Check if length of both arrays is equal
135
141
  if len(np.array(realised)) != len(np.array(forecast)):
@@ -395,7 +401,7 @@ def xgb_quantile_obj(
395
401
  Args:
396
402
  preds: numpy.ndarray
397
403
  dmatrix: xgboost.DMatrix
398
- quantile: float
404
+ quantile: float between 0 and 1
399
405
 
400
406
  Returns:
401
407
  Gradient and Hessian
@@ -425,3 +431,54 @@ def xgb_quantile_obj(
425
431
  hess = np.ones_like(preds)
426
432
 
427
433
  return grad, hess
434
+
435
+
436
+ def arctan_loss(y_true, y_pred, taus, s=0.1):
437
+ """Compute the arctan pinball loss.
438
+
439
+ Note that XGBoost outputs the predictions in a slightly peculiar manner.
440
+ Suppose we have 100 data points and we predict 10 quantiles. The predictions
441
+ will be an array of size (1000 x 1). We first resize this to a (100x10) array
442
+ where each row corresponds to the 10 predicted quantiles for a single data
443
+ point. We then use a for-loop (over the 10 columns) to calculate the gradients
444
+ and second derivatives. Legibility was chosen over efficiency. This part
445
+ can be made more efficient.
446
+
447
+ Args:
448
+ y_true: An array containing the true observations.
449
+ y_pred: An array containing the predicted quantiles.
450
+ taus: A list containing the true desired coverage of the quantiles.
451
+ s: A smoothing parameter.
452
+
453
+ Returns:
454
+ grad: An array containing the (negative) gradients with respect to y_pred.
455
+ hess: An array containing the second derivative with respect to y_pred.
456
+
457
+ """
458
+ size = len(y_true)
459
+ n_dim = len(taus) # The number of columns
460
+ n_rows = size // n_dim
461
+
462
+ # Resize the predictions and targets.
463
+ # Each column corresponds to a quantile, each row to a data point.
464
+ y_pred = np.reshape(y_pred, (n_rows, n_dim))
465
+ y_true = np.reshape(y_true, (n_rows, n_dim))
466
+
467
+ # Calculate the differences
468
+ u = y_true - y_pred
469
+
470
+ # Calculate the gradient and second derivatives
471
+ grad = np.zeros_like(y_pred)
472
+ hess = np.zeros_like(y_pred)
473
+ z = u / s
474
+ for i, tau in enumerate(taus):
475
+ x = 1 + z[:, i] ** 2
476
+ grad[:, i] = (
477
+ tau - 0.5 + 1 / np.pi * np.arctan(z[:, i]) + z[:, i] / (np.pi) * x**-1
478
+ )
479
+ hess[:, i] = 2 / (np.pi * s) * x ** (-2)
480
+
481
+ # Reshape back to the original shape.
482
+ grad = grad.reshape(size)
483
+ hess = hess.reshape(size)
484
+ return -grad / n_dim, hess / n_dim
@@ -2,6 +2,7 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
4
  """Defines reporter class."""
5
+ import logging
5
6
  import os
6
7
  import warnings
7
8
  from dataclasses import dataclass
@@ -16,6 +17,7 @@ from plotly.graph_objects import Figure
16
17
  from openstef.metrics import figure
17
18
  from openstef.metrics.metrics import bias, mae, nsme, r_mae, rmse
18
19
  from openstef.model.regressors.regressor import OpenstfRegressor
20
+ from openstef.settings import Settings
19
21
 
20
22
 
21
23
  @dataclass
@@ -167,6 +169,11 @@ class Reporter:
167
169
  def write_report_to_disk(report: Report, report_folder: str):
168
170
  """Write report to disk; e.g. for viewing report of latest models using grafana."""
169
171
  # Initialize logger and serializer
172
+ structlog.configure(
173
+ wrapper_class=structlog.make_filtering_bound_logger(
174
+ logging.getLevelName(Settings.log_level)
175
+ )
176
+ )
170
177
  logger = structlog.get_logger(__name__)
171
178
  if report_folder:
172
179
  # create path if does not exist
@@ -1,6 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
4
5
  from datetime import datetime
5
6
 
6
7
  import numpy as np
@@ -11,12 +12,18 @@ from sklearn.base import RegressorMixin
11
12
 
12
13
  from openstef.data_classes.prediction_job import PredictionJobDataClass
13
14
  from openstef.exceptions import ModelWithoutStDev
15
+ from openstef.settings import Settings
14
16
 
15
17
 
16
18
  class ConfidenceIntervalApplicator:
17
19
  def __init__(self, model: RegressorMixin, forecast_input_data: pd.DataFrame):
18
20
  self.model = model
19
21
  self.forecast_input_data = forecast_input_data
22
+ structlog.configure(
23
+ wrapper_class=structlog.make_filtering_bound_logger(
24
+ logging.getLevelName(Settings.log_level)
25
+ )
26
+ )
20
27
  self.logger = structlog.get_logger(self.__class__.__name__)
21
28
 
22
29
  def add_confidence_interval(
@@ -54,9 +61,24 @@ class ConfidenceIntervalApplicator:
54
61
  temp_forecast = self._add_standard_deviation_to_forecast(forecast)
55
62
 
56
63
  if self.model.can_predict_quantiles:
57
- return self._add_quantiles_to_forecast_quantile_regression(
58
- temp_forecast, pj["quantiles"]
59
- )
64
+ # Try to generate the quantiles that were requested
65
+ try:
66
+ result = self._add_quantiles_to_forecast_quantile_regression(
67
+ temp_forecast, pj["quantiles"]
68
+ )
69
+ return result
70
+ except Exception:
71
+ # Fall back on the quantiles of the model if the requested quantiles cannot be generated by the model.
72
+ # Can happen when the model was trained on different quantiles than are requested
73
+ result = self._add_quantiles_to_forecast_quantile_regression(
74
+ temp_forecast, self.model.quantiles
75
+ )
76
+ self.logger.warning(
77
+ "Quantiles are requested the model was not trained on. Using the quantiles the model was trained on",
78
+ requested_quantiles=pj["quantiles"],
79
+ trained_quantiles=self.model.quantiles,
80
+ )
81
+ return result
60
82
 
61
83
  return self._add_quantiles_to_forecast_default(temp_forecast, pj["quantiles"])
62
84
 
@@ -74,6 +96,9 @@ class ConfidenceIntervalApplicator:
74
96
  Forecast with added standard deviation. DataFrame with columns:
75
97
  "forecast", "stdev"
76
98
 
99
+ Raises:
100
+ ModelWithoutStDev: If the model does not have a valid standard deviation.
101
+
77
102
  """
78
103
  minimal_resolution: int = 15 # Minimal time resolution in minutes
79
104
  standard_deviation = self.model.standard_deviation
@@ -1,25 +1,32 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
4
5
  from typing import Union
5
6
 
6
7
  import structlog
7
8
 
8
9
  from openstef.enums import MLModelType
10
+ from openstef.model.regressors.arima import ARIMAOpenstfRegressor
9
11
  from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
10
12
  from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
11
13
  from openstef.model.regressors.linear import LinearOpenstfRegressor
14
+ from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
12
15
  from openstef.model.regressors.regressor import OpenstfRegressor
16
+ from openstef.model.regressors.flatliner import FlatlinerRegressor
13
17
  from openstef.model.regressors.xgb import XGBOpenstfRegressor
14
18
  from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
15
- from openstef.model.regressors.arima import ARIMAOpenstfRegressor
19
+ from openstef.model.regressors.xgb_multioutput_quantile import (
20
+ XGBMultiOutputQuantileOpenstfRegressor,
21
+ )
22
+ from openstef.settings import Settings
16
23
 
24
+ structlog.configure(
25
+ wrapper_class=structlog.make_filtering_bound_logger(
26
+ logging.getLevelName(Settings.log_level)
27
+ )
28
+ )
17
29
  logger = structlog.get_logger(__name__)
18
- try:
19
- from openstef.model.regressors.proloaf import OpenstfProloafRegressor
20
- except ImportError:
21
- logger.info("Proloaf not available, setting constructor to None")
22
- OpenstfProloafRegressor = None
23
30
 
24
31
  valid_model_kwargs = {
25
32
  MLModelType.XGB: [
@@ -84,32 +91,32 @@ valid_model_kwargs = {
84
91
  "max_depth",
85
92
  "early_stopping_rounds",
86
93
  ],
87
- MLModelType.ProLoaf: [
88
- "relu_leak",
89
- "encoder_features",
90
- "decoder_features",
91
- "core_layers",
92
- "rel_linear_hidden_size",
93
- "rel_core_hidden_size",
94
- "dropout_fc",
95
- "dropout_core",
96
- "training_metric",
97
- "metric_options",
98
- "optimizer_name",
99
- "early_stopping_patience",
100
- "early_stopping_margin",
101
- "learning_rate",
102
- "max_epochs",
103
- "device",
104
- "batch_size",
105
- "history_horizon",
106
- "horizon_minutes",
94
+ MLModelType.XGB_MULTIOUTPUT_QUANTILE: [
95
+ "quantiles",
96
+ "gamma",
97
+ "colsample_bytree",
98
+ "subsample",
99
+ "min_child_weight",
100
+ "max_depth",
101
+ "early_stopping_rounds",
102
+ "arctan_smoothing",
107
103
  ],
108
104
  MLModelType.LINEAR: [
109
105
  "missing_values",
110
106
  "imputation_strategy",
111
107
  "fill_value",
112
108
  ],
109
+ MLModelType.FLATLINER: [
110
+ "quantiles",
111
+ ],
112
+ MLModelType.LINEAR_QUANTILE: [
113
+ "alpha",
114
+ "quantiles",
115
+ "solver",
116
+ "missing_values",
117
+ "imputation_strategy",
118
+ "fill_value",
119
+ ],
113
120
  MLModelType.ARIMA: [
114
121
  "backtest_max_horizon",
115
122
  "order",
@@ -127,9 +134,11 @@ class ModelCreator:
127
134
  MLModelType.XGB: XGBOpenstfRegressor,
128
135
  MLModelType.LGB: LGBMOpenstfRegressor,
129
136
  MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
130
- MLModelType.ProLoaf: OpenstfProloafRegressor,
137
+ MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
131
138
  MLModelType.LINEAR: LinearOpenstfRegressor,
139
+ MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
132
140
  MLModelType.ARIMA: ARIMAOpenstfRegressor,
141
+ MLModelType.FLATLINER: FlatlinerRegressor,
133
142
  }
134
143
 
135
144
  @staticmethod