openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. openstef/app_settings.py +19 -0
  2. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  3. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
  4. openstef/data/dutch_holidays.csv +1759 -0
  5. openstef/data_classes/data_prep.py +1 -1
  6. openstef/data_classes/prediction_job.py +15 -9
  7. openstef/enums.py +108 -9
  8. openstef/exceptions.py +1 -1
  9. openstef/feature_engineering/apply_features.py +25 -6
  10. openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
  11. openstef/feature_engineering/cyclic_features.py +102 -0
  12. openstef/feature_engineering/data_preparation.py +12 -5
  13. openstef/feature_engineering/feature_applicator.py +1 -5
  14. openstef/feature_engineering/general.py +14 -0
  15. openstef/feature_engineering/holiday_features.py +35 -26
  16. openstef/feature_engineering/missing_values_transformer.py +141 -0
  17. openstef/feature_engineering/weather_features.py +7 -0
  18. openstef/metrics/figure.py +3 -0
  19. openstef/metrics/metrics.py +58 -1
  20. openstef/metrics/reporter.py +7 -0
  21. openstef/model/confidence_interval_applicator.py +28 -3
  22. openstef/model/model_creator.py +54 -41
  23. openstef/model/objective.py +17 -34
  24. openstef/model/objective_creator.py +13 -12
  25. openstef/model/regressors/arima.py +1 -1
  26. openstef/model/regressors/dazls.py +35 -96
  27. openstef/model/regressors/flatliner.py +95 -0
  28. openstef/model/regressors/linear_quantile.py +296 -0
  29. openstef/model/regressors/xgb.py +23 -0
  30. openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  31. openstef/model/regressors/xgb_quantile.py +3 -0
  32. openstef/model/serializer.py +10 -0
  33. openstef/model_selection/model_selection.py +4 -1
  34. openstef/monitoring/performance_meter.py +1 -2
  35. openstef/monitoring/teams.py +11 -0
  36. openstef/pipeline/create_basecase_forecast.py +11 -1
  37. openstef/pipeline/create_component_forecast.py +24 -28
  38. openstef/pipeline/create_forecast.py +20 -1
  39. openstef/pipeline/optimize_hyperparameters.py +18 -16
  40. openstef/pipeline/train_create_forecast_backtest.py +11 -1
  41. openstef/pipeline/train_model.py +31 -12
  42. openstef/pipeline/utils.py +3 -0
  43. openstef/postprocessing/postprocessing.py +29 -0
  44. openstef/settings.py +15 -0
  45. openstef/tasks/calculate_kpi.py +23 -20
  46. openstef/tasks/create_basecase_forecast.py +15 -7
  47. openstef/tasks/create_components_forecast.py +24 -8
  48. openstef/tasks/create_forecast.py +9 -6
  49. openstef/tasks/create_solar_forecast.py +4 -4
  50. openstef/tasks/optimize_hyperparameters.py +2 -2
  51. openstef/tasks/split_forecast.py +9 -2
  52. openstef/tasks/train_model.py +9 -7
  53. openstef/tasks/utils/taskcontext.py +7 -0
  54. openstef/validation/validation.py +28 -3
  55. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
  56. openstef-3.4.44.dist-info/RECORD +97 -0
  57. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
  58. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
  59. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
  60. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
  61. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
  62. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
  63. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
  64. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
  65. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
  66. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
  67. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
  68. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
  69. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
  70. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
  71. openstef/data/dutch_holidays_2020-2022.csv +0 -831
  72. openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
  73. openstef/feature_engineering/historic_features.py +0 -40
  74. openstef/model/regressors/proloaf.py +0 -281
  75. openstef/tasks/run_tracy.py +0 -145
  76. openstef-3.4.10.dist-info/RECORD +0 -104
  77. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
  78. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
  79. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
  80. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
  81. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
openstef/feature_engineering/holiday_features.py
@@ -10,13 +10,11 @@ import pandas as pd
 
 from openstef import PROJECT_ROOT
 
-HOLIDAY_CSV_PATH: str = (
-    PROJECT_ROOT / "openstef" / "data" / "dutch_holidays_2020-2022.csv"
-)
+HOLIDAY_CSV_PATH: str = PROJECT_ROOT / "openstef" / "data" / "dutch_holidays.csv"
 
 
 def generate_holiday_feature_functions(
-    country: str = "NL",
+    country_code: str = "NL",
     years: list = None,
     path_to_school_holidays_csv: str = HOLIDAY_CSV_PATH,
 ) -> dict:
@@ -46,12 +44,14 @@ def generate_holiday_feature_functions(
         - Pinksteren
         - Kerst
 
+
     The 'Brugdagen' are updated untill dec 2020. (Generated using agenda)
 
     Args:
         country: Country for which to create holiday features.
         years: years for which to create holiday features.
         path_to_school_holidays_csv: Filepath to csv with school holidays.
+            NOTE: Dutch holidays csv file is only until January 2026.
 
     Returns:
         Dictionary with functions that check if a given date is a holiday, keys
@@ -69,7 +69,7 @@ def generate_holiday_feature_functions(
         now.year + 1,
     ]
 
-    country_holidays = holidays.country_holidays(country, years=years)
+    country_holidays = holidays.country_holidays(country_code, years=years)
 
     # Make holiday function dict
     holiday_functions = {}
@@ -96,7 +96,7 @@ def generate_holiday_feature_functions(
 
         # Check for bridge day
         holiday_functions, bridge_days = check_for_bridge_day(
-            date, holiday_name, country, years, holiday_functions, bridge_days
+            date, holiday_name, country_code, years, holiday_functions, bridge_days
         )
 
     # Add feature function that includes all bridgedays
@@ -104,33 +104,42 @@ def generate_holiday_feature_functions(
         {"is_bridgeday": lambda x: np.isin(x.index.date, np.array(list(bridge_days)))}
     )
 
-    # Manully generated csv including all dutch schoolholidays for different regions
-    df_holidays = pd.read_csv(path_to_school_holidays_csv, index_col=None)
-    df_holidays["datum"] = pd.to_datetime(df_holidays.datum).apply(lambda x: x.date())
-
-    # Add check function that includes all holidays of the provided csv
-    holiday_functions.update(
-        {"is_schoolholiday": lambda x: np.isin(x.index.date, df_holidays.datum.values)}
-    )
-
-    # Loop over list of holidays names
-    for holiday_name in list(set(df_holidays.name)):
-        # Define function explicitely to mitigate 'late binding' problem
-        def make_holiday_func(holidayname=holiday_name):
-            return lambda x: np.isin(
-                x.index.date, df_holidays.datum[df_holidays.name == holidayname].values
-            )
+    # Add school holidays if country is NL
+    if country_code == "NL":
+        # Manully generated csv including all dutch schoolholidays for different regions
+        df_holidays = pd.read_csv(path_to_school_holidays_csv, index_col=None)
+        df_holidays["datum"] = pd.to_datetime(df_holidays.datum).apply(
+            lambda x: x.date()
+        )
 
-        # Create lag function for each holiday
+        # Add check function that includes all holidays of the provided csv
         holiday_functions.update(
             {
-                "is_"
-                + holiday_name.replace(" ", "_").lower(): make_holiday_func(
-                    holidayname=holiday_name
+                "is_schoolholiday": lambda x: np.isin(
+                    x.index.date, df_holidays.datum.values
                 )
             }
         )
 
+        # Loop over list of holidays names
+        for holiday_name in list(set(df_holidays.name)):
+            # Define function explicitely to mitigate 'late binding' problem
+            def make_holiday_func(holidayname=holiday_name):
+                return lambda x: np.isin(
+                    x.index.date,
+                    df_holidays.datum[df_holidays.name == holidayname].values,
+                )
+
+            # Create lag function for each holiday
+            holiday_functions.update(
+                {
+                    "is_"
+                    + holiday_name.replace(" ", "_").lower(): make_holiday_func(
+                        holidayname=holiday_name
+                    )
+                }
+            )
+
     return holiday_functions
 
 
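Reviewer note: the `make_holiday_func` indirection that this hunk re-indents exists because Python closures bind loop variables late; without it, every lambda would see only the final `holiday_name`. A minimal standalone sketch of the pitfall and of the default-argument fix (illustrative only, not OpenSTEF code):

    # Late binding: all lambdas share the loop variable, so after the loop
    # finishes they all see its last value.
    checks_buggy = [lambda: name for name in ["Kerst", "Pinksteren"]]
    print([check() for check in checks_buggy])  # ['Pinksteren', 'Pinksteren']

    # Binding the loop variable as a default argument freezes its value
    # at definition time, one value per iteration.
    checks_fixed = [lambda name=name: name for name in ["Kerst", "Pinksteren"]]
    print([check() for check in checks_fixed])  # ['Kerst', 'Pinksteren']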
openstef/feature_engineering/missing_values_transformer.py (new file)
@@ -0,0 +1,141 @@
+# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
+#
+# SPDX-License-Identifier: MPL-2.0
+from typing import Union, List, Optional
+
+import numpy as np
+import pandas as pd
+from sklearn.impute import SimpleImputer
+from sklearn.preprocessing import FunctionTransformer
+from sklearn.utils.validation import check_array, check_is_fitted
+
+
+class MissingValuesTransformer:
+    """MissingValuesTransformer handles missing values in data by imputing them with a given strategy.
+
+    It also removes columns that are always null from the data.
+
+    """
+
+    in_feature_names: Optional[List[str]] = None
+    _n_in_features: Optional[int] = None
+
+    non_null_feature_names: List[str] = None
+
+    def __init__(
+        self,
+        missing_values: Union[int, float, str, None] = np.nan,
+        imputation_strategy: str = None,
+        fill_value: Union[str, int, float] = None,
+        no_fill_future_values_features: List[str] = None,
+    ):
+        """Initialize missing values handler.
+
+        Args:
+            missing_values: The placeholder for the missing values. All occurrences of
+                `missing_values` will be imputed.
+            imputation_strategy: The imputation strategy to use
+                Can be one of "mean", "median", "most_frequent", "constant" or None.
+            fill_value: When strategy == "constant", fill_value is used to replace all
+                occurrences of missing_values.
+            no_fill_future_values_features: The features for which it does not make sense
+                to fill future values. Rows that contain trailing null values for these
+                features will be removed from the data.
+
+        """
+        self.missing_values = missing_values
+        self.imputation_strategy = imputation_strategy
+        self.fill_value = fill_value
+        self.no_fill_future_values_features = no_fill_future_values_features or []
+        self.is_fitted_ = False
+
+        # Build the proper imputation transformer
+        # - Identity function if strategy is None
+        # - SimpleImputer with the dedicated strategy
+        if self.imputation_strategy is None:
+            self.imputer_ = FunctionTransformer(func=self._identity)
+        else:
+            self.imputer_ = SimpleImputer(
+                missing_values=self.missing_values,
+                strategy=self.imputation_strategy,
+                fill_value=self.fill_value,
+            ).set_output(transform="pandas")
+        self.imputer_._validate_params()
+
+    @staticmethod
+    def _determine_trailing_null_rows(x: pd.DataFrame) -> pd.Series:
+        """Determine rows with trailing null values in a DataFrame."""
+        return ~x.bfill().isnull().any(axis="columns")
+
+    def fit(self, x, y=None):
+        """Fit the imputer on the input data."""
+        _ = check_array(x, force_all_finite="allow-nan")
+        if not isinstance(x, pd.DataFrame):
+            x = pd.DataFrame(np.asarray(x))
+
+        self.in_feature_names = list(x.columns)
+        self._n_in_features = x.shape[1]
+
+        # Remove always null columns
+        is_column_null = x.isnull().all(axis="index")
+        self.non_null_feature_names = list(x.columns[~is_column_null])
+        x = x[self.non_null_feature_names]
+
+        # Remove trailing null rows for features that should
+        # not be imputed in the future
+        trailing_null_rows = self._determine_trailing_null_rows(
+            x[self.no_fill_future_values_features]
+        )
+        x = x.loc[trailing_null_rows]
+
+        # Imputers do not support labels
+        self.imputer_.fit(X=x, y=None)
+        self.is_fitted_ = True
+
+    def transform(self, x) -> pd.DataFrame:
+        """Transform the input data by imputing missing values."""
+        check_is_fitted(self)
+        _ = check_array(x, force_all_finite="allow-nan")
+        if not isinstance(x, pd.DataFrame):
+            x = pd.DataFrame(np.asarray(x))
+
+        x = x[self.non_null_feature_names]
+
+        transformed = self.imputer_.transform(x)
+
+        return transformed
+
+    def fit_transform(self, x, y=None) -> tuple[pd.DataFrame, Optional[pd.Series]]:
+        """Fit the imputer on the input data and transform it.
+
+        Returns:
+            The data with missing values imputed.
+
+        """
+        self.fit(x, y)
+
+        if not isinstance(x, pd.DataFrame):
+            x = pd.DataFrame(np.asarray(x))
+
+        x = x[self.non_null_feature_names]
+
+        # Remove trailing null rows for features that should
+        # not be imputed in the future
+        non_trailing_null_rows = self._determine_trailing_null_rows(
+            x[self.no_fill_future_values_features]
+        )
+        x = x.loc[non_trailing_null_rows]
+
+        x = self.transform(x)
+
+        if y is not None:
+            y = y.loc[non_trailing_null_rows]
+
+        return x, y
+
+    @classmethod
+    def _identity(cls, x):
+        return x
+
+    def __sklearn_is_fitted__(self) -> bool:
+        return self.in_feature_names is not None
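Reviewer note: a usage sketch of the new transformer. The DataFrame and column names below are invented for illustration, but the expected behavior follows from the code above: always-null columns are dropped, and rows with trailing nulls in `no_fill_future_values_features` are removed before imputation.

    import numpy as np
    import pandas as pd

    from openstef.feature_engineering.missing_values_transformer import (
        MissingValuesTransformer,
    )

    # 'radiation' is entirely null and should be dropped; 'load' has a
    # trailing NaN that must not be imputed into the future.
    df = pd.DataFrame(
        {
            "load": [1.0, 2.0, np.nan, 4.0, np.nan],
            "temperature": [10.0, np.nan, 12.0, 13.0, 14.0],
            "radiation": [np.nan] * 5,
        }
    )

    transformer = MissingValuesTransformer(
        imputation_strategy="mean",
        no_fill_future_values_features=["load"],
    )
    x, _ = transformer.fit_transform(df)

    print(transformer.non_null_feature_names)  # ['load', 'temperature']
    print(len(x))  # 4: the trailing-NaN row of 'load' was dropped, the rest imputed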
openstef/feature_engineering/weather_features.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: MPL-2.0
 
 """This module contains all wheather related functions used for feature engineering."""
+import logging
 from typing import Union
 
 import numpy as np
@@ -12,7 +13,13 @@ import structlog
 from pvlib.location import Location
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
+from openstef.settings import Settings
 
+structlog.configure(
+    wrapper_class=structlog.make_filtering_bound_logger(
+        logging.getLevelName(Settings.log_level)
+    )
+)
 logger = structlog.get_logger(__name__)
 
 
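Reviewer note: this `structlog.configure(...)` block recurs in several modules in this release (see `reporter.py`, `confidence_interval_applicator.py`, and `model_creator.py` below). A self-contained sketch of what the pattern does, with a literal level standing in for the new `Settings.log_level`:

    import logging

    import structlog

    # logging.getLevelName("WARNING") returns the numeric level (30);
    # the filtering bound logger then drops events below that level.
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(
            logging.getLevelName("WARNING")
        )
    )

    logger = structlog.get_logger(__name__)
    logger.info("suppressed")   # below WARNING, not emitted
    logger.warning("emitted")   # at/above WARNING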
openstef/metrics/figure.py
@@ -71,6 +71,9 @@ def plot_data_series(
     Returns:
         A line plot of each passed data series.
 
+    Raises:
+        ValueError: If names is None and the number of series is greater than 3.
+
     """
     series_names = {
         1: ("series",),
openstef/metrics/metrics.py
@@ -25,6 +25,9 @@ def get_eval_metric_function(metric_name: str) -> Callable:
     Returns:
         Function to calculate the metric.
 
+    Raises:
+        KeyError: If the metric is not available.
+
     """
     evaluation_function = {
         "rmse": rmse,
@@ -130,6 +133,9 @@ def r_mae_highest(
 
     The range is based on the load range of the previous two weeks.
 
+    Raises:
+        ValueError: If the length of the realised and forecast arrays are not equal.
+
     """
     # Check if length of both arrays is equal
     if len(np.array(realised)) != len(np.array(forecast)):
@@ -395,7 +401,7 @@ def xgb_quantile_obj(
     Args:
         preds: numpy.ndarray
         dmatrix: xgboost.DMatrix
-        quantile: float
+        quantile: float between 0 and 1
 
     Returns:
         Gradient and Hessian
@@ -425,3 +431,54 @@ def xgb_quantile_obj(
     hess = np.ones_like(preds)
 
     return grad, hess
+
+
+def arctan_loss(y_true, y_pred, taus, s=0.1):
+    """Compute the arctan pinball loss.
+
+    Note that XGBoost outputs the predictions in a slightly peculiar manner.
+    Suppose we have 100 data points and we predict 10 quantiles. The predictions
+    will be an array of size (1000 x 1). We first resize this to a (100x10) array
+    where each row corresponds to the 10 predicted quantile for a single data
+    point. We then use a for-loop (over the 10 columns) to calculate the gradients
+    and second derivatives. Legibility was chosen over efficiency. This part
+    can be made more efficient.
+
+    Args:
+        y_true: An array containing the true observations.
+        y_pred: An array containing the predicted quantiles.
+        taus: A list containing the true desired coverage of the quantiles.
+        s: A smoothing parameter.
+
+    Returns:
+        grad: An array containing the (negative) gradients with respect to y_pred.
+        hess: An array containing the second derivative with respect to y_pred.
+
+    """
+    size = len(y_true)
+    n_dim = len(taus)  # The number of columns
+    n_rows = size // n_dim
+
+    # Resize the predictions and targets.
+    # Each column corresponds to a quantile, each row to a data point.
+    y_pred = np.reshape(y_pred, (n_rows, n_dim))
+    y_true = np.reshape(y_true, (n_rows, n_dim))
+
+    # Calculate the differences
+    u = y_true - y_pred
+
+    # Calculate the gradient and second derivatives
+    grad = np.zeros_like(y_pred)
+    hess = np.zeros_like(y_pred)
+    z = u / s
+    for i, tau in enumerate(taus):
+        x = 1 + z[:, i] ** 2
+        grad[:, i] = (
+            tau - 0.5 + 1 / np.pi * np.arctan(z[:, i]) + z[:, i] / (np.pi) * x**-1
+        )
+        hess[:, i] = 2 / (np.pi * s) * x ** (-2)
+
+    # Reshape back to the original shape.
+    grad = grad.reshape(size)
+    hess = hess.reshape(size)
+    return -grad / n_dim, hess / n_dim
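Reviewer note: `arctan_loss` returns the gradient/Hessian pair that XGBoost expects from a custom objective, with all quantiles of one data point laid out consecutively in the flattened prediction vector. A hedged sketch of how such an objective might be wired up; the repeat-per-quantile label layout and the `make_objective` helper are assumptions for illustration, the real plumbing lives in the new `xgb_multioutput_quantile.py`:

    import numpy as np

    from openstef.metrics.metrics import arctan_loss

    taus = [0.1, 0.5, 0.9]
    rng = np.random.default_rng(0)
    y_true = np.repeat(rng.normal(size=100), len(taus))  # each target once per quantile
    y_pred = np.zeros_like(y_true)

    grad, hess = arctan_loss(y_true, y_pred, taus, s=0.1)
    print(grad.shape, hess.shape)  # (300,) (300,)

    def make_objective(taus, s=0.1):
        # Native-API XGBoost objective: obj(preds, dtrain) -> (grad, hess)
        def objective(y_pred, dtrain):
            return arctan_loss(dtrain.get_label(), y_pred, taus, s)
        return objective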
openstef/metrics/reporter.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 """Defines reporter class."""
+import logging
 import os
 import warnings
 from dataclasses import dataclass
@@ -16,6 +17,7 @@ from plotly.graph_objects import Figure
 from openstef.metrics import figure
 from openstef.metrics.metrics import bias, mae, nsme, r_mae, rmse
 from openstef.model.regressors.regressor import OpenstfRegressor
+from openstef.settings import Settings
 
 
 @dataclass
@@ -167,6 +169,11 @@ class Reporter:
     def write_report_to_disk(report: Report, report_folder: str):
         """Write report to disk; e.g. for viewing report of latest models using grafana."""
         # Initialize logger and serializer
+        structlog.configure(
+            wrapper_class=structlog.make_filtering_bound_logger(
+                logging.getLevelName(Settings.log_level)
+            )
+        )
         logger = structlog.get_logger(__name__)
         if report_folder:
             # create path if does not exist
openstef/model/confidence_interval_applicator.py
@@ -1,6 +1,7 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
 #
 # SPDX-License-Identifier: MPL-2.0
+import logging
 from datetime import datetime
 
 import numpy as np
@@ -11,12 +12,18 @@ from sklearn.base import RegressorMixin
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
 from openstef.exceptions import ModelWithoutStDev
+from openstef.settings import Settings
 
 
 class ConfidenceIntervalApplicator:
     def __init__(self, model: RegressorMixin, forecast_input_data: pd.DataFrame):
         self.model = model
         self.forecast_input_data = forecast_input_data
+        structlog.configure(
+            wrapper_class=structlog.make_filtering_bound_logger(
+                logging.getLevelName(Settings.log_level)
+            )
+        )
         self.logger = structlog.get_logger(self.__class__.__name__)
 
     def add_confidence_interval(
@@ -54,9 +61,24 @@ class ConfidenceIntervalApplicator:
         temp_forecast = self._add_standard_deviation_to_forecast(forecast)
 
         if self.model.can_predict_quantiles:
-            return self._add_quantiles_to_forecast_quantile_regression(
-                temp_forecast, pj["quantiles"]
-            )
+            # Try to generate the quantiles that were requested
+            try:
+                result = self._add_quantiles_to_forecast_quantile_regression(
+                    temp_forecast, pj["quantiles"]
+                )
+                return result
+            except Exception:
+                # Fallback on quantiles of the model if the requested quantiles cant be generated by the model.
+                # Can happen when the model was trained on different quantiles than are requested
+                result = self._add_quantiles_to_forecast_quantile_regression(
+                    temp_forecast, self.model.quantiles
+                )
+                self.logger.warning(
+                    "Quantiles are requested the model was not trained on. Using the quantiles the model was trained on",
+                    requested_quantiles=pj["quantiles"],
+                    trained_quantiles=self.model.quantiles,
+                )
+                return result
 
         return self._add_quantiles_to_forecast_default(temp_forecast, pj["quantiles"])
 
@@ -74,6 +96,9 @@ class ConfidenceIntervalApplicator:
             Forecast with added standard deviation. DataFrame with columns:
                 "forecast", "stdev"
 
+        Raises:
+            ModelWithoutStDev: If the model does not have a valid standard deviation.
+
         """
         minimal_resolution: int = 15  # Minimal time resolution in minutes
         standard_deviation = self.model.standard_deviation
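Reviewer note: a toy sketch of the new fallback control flow in `add_confidence_interval` (all names below are invented; the real code calls `_add_quantiles_to_forecast_quantile_regression` and logs a structured warning):

    TRAINED = (0.1, 0.5, 0.9)

    def predict_quantiles(quantiles):
        # Stand-in for the quantile-regression call: a model can only
        # produce the quantiles it was trained on.
        missing = set(quantiles) - set(TRAINED)
        if missing:
            raise KeyError(f"untrained quantiles: {missing}")
        return {q: 0.0 for q in quantiles}

    def add_quantiles(requested):
        try:
            return predict_quantiles(requested)
        except Exception:
            # Same fallback as the diff: use the trained quantiles instead.
            print(f"warning: falling back from {list(requested)} to {list(TRAINED)}")
            return predict_quantiles(TRAINED)

    add_quantiles([0.05, 0.5, 0.95])  # falls back to the trained quantiles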
openstef/model/model_creator.py
@@ -1,28 +1,35 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
 #
 # SPDX-License-Identifier: MPL-2.0
+import logging
 from typing import Union
 
 import structlog
 
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
+from openstef.model.regressors.arima import ARIMAOpenstfRegressor
 from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
 from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
 from openstef.model.regressors.linear import LinearOpenstfRegressor
+from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
 from openstef.model.regressors.regressor import OpenstfRegressor
+from openstef.model.regressors.flatliner import FlatlinerRegressor
 from openstef.model.regressors.xgb import XGBOpenstfRegressor
 from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
-from openstef.model.regressors.arima import ARIMAOpenstfRegressor
+from openstef.model.regressors.xgb_multioutput_quantile import (
+    XGBMultiOutputQuantileOpenstfRegressor,
+)
+from openstef.settings import Settings
 
+structlog.configure(
+    wrapper_class=structlog.make_filtering_bound_logger(
+        logging.getLevelName(Settings.log_level)
+    )
+)
 logger = structlog.get_logger(__name__)
-try:
-    from openstef.model.regressors.proloaf import OpenstfProloafRegressor
-except ImportError:
-    logger.info("Proloaf not available, setting constructor to None")
-    OpenstfProloafRegressor = None
 
 valid_model_kwargs = {
-    MLModelType.XGB: [
+    ModelType.XGB: [
         "n_estimators",
         "objective",
         "max_depth",
@@ -53,7 +60,7 @@ valid_model_kwargs = {
         "validate_parameters",
         "early_stopping_rounds",
     ],
-    MLModelType.LGB: [
+    ModelType.LGB: [
         "boosting_type",
         "objective",
         "num_leaves",
@@ -75,7 +82,7 @@ valid_model_kwargs = {
         "importance_type",
         "early_stopping_rounds",
     ],
-    MLModelType.XGB_QUANTILE: [
+    ModelType.XGB_QUANTILE: [
         "quantiles",
         "gamma",
         "colsample_bytree",
@@ -84,33 +91,37 @@ valid_model_kwargs = {
         "max_depth",
         "early_stopping_rounds",
     ],
-    MLModelType.ProLoaf: [
-        "relu_leak",
-        "encoder_features",
-        "decoder_features",
-        "core_layers",
-        "rel_linear_hidden_size",
-        "rel_core_hidden_size",
-        "dropout_fc",
-        "dropout_core",
-        "training_metric",
-        "metric_options",
-        "optimizer_name",
-        "early_stopping_patience",
-        "early_stopping_margin",
-        "learning_rate",
-        "max_epochs",
-        "device",
-        "batch_size",
-        "history_horizon",
-        "horizon_minutes",
+    ModelType.XGB_MULTIOUTPUT_QUANTILE: [
+        "quantiles",
+        "gamma",
+        "colsample_bytree",
+        "subsample",
+        "min_child_weight",
+        "max_depth",
+        "early_stopping_rounds",
+        "arctan_smoothing",
+    ],
+    ModelType.LINEAR: [
+        "missing_values",
+        "imputation_strategy",
+        "fill_value",
+    ],
+    ModelType.FLATLINER: [
+        "quantiles",
     ],
-    MLModelType.LINEAR: [
+    ModelType.LINEAR_QUANTILE: [
+        "alpha",
+        "quantiles",
+        "solver",
         "missing_values",
         "imputation_strategy",
         "fill_value",
+        "weight_scale_percentile",
+        "weight_exponent",
+        "weight_floor",
+        "no_fill_future_values_features",
     ],
-    MLModelType.ARIMA: [
+    ModelType.ARIMA: [
         "backtest_max_horizon",
         "order",
         "seasonal_order",
@@ -124,16 +135,18 @@ class ModelCreator:
 
     # Set object mapping
     MODEL_CONSTRUCTORS = {
-        MLModelType.XGB: XGBOpenstfRegressor,
-        MLModelType.LGB: LGBMOpenstfRegressor,
-        MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
-        MLModelType.ProLoaf: OpenstfProloafRegressor,
-        MLModelType.LINEAR: LinearOpenstfRegressor,
-        MLModelType.ARIMA: ARIMAOpenstfRegressor,
+        ModelType.XGB: XGBOpenstfRegressor,
+        ModelType.LGB: LGBMOpenstfRegressor,
+        ModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
+        ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
+        ModelType.LINEAR: LinearOpenstfRegressor,
+        ModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
+        ModelType.ARIMA: ARIMAOpenstfRegressor,
+        ModelType.FLATLINER: FlatlinerRegressor,
     }
 
     @staticmethod
-    def create_model(model_type: Union[MLModelType, str], **kwargs) -> OpenstfRegressor:
+    def create_model(model_type: Union[ModelType, str], **kwargs) -> OpenstfRegressor:
         """Create a machine learning model based on model type.
 
         Args:
@@ -154,7 +167,7 @@ class ModelCreator:
                 model_class = load_custom_model(model_type)
                 valid_kwargs = model_class.valid_kwargs()
             else:
-                model_type = MLModelType(model_type)
+                model_type = ModelType(model_type)
                 model_class = ModelCreator.MODEL_CONSTRUCTORS[model_type]
                 valid_kwargs = valid_model_kwargs[model_type]
             # Check if model as imported
@@ -165,7 +178,7 @@ class ModelCreator:
                     "Please refer to the ReadMe for instructions"
                 )
         except ValueError as e:
-            valid_types = [t.value for t in MLModelType]
+            valid_types = [t.value for t in ModelType]
             raise NotImplementedError(
                 f"No constructor for '{model_type}', "
                 f"valid model_types are: {valid_types} "
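Reviewer note: with the `MLModelType` → `ModelType` rename, callers construct models as below (a minimal sketch; kwargs are presumably filtered against `valid_model_kwargs` inside `create_model`, which happens outside the hunks shown):

    from openstef.enums import ModelType
    from openstef.model.model_creator import ModelCreator

    # The factory accepts the enum or its string value.
    model = ModelCreator.create_model(
        ModelType.LINEAR_QUANTILE,
        quantiles=[0.1, 0.5, 0.9],
    )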