openstef 3.2.69__py3-none-any.whl → 3.2.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,16 +106,29 @@ def plot_data_series(
106
106
  # Filter data on given horizon
107
107
  actuals = []
108
108
  predictions = []
109
+ q_low = []
110
+ q_high = []
109
111
 
110
112
  for series, predict_series in zip(data, predict_data):
111
113
  mask = series["horizon"] == horizon
112
114
  actuals.append(series[mask]["load"])
113
115
  predictions.append(predict_series[mask]["forecast"])
116
+ if len(predict_series[mask].columns) > 1:
117
+ q_low.append(predict_series[mask].iloc[:, -2])
118
+ q_high.append(predict_series[mask].iloc[:, -1])
119
+
114
120
  else:
115
121
  actuals = data
116
122
  predictions = predict_data
123
+ if len(predictions.columns) > 1:
124
+ q_low = predict_data.iloc[:, -2]
125
+ q_high = predict_data.iloc[:, -1]
126
+ else:
127
+ q_low = None
128
+ q_high = None
117
129
 
118
- fig = _plot_data_and_predictions(names, actuals, predictions)
130
+ quantiles = [q_low, q_high] if (q_low is not None) and (len(q_low) != 0) else None
131
+ fig = _plot_data_and_predictions(names, actuals, predictions, quantiles)
119
132
  fig.update_layout(
120
133
  title=f"Predictor in action for horizon: {horizon}",
121
134
  )
@@ -167,7 +180,10 @@ def _plot_data(names: list[str], series: list[pd.Series]) -> go.Figure:
167
180
 
168
181
 
169
182
  def _plot_data_and_predictions(
170
- names: list[str], actuals: list[pd.Series], predictions: list[pd.Series]
183
+ names: list[str],
184
+ actuals: list[pd.Series],
185
+ predictions: list[pd.Series],
186
+ quantiles: list[float] = None,
171
187
  ) -> go.Figure:
172
188
  """Create plot of different data and prediction splits.
173
189
 
@@ -178,6 +194,7 @@ def _plot_data_and_predictions(
178
194
  names: Name of each separate split. The passed names will be suffixed with _actual and _predict for data and predictions respectively.
179
195
  actuals: Each data split as a separate series.
180
196
  predictions: Each prediction split as a separate series.
197
+ quantiles: List of predicted quantiles that have to be plotted.
181
198
 
182
199
  Returns:
183
200
  A line plot of each passed series.
@@ -186,13 +203,29 @@ def _plot_data_and_predictions(
186
203
  # Build a combined DataFrame with all data.
187
204
  # This step is important to create forced NaNs to create gaps in the plot.
188
205
  combined = []
189
- for name, actual, prediction in zip(names, actuals, predictions):
190
- combined.extend(
191
- [
192
- actual.rename(f"{name}_actual"),
193
- prediction.rename(f"{name}_predict"),
194
- ]
195
- )
206
+ if quantiles is None:
207
+ for name, actual, prediction in zip(names, actuals, predictions):
208
+ combined.extend(
209
+ [
210
+ actual.rename(f"{name}_actual"),
211
+ prediction.rename(f"{name}_predict"),
212
+ ]
213
+ )
214
+ else:
215
+ for name, actual, prediction, q_low, q_high in zip(
216
+ names, actuals, predictions, quantiles[0], quantiles[-1]
217
+ ):
218
+ q_low_name = q_low.name
219
+ q_high_name = q_high.name
220
+ combined.extend(
221
+ [
222
+ actual.rename(f"{name}_actual"),
223
+ prediction.rename(f"{name}_predict"),
224
+ q_low.rename(f"{name}_{q_low_name}"),
225
+ q_high.rename(f"{name}_{q_high_name}"),
226
+ ]
227
+ )
228
+
196
229
  df_plot = pd.concat(combined, axis=1)
197
230
 
198
231
  fig = go.Figure()
@@ -200,7 +233,6 @@ def _plot_data_and_predictions(
200
233
  # Add a trace for every data series
201
234
  for i, name in enumerate(names):
202
235
  actual, predict = f"{name}_actual", f"{name}_predict"
203
-
204
236
  fig.add_trace(
205
237
  go.Scatter(
206
238
  x=df_plot.index,
@@ -217,6 +249,32 @@ def _plot_data_and_predictions(
217
249
  line=dict(dash="dot", color=px.colors.qualitative.Dark2[i]),
218
250
  )
219
251
  )
252
+ if quantiles is not None:
253
+ q_low, q_high = f"{name}_{q_low_name}", f"{name}_{q_high_name}"
254
+ fig.add_trace(
255
+ go.Scatter(
256
+ x=df_plot.index,
257
+ y=df_plot[q_low],
258
+ mode="lines",
259
+ line=dict(
260
+ color=px.colors.qualitative.Dark2[i], width=0.5, dash="dash"
261
+ ),
262
+ name=q_low,
263
+ )
264
+ )
265
+ fig.add_trace(
266
+ go.Scatter(
267
+ x=df_plot.index,
268
+ y=df_plot[q_high],
269
+ fill="tonexty",
270
+ fillcolor=f"rgba({px.colors.qualitative.Dark2[i][4:-1]}, 0.3)",
271
+ mode="lines",
272
+ line=dict(
273
+ color=px.colors.qualitative.Dark2[i], width=0.5, dash="dash"
274
+ ),
275
+ name=q_high,
276
+ )
277
+ )
220
278
 
221
279
  fig.update_layout(yaxis_title="Load (MW)")
222
280
 
@@ -52,6 +52,7 @@ class Reporter:
52
52
  train_data: pd.DataFrame = None,
53
53
  validation_data: pd.DataFrame = None,
54
54
  test_data: pd.DataFrame = None,
55
+ quantiles: list[float] = None,
55
56
  ) -> None:
56
57
  """Initializes reporter.
57
58
 
@@ -59,11 +60,13 @@ class Reporter:
59
60
  train_data: Dataframe with training data
60
61
  validation_data: Dataframe with validation data
61
62
  test_data: Dataframe with test data
63
+ quantiles: List of predicted quantiles that have to be plotted.
62
64
 
63
65
  """
64
66
  self.horizons = train_data.horizon.unique()
65
67
  self.predicted_data_list = []
66
68
  self.input_data_list = [train_data, validation_data, test_data]
69
+ self.quantiles = [] if quantiles is None else sorted(quantiles)
67
70
 
68
71
  def generate_report(
69
72
  self,
@@ -102,15 +105,34 @@ class Reporter:
102
105
 
103
106
  with warnings.catch_warnings():
104
107
  warnings.simplefilter("ignore")
108
+
109
+ if model.can_predict_quantiles:
110
+ fiabilities = self.get_fiabilities(
111
+ {q: model.predict(valid_x, quantile=q) for q in self.quantiles},
112
+ valid_y,
113
+ )
114
+ else:
115
+ fiabilities = {}
116
+
105
117
  report = Report(
106
118
  data_series_figures=data_series_figures,
107
119
  feature_importance_figure=feature_importance_figure,
108
- metrics=self.get_metrics(model.predict(valid_x), valid_y),
120
+ metrics={
121
+ **self.get_metrics(model.predict(valid_x), valid_y),
122
+ **fiabilities,
123
+ },
109
124
  signature=infer_signature(train_x, train_y),
110
125
  )
111
126
 
112
127
  return report
113
128
 
129
+ @staticmethod
130
+ def get_fiabilities(quantiles: dict[float, np.array], y_true: np.array) -> dict:
131
+ fiabilities_dict = {}
132
+ for alpha, qhat in quantiles.items():
133
+ fiabilities_dict[f"fiability_at_q{alpha}"] = np.mean(qhat >= y_true)
134
+ return fiabilities_dict
135
+
114
136
  @staticmethod
115
137
  def get_metrics(y_pred: np.array, y_true: np.array) -> dict:
116
138
  """Calculate the metrics for a prediction.
@@ -171,6 +193,15 @@ class Reporter:
171
193
  forecast = pd.DataFrame(
172
194
  index=data_set.index, data={"forecast": model_forecast}
173
195
  )
196
+
197
+ if (model.can_predict_quantiles) & (len(self.quantiles) >= 2):
198
+ forecast.loc[:, f"q{100 * self.quantiles[0]}"] = model.predict(
199
+ data_set.iloc[:, 1:-1], quantile=self.quantiles[0]
200
+ )
201
+ forecast.loc[:, f"q{100 * self.quantiles[-1]}"] = model.predict(
202
+ data_set.iloc[:, 1:-1], quantile=self.quantiles[-1]
203
+ )
204
+
174
205
  self.predicted_data_list.append(forecast)
175
206
 
176
207
  # Make cufflinks plots for the data series
@@ -97,7 +97,12 @@ class RegressorObjective:
97
97
  "stratification_min_max": self.model_type != MLModelType.ProLoaf,
98
98
  "back_test": True,
99
99
  }
100
- (self.train_data, self.validation_data, self.test_data,) = self.split_func(
100
+ (
101
+ self.train_data,
102
+ self.validation_data,
103
+ self.test_data,
104
+ self.operational_score_data,
105
+ ) = self.split_func(
101
106
  self.input_data,
102
107
  test_fraction=self.test_fraction,
103
108
  validation_fraction=self.validation_fraction,
@@ -113,7 +113,7 @@ def split_data_train_validation_test(
113
113
  validation dataset. In an operational setting the following sequence is
114
114
  returned (when using stratification):
115
115
 
116
- Test >> Train >> Validation
116
+ Train >> Validation (and the test is the Train and Validation combined.)
117
117
 
118
118
  For a back test (indicated with argument "back_test") the following sequence
119
119
  is returned:
@@ -141,6 +141,7 @@ def split_data_train_validation_test(
141
141
  - Test data.
142
142
 
143
143
  """
144
+ test_fraction = test_fraction if back_test else 0
144
145
  train_fraction = 1 - (test_fraction + validation_fraction)
145
146
  if train_fraction < 0:
146
147
  raise ValueError(
@@ -172,10 +173,18 @@ def split_data_train_validation_test(
172
173
  start_date_test = end_date - np.round(number_indices * test_fraction) * delta
173
174
  test_data = data_[start_date_test:]
174
175
  train_val_data = data_[:start_date_test]
176
+ operational_score_data = (
177
+ pd.DataFrame()
178
+ ) # Empty because a backtest is not an operational setting.
175
179
  else:
176
180
  start_date_val = start_date + np.round(number_indices * test_fraction) * delta
177
- test_data = data_[:start_date_val]
181
+ test_data = data_[
182
+ :start_date_val
183
+ ] # Empty as all data is used for training in an operational setting.
178
184
  train_val_data = data_[start_date_val:]
185
+ operational_score_data = data_.copy(deep=True).reset_index(
186
+ drop=True
187
+ ) # Used to check whether a newly trained operational model is better than the old one.
179
188
 
180
189
  if stratification_min_max and (
181
190
  len(set(train_val_data.index.date)) >= min_days_for_stratification
@@ -248,11 +257,7 @@ def split_data_train_validation_test(
248
257
  validation_data = validation_data.sort_index()
249
258
  test_data = test_data.sort_index()
250
259
 
251
- return (
252
- train_data,
253
- validation_data,
254
- test_data,
255
- )
260
+ return (train_data, validation_data, test_data, operational_score_data)
256
261
 
257
262
 
258
263
  def backtest_split_default(
@@ -286,14 +291,14 @@ def backtest_split_default(
286
291
  for ifold in range(n_folds):
287
292
  test_data = data[data["random_fold"] == ifold].sort_index()
288
293
 
289
- (train_data, validation_data, _,) = split_data_train_validation_test(
294
+ (train_data, validation_data, _, _) = split_data_train_validation_test(
290
295
  data[data["random_fold"] != ifold].iloc[:, :-2],
291
296
  test_fraction=0,
292
297
  back_test=True,
293
298
  stratification_min_max=stratification_min_max,
294
299
  )
295
300
 
296
- yield train_data, validation_data, test_data.iloc[:, :-2]
301
+ yield train_data, validation_data, test_data.iloc[:, :-2], pd.DataFrame()
297
302
  else:
298
303
  yield split_data_train_validation_test(
299
304
  data,
@@ -82,10 +82,14 @@ def train_model_and_forecast_back_test(
82
82
  ) = zip(
83
83
  *(
84
84
  train_model_and_forecast_test_core(
85
- pj, modelspecs, train_data, validation_data, test_data
85
+ pj,
86
+ modelspecs,
87
+ train_data,
88
+ validation_data,
89
+ test_data,
86
90
  )
87
91
  + (train_data, validation_data, test_data)
88
- for train_data, validation_data, test_data in backtest_split_func(
92
+ for train_data, validation_data, test_data, _ in backtest_split_func(
89
93
  data_with_features, n_folds, **backtest_split_args
90
94
  )
91
95
  )
@@ -176,7 +176,14 @@ def train_model_pipeline_core(
176
176
  logger = structlog.get_logger(__name__)
177
177
 
178
178
  # Call common pipeline
179
- model, report, train_data, validation_data, test_data = train_pipeline_common(
179
+ (
180
+ model,
181
+ report,
182
+ train_data,
183
+ validation_data,
184
+ test_data,
185
+ operational_score_data,
186
+ ) = train_pipeline_common(
180
187
  pj,
181
188
  model_specs,
182
189
  input_data,
@@ -192,8 +199,8 @@ def train_model_pipeline_core(
192
199
  combined = combined.iloc[:, :-1]
193
200
 
194
201
  x_data, y_data = (
195
- combined.iloc[:, 1:-1],
196
- combined.iloc[:, 0],
202
+ operational_score_data.iloc[:, 1:-1],
203
+ operational_score_data.iloc[:, 0],
197
204
  )
198
205
 
199
206
  # Score method always returns R^2
@@ -260,7 +267,12 @@ def train_pipeline_common(
260
267
  horizons=horizons,
261
268
  )
262
269
 
263
- train_data, validation_data, test_data = train_pipeline_step_split_data(
270
+ (
271
+ train_data,
272
+ validation_data,
273
+ test_data,
274
+ operational_score_data,
275
+ ) = train_pipeline_step_split_data(
264
276
  data_with_features=data_with_features,
265
277
  pj=pj,
266
278
  test_fraction=test_fraction,
@@ -276,7 +288,7 @@ def train_pipeline_common(
276
288
  )
277
289
 
278
290
  # Report about the training process
279
- reporter = Reporter(train_data, validation_data, test_data)
291
+ reporter = Reporter(train_data, validation_data, test_data, pj.quantiles)
280
292
  report = reporter.generate_report(model)
281
293
 
282
294
  if pj.save_train_forecasts:
@@ -284,7 +296,7 @@ def train_pipeline_common(
284
296
  validation_data["forecast"] = model.predict(validation_data.iloc[:, 1:-1])
285
297
  test_data["forecast"] = model.predict(test_data.iloc[:, 1:-1])
286
298
 
287
- return model, report, train_data, validation_data, test_data
299
+ return model, report, train_data, validation_data, test_data, operational_score_data
288
300
 
289
301
 
290
302
  def train_pipeline_step_load_model(
@@ -515,7 +527,7 @@ def train_pipeline_step_split_data(
515
527
  required_arguments=["data", "test_fraction"]
516
528
  )
517
529
 
518
- train_data, validation_data, test_data = split_func(
530
+ train_data, validation_data, test_data, operational_score_data = split_func(
519
531
  data_with_features, test_fraction, **split_args
520
532
  )
521
533
 
@@ -523,4 +535,4 @@ def train_pipeline_step_split_data(
523
535
  if not test_data_predefined.empty:
524
536
  test_data = test_data_predefined
525
537
 
526
- return train_data, validation_data, test_data
538
+ return train_data, validation_data, test_data, operational_score_data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openstef
3
- Version: 3.2.69
3
+ Version: 3.2.71
4
4
  Summary: Open short term energy forecaster
5
5
  Home-page: https://github.com/OpenSTEF/openstef
6
6
  Author: Alliander N.V
@@ -24,15 +24,15 @@ openstef/feature_engineering/holiday_features.py,sha256=J24CURDmQOlYTFh9ffnuWc7k
24
24
  openstef/feature_engineering/lag_features.py,sha256=cMAZ5ekhNSKg7J9shoEjSa3VPrT0Z4ZjatMRsOfdeh4,5639
25
25
  openstef/feature_engineering/weather_features.py,sha256=wy3KFXUIIwSydFJZpiejsJMwURtDpv9l0HBHu-uLAGQ,15561
26
26
  openstef/metrics/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
27
- openstef/metrics/figure.py,sha256=n2n5YaRZODTJ5oIuN1AwAkFHgFFbj_qtbxTQB4BbvH4,7473
27
+ openstef/metrics/figure.py,sha256=NPJGI4FygjSnOQuL8qCbB87-T31q6EkewkbVmpLwmnk,9657
28
28
  openstef/metrics/metrics.py,sha256=c6HGQubArT5G4YxF0KY9HCP19PRHaVfXQ8KEkSwrt0w,13164
29
- openstef/metrics/reporter.py,sha256=uuHWWtrYBpbw7gWgdBgb5VSHO11pkZluZ-YYEcrVAUM,6412
29
+ openstef/metrics/reporter.py,sha256=V6pa4IUOzVcZ8OY632g5KoF8hr2MT2ySexrjZCjnuwY,7668
30
30
  openstef/model/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
31
31
  openstef/model/basecase.py,sha256=caI6Q-8y0ymlxGK9Js_H3Vh0q6ruNHlGD5RG0_kE5M0,2878
32
32
  openstef/model/confidence_interval_applicator.py,sha256=7E1_JFLZ4-hyEhleacMvp5szdmYZS4tpKAjfhGvXXvg,8602
33
33
  openstef/model/fallback.py,sha256=VV9ehgnoMZtWzqKk9H1t8wnERFh5CyC4TvDIuRP_ZDI,2861
34
34
  openstef/model/model_creator.py,sha256=U1Lw4HFyajfxQ2o5lEnCxnmRC62DEu5PBHXrm_jnEJU,5582
35
- openstef/model/objective.py,sha256=1v8ghCqEY2-Fku5NApQBVN52hIqzoYLvw9uWVKzmkE4,15347
35
+ openstef/model/objective.py,sha256=85CWxLOw8eDe1Waj81H_f8Rm5YaS-AlhCfzcMT4yFyM,15434
36
36
  openstef/model/objective_creator.py,sha256=OiPPFSiSu7z9K_983ib5iqhhu6_9tt7iyTyKNZ2Iz68,2057
37
37
  openstef/model/serializer.py,sha256=mfa8VZOXKK05zhDKBpXFw4E_UKndHl-fUuMQJiGPGjI,16909
38
38
  openstef/model/standard_deviation_generator.py,sha256=WCgZwerAEURUnSNW-DzpvJHC-3piD8TMZiOI60-HfZ8,2913
@@ -50,7 +50,7 @@ openstef/model/regressors/regressor.py,sha256=uJcx59AyCPE9f_yPcAQ59h2ZS7eNsDpIHJ
50
50
  openstef/model/regressors/xgb.py,sha256=HggA1U10srzdysjV560BMMX66kfaxCKAnOZB3JyyT_Y,808
51
51
  openstef/model/regressors/xgb_quantile.py,sha256=pjtG0WxEEPnKlL63iDHhUqydx_UVK_9w49uhlp0WS6c,7458
52
52
  openstef/model_selection/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
53
- openstef/model_selection/model_selection.py,sha256=3wbFVJY_fI7_t-7ZAky3VR4hCZ_z9O9x5dqzDK-5Jpk,10569
53
+ openstef/model_selection/model_selection.py,sha256=oGloQBP_FPdNyCs9wzS3l8zFNJxMs1P5XPjVN9qUOsw,11081
54
54
  openstef/monitoring/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
55
55
  openstef/monitoring/performance_meter.py,sha256=mMQKpDNv_-RcNYdEvEFPvB76lkG8V9gJOKYQqnH5BX4,2851
56
56
  openstef/monitoring/teams.py,sha256=fnZScPD55z9yC0q3YavWj40GEZmL7tsSGhWzG_sMPws,6401
@@ -59,8 +59,8 @@ openstef/pipeline/create_basecase_forecast.py,sha256=BPxf2MSvJyfbNCQGCr1Rol5ShqC
59
59
  openstef/pipeline/create_component_forecast.py,sha256=HgByae6ruVhy6TuGIJEuPyLyx7g4zSvJfk6Dynlqjl4,5030
60
60
  openstef/pipeline/create_forecast.py,sha256=2vK2cH_VeRcoDWPXR06zFmwQ043FPA9uPvg5_OyxUfU,5008
61
61
  openstef/pipeline/optimize_hyperparameters.py,sha256=qptTlg6v0hvHt1ocA7nueLwYRXAB82VI6bQ9ATmBVKQ,10824
62
- openstef/pipeline/train_create_forecast_backtest.py,sha256=BTJKH_VNu-ZLgy7UuIzEOHK8a_eK-o6JCSpcG-uUIQo,5444
63
- openstef/pipeline/train_model.py,sha256=rGTZ8ZztLLMAnrojI_tawYCW-8M6W_P4nP9wuN1llBQ,18307
62
+ openstef/pipeline/train_create_forecast_backtest.py,sha256=upuoiE01vjjxUu_sY0tANPqdOtpGKrQQ3azhVDnBJdc,5512
63
+ openstef/pipeline/train_model.py,sha256=tYC8xh6eKpea0CutHNoSGYvt6hoQt4vDWqXmZs6ejbk,18567
64
64
  openstef/pipeline/utils.py,sha256=fkc-oNirJ-JiyuOAL08RFrnPYPwudWal_N-BO6Cw980,2086
65
65
  openstef/postprocessing/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
66
66
  openstef/postprocessing/postprocessing.py,sha256=nehd0tDpkdIaWFJggQ-fDizIKdfmqJ3IOGfk0sDnrzk,8409
@@ -83,8 +83,8 @@ openstef/tasks/utils/predictionjobloop.py,sha256=u4WQjvqBM6z9T7VFUZ-9JqgdepNJO0Z
83
83
  openstef/tasks/utils/taskcontext.py,sha256=yI6TntOkZcW8JiNVuw4uJIigEBL0_iIrkPklF4ZeCX4,5401
84
84
  openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
85
85
  openstef/validation/validation.py,sha256=AYQJBXwbFhpq34bqEhybw0lTIJ8Td4vr2-AbWxGxm3M,16917
86
- openstef-3.2.69.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
87
- openstef-3.2.69.dist-info/METADATA,sha256=RuxSKGf7C7DTHEMCdxcfzrkComwDDI1TxoLXSw0R0Fg,6972
88
- openstef-3.2.69.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
89
- openstef-3.2.69.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
90
- openstef-3.2.69.dist-info/RECORD,,
86
+ openstef-3.2.71.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
87
+ openstef-3.2.71.dist-info/METADATA,sha256=Uou71qcVT-bsF0YLEHWasre0rbhX68vvkfqF-W0OBxM,6972
88
+ openstef-3.2.71.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
89
+ openstef-3.2.71.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
90
+ openstef-3.2.71.dist-info/RECORD,,