openstef 3.2.70__py3-none-any.whl → 3.2.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -97,7 +97,12 @@ class RegressorObjective:
97
97
  "stratification_min_max": self.model_type != MLModelType.ProLoaf,
98
98
  "back_test": True,
99
99
  }
100
- (self.train_data, self.validation_data, self.test_data,) = self.split_func(
100
+ (
101
+ self.train_data,
102
+ self.validation_data,
103
+ self.test_data,
104
+ self.operational_score_data,
105
+ ) = self.split_func(
101
106
  self.input_data,
102
107
  test_fraction=self.test_fraction,
103
108
  validation_fraction=self.validation_fraction,
@@ -113,7 +113,7 @@ def split_data_train_validation_test(
113
113
  validation dataset. In an operational setting the following sequence is
114
114
  returned (when using stratification):
115
115
 
116
- Test >> Train >> Validation
116
+ Train >> Validation (there is no separate test set; the operational score data is Train and Validation combined.)
117
117
 
118
118
  For a back test (indicated with argument "back_test") the following sequence
119
119
  is returned:
@@ -141,6 +141,7 @@ def split_data_train_validation_test(
141
141
  - Test data.
142
142
 
143
143
  """
144
+ test_fraction = test_fraction if back_test else 0
144
145
  train_fraction = 1 - (test_fraction + validation_fraction)
145
146
  if train_fraction < 0:
146
147
  raise ValueError(
@@ -172,10 +173,18 @@ def split_data_train_validation_test(
172
173
  start_date_test = end_date - np.round(number_indices * test_fraction) * delta
173
174
  test_data = data_[start_date_test:]
174
175
  train_val_data = data_[:start_date_test]
176
+ operational_score_data = (
177
+ pd.DataFrame()
178
+ ) # Empty because a backtest is not an operational setting.
175
179
  else:
176
180
  start_date_val = start_date + np.round(number_indices * test_fraction) * delta
177
- test_data = data_[:start_date_val]
181
+ test_data = data_[
182
+ :start_date_val
183
+ ] # Empty as all data is used for training in an operational setting.
178
184
  train_val_data = data_[start_date_val:]
185
+ operational_score_data = data_.copy(deep=True).reset_index(
186
+ drop=True
187
+ ) # Used to check whether a newly trained operational model is better than the old one.
179
188
 
180
189
  if stratification_min_max and (
181
190
  len(set(train_val_data.index.date)) >= min_days_for_stratification
@@ -248,11 +257,7 @@ def split_data_train_validation_test(
248
257
  validation_data = validation_data.sort_index()
249
258
  test_data = test_data.sort_index()
250
259
 
251
- return (
252
- train_data,
253
- validation_data,
254
- test_data,
255
- )
260
+ return (train_data, validation_data, test_data, operational_score_data)
256
261
 
257
262
 
258
263
  def backtest_split_default(
@@ -286,14 +291,14 @@ def backtest_split_default(
286
291
  for ifold in range(n_folds):
287
292
  test_data = data[data["random_fold"] == ifold].sort_index()
288
293
 
289
- (train_data, validation_data, _,) = split_data_train_validation_test(
294
+ (train_data, validation_data, _, _) = split_data_train_validation_test(
290
295
  data[data["random_fold"] != ifold].iloc[:, :-2],
291
296
  test_fraction=0,
292
297
  back_test=True,
293
298
  stratification_min_max=stratification_min_max,
294
299
  )
295
300
 
296
- yield train_data, validation_data, test_data.iloc[:, :-2]
301
+ yield train_data, validation_data, test_data.iloc[:, :-2], pd.DataFrame()
297
302
  else:
298
303
  yield split_data_train_validation_test(
299
304
  data,
@@ -82,10 +82,14 @@ def train_model_and_forecast_back_test(
82
82
  ) = zip(
83
83
  *(
84
84
  train_model_and_forecast_test_core(
85
- pj, modelspecs, train_data, validation_data, test_data
85
+ pj,
86
+ modelspecs,
87
+ train_data,
88
+ validation_data,
89
+ test_data,
86
90
  )
87
91
  + (train_data, validation_data, test_data)
88
- for train_data, validation_data, test_data in backtest_split_func(
92
+ for train_data, validation_data, test_data, _ in backtest_split_func(
89
93
  data_with_features, n_folds, **backtest_split_args
90
94
  )
91
95
  )
@@ -176,7 +176,14 @@ def train_model_pipeline_core(
176
176
  logger = structlog.get_logger(__name__)
177
177
 
178
178
  # Call common pipeline
179
- model, report, train_data, validation_data, test_data = train_pipeline_common(
179
+ (
180
+ model,
181
+ report,
182
+ train_data,
183
+ validation_data,
184
+ test_data,
185
+ operational_score_data,
186
+ ) = train_pipeline_common(
180
187
  pj,
181
188
  model_specs,
182
189
  input_data,
@@ -192,8 +199,8 @@ def train_model_pipeline_core(
192
199
  combined = combined.iloc[:, :-1]
193
200
 
194
201
  x_data, y_data = (
195
- combined.iloc[:, 1:-1],
196
- combined.iloc[:, 0],
202
+ operational_score_data.iloc[:, 1:-1],
203
+ operational_score_data.iloc[:, 0],
197
204
  )
198
205
 
199
206
  # Score method always returns R^2
@@ -260,7 +267,12 @@ def train_pipeline_common(
260
267
  horizons=horizons,
261
268
  )
262
269
 
263
- train_data, validation_data, test_data = train_pipeline_step_split_data(
270
+ (
271
+ train_data,
272
+ validation_data,
273
+ test_data,
274
+ operational_score_data,
275
+ ) = train_pipeline_step_split_data(
264
276
  data_with_features=data_with_features,
265
277
  pj=pj,
266
278
  test_fraction=test_fraction,
@@ -284,7 +296,7 @@ def train_pipeline_common(
284
296
  validation_data["forecast"] = model.predict(validation_data.iloc[:, 1:-1])
285
297
  test_data["forecast"] = model.predict(test_data.iloc[:, 1:-1])
286
298
 
287
- return model, report, train_data, validation_data, test_data
299
+ return model, report, train_data, validation_data, test_data, operational_score_data
288
300
 
289
301
 
290
302
  def train_pipeline_step_load_model(
@@ -515,7 +527,7 @@ def train_pipeline_step_split_data(
515
527
  required_arguments=["data", "test_fraction"]
516
528
  )
517
529
 
518
- train_data, validation_data, test_data = split_func(
530
+ train_data, validation_data, test_data, operational_score_data = split_func(
519
531
  data_with_features, test_fraction, **split_args
520
532
  )
521
533
 
@@ -523,4 +535,4 @@ def train_pipeline_step_split_data(
523
535
  if not test_data_predefined.empty:
524
536
  test_data = test_data_predefined
525
537
 
526
- return train_data, validation_data, test_data
538
+ return train_data, validation_data, test_data, operational_score_data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openstef
3
- Version: 3.2.70
3
+ Version: 3.2.71
4
4
  Summary: Open short term energy forecaster
5
5
  Home-page: https://github.com/OpenSTEF/openstef
6
6
  Author: Alliander N.V
@@ -32,7 +32,7 @@ openstef/model/basecase.py,sha256=caI6Q-8y0ymlxGK9Js_H3Vh0q6ruNHlGD5RG0_kE5M0,28
32
32
  openstef/model/confidence_interval_applicator.py,sha256=7E1_JFLZ4-hyEhleacMvp5szdmYZS4tpKAjfhGvXXvg,8602
33
33
  openstef/model/fallback.py,sha256=VV9ehgnoMZtWzqKk9H1t8wnERFh5CyC4TvDIuRP_ZDI,2861
34
34
  openstef/model/model_creator.py,sha256=U1Lw4HFyajfxQ2o5lEnCxnmRC62DEu5PBHXrm_jnEJU,5582
35
- openstef/model/objective.py,sha256=1v8ghCqEY2-Fku5NApQBVN52hIqzoYLvw9uWVKzmkE4,15347
35
+ openstef/model/objective.py,sha256=85CWxLOw8eDe1Waj81H_f8Rm5YaS-AlhCfzcMT4yFyM,15434
36
36
  openstef/model/objective_creator.py,sha256=OiPPFSiSu7z9K_983ib5iqhhu6_9tt7iyTyKNZ2Iz68,2057
37
37
  openstef/model/serializer.py,sha256=mfa8VZOXKK05zhDKBpXFw4E_UKndHl-fUuMQJiGPGjI,16909
38
38
  openstef/model/standard_deviation_generator.py,sha256=WCgZwerAEURUnSNW-DzpvJHC-3piD8TMZiOI60-HfZ8,2913
@@ -50,7 +50,7 @@ openstef/model/regressors/regressor.py,sha256=uJcx59AyCPE9f_yPcAQ59h2ZS7eNsDpIHJ
50
50
  openstef/model/regressors/xgb.py,sha256=HggA1U10srzdysjV560BMMX66kfaxCKAnOZB3JyyT_Y,808
51
51
  openstef/model/regressors/xgb_quantile.py,sha256=pjtG0WxEEPnKlL63iDHhUqydx_UVK_9w49uhlp0WS6c,7458
52
52
  openstef/model_selection/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
53
- openstef/model_selection/model_selection.py,sha256=3wbFVJY_fI7_t-7ZAky3VR4hCZ_z9O9x5dqzDK-5Jpk,10569
53
+ openstef/model_selection/model_selection.py,sha256=oGloQBP_FPdNyCs9wzS3l8zFNJxMs1P5XPjVN9qUOsw,11081
54
54
  openstef/monitoring/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
55
55
  openstef/monitoring/performance_meter.py,sha256=mMQKpDNv_-RcNYdEvEFPvB76lkG8V9gJOKYQqnH5BX4,2851
56
56
  openstef/monitoring/teams.py,sha256=fnZScPD55z9yC0q3YavWj40GEZmL7tsSGhWzG_sMPws,6401
@@ -59,8 +59,8 @@ openstef/pipeline/create_basecase_forecast.py,sha256=BPxf2MSvJyfbNCQGCr1Rol5ShqC
59
59
  openstef/pipeline/create_component_forecast.py,sha256=HgByae6ruVhy6TuGIJEuPyLyx7g4zSvJfk6Dynlqjl4,5030
60
60
  openstef/pipeline/create_forecast.py,sha256=2vK2cH_VeRcoDWPXR06zFmwQ043FPA9uPvg5_OyxUfU,5008
61
61
  openstef/pipeline/optimize_hyperparameters.py,sha256=qptTlg6v0hvHt1ocA7nueLwYRXAB82VI6bQ9ATmBVKQ,10824
62
- openstef/pipeline/train_create_forecast_backtest.py,sha256=BTJKH_VNu-ZLgy7UuIzEOHK8a_eK-o6JCSpcG-uUIQo,5444
63
- openstef/pipeline/train_model.py,sha256=CEbgP77UMay9SBIXRkHSRVXS8D5JgaXjWCgJiTLyl0Q,18321
62
+ openstef/pipeline/train_create_forecast_backtest.py,sha256=upuoiE01vjjxUu_sY0tANPqdOtpGKrQQ3azhVDnBJdc,5512
63
+ openstef/pipeline/train_model.py,sha256=tYC8xh6eKpea0CutHNoSGYvt6hoQt4vDWqXmZs6ejbk,18567
64
64
  openstef/pipeline/utils.py,sha256=fkc-oNirJ-JiyuOAL08RFrnPYPwudWal_N-BO6Cw980,2086
65
65
  openstef/postprocessing/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
66
66
  openstef/postprocessing/postprocessing.py,sha256=nehd0tDpkdIaWFJggQ-fDizIKdfmqJ3IOGfk0sDnrzk,8409
@@ -83,8 +83,8 @@ openstef/tasks/utils/predictionjobloop.py,sha256=u4WQjvqBM6z9T7VFUZ-9JqgdepNJO0Z
83
83
  openstef/tasks/utils/taskcontext.py,sha256=yI6TntOkZcW8JiNVuw4uJIigEBL0_iIrkPklF4ZeCX4,5401
84
84
  openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
85
85
  openstef/validation/validation.py,sha256=AYQJBXwbFhpq34bqEhybw0lTIJ8Td4vr2-AbWxGxm3M,16917
86
- openstef-3.2.70.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
87
- openstef-3.2.70.dist-info/METADATA,sha256=K_3wFOKQml_tsf4wH-galdQ9-hSFKASsb14D5diR0iE,6972
88
- openstef-3.2.70.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
89
- openstef-3.2.70.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
90
- openstef-3.2.70.dist-info/RECORD,,
86
+ openstef-3.2.71.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
87
+ openstef-3.2.71.dist-info/METADATA,sha256=Uou71qcVT-bsF0YLEHWasre0rbhX68vvkfqF-W0OBxM,6972
88
+ openstef-3.2.71.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
89
+ openstef-3.2.71.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
90
+ openstef-3.2.71.dist-info/RECORD,,