openstef 3.2.70-py3-none-any.whl → 3.2.71-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- openstef/model/objective.py +6 -1
- openstef/model_selection/model_selection.py +14 -9
- openstef/pipeline/train_create_forecast_backtest.py +6 -2
- openstef/pipeline/train_model.py +19 -7
- {openstef-3.2.70.dist-info → openstef-3.2.71.dist-info}/METADATA +1 -1
- {openstef-3.2.70.dist-info → openstef-3.2.71.dist-info}/RECORD +9 -9
- {openstef-3.2.70.dist-info → openstef-3.2.71.dist-info}/LICENSE +0 -0
- {openstef-3.2.70.dist-info → openstef-3.2.71.dist-info}/WHEEL +0 -0
- {openstef-3.2.70.dist-info → openstef-3.2.71.dist-info}/top_level.txt +0 -0
openstef/model/objective.py
CHANGED
@@ -97,7 +97,12 @@ class RegressorObjective:
             "stratification_min_max": self.model_type != MLModelType.ProLoaf,
             "back_test": True,
         }
-        (
+        (
+            self.train_data,
+            self.validation_data,
+            self.test_data,
+            self.operational_score_data,
+        ) = self.split_func(
             self.input_data,
             test_fraction=self.test_fraction,
             validation_fraction=self.validation_fraction,
openstef/model_selection/model_selection.py
CHANGED
@@ -113,7 +113,7 @@ def split_data_train_validation_test(
     validation dataset. In an operational setting the following sequence is
     returned (when using stratification):
 
-
+        Train >> Validation (and the test is the Train and Validation combined.)
 
     For a back test (indicated with argument "back_test") the following sequence
     is returned:
@@ -141,6 +141,7 @@ def split_data_train_validation_test(
         - Test data.
 
     """
+    test_fraction = test_fraction if back_test else 0
     train_fraction = 1 - (test_fraction + validation_fraction)
     if train_fraction < 0:
         raise ValueError(
@@ -172,10 +173,18 @@ def split_data_train_validation_test(
         start_date_test = end_date - np.round(number_indices * test_fraction) * delta
         test_data = data_[start_date_test:]
         train_val_data = data_[:start_date_test]
+        operational_score_data = (
+            pd.DataFrame()
+        )  # Empty because a backtest is no operational setting.
     else:
         start_date_val = start_date + np.round(number_indices * test_fraction) * delta
-        test_data = data_[
+        test_data = data_[
+            :start_date_val
+        ]  # Empty as all data is used for training in an operational setting.
         train_val_data = data_[start_date_val:]
+        operational_score_data = data_.copy(deep=True).reset_index(
+            drop=True
+        )  # Used to check wether a new operationally train model is better than the old one.
 
     if stratification_min_max and (
         len(set(train_val_data.index.date)) >= min_days_for_stratification
@@ -248,11 +257,7 @@ def split_data_train_validation_test(
     validation_data = validation_data.sort_index()
     test_data = test_data.sort_index()
 
-    return (
-        train_data,
-        validation_data,
-        test_data,
-    )
+    return (train_data, validation_data, test_data, operational_score_data)
 
 
 def backtest_split_default(
@@ -286,14 +291,14 @@ def backtest_split_default(
         for ifold in range(n_folds):
             test_data = data[data["random_fold"] == ifold].sort_index()
 
-            (train_data, validation_data, _,) = split_data_train_validation_test(
+            (train_data, validation_data, _, _) = split_data_train_validation_test(
                 data[data["random_fold"] != ifold].iloc[:, :-2],
                 test_fraction=0,
                 back_test=True,
                 stratification_min_max=stratification_min_max,
             )
 
-            yield train_data, validation_data, test_data.iloc[:, :-2]
+            yield train_data, validation_data, test_data.iloc[:, :-2], pd.DataFrame()
     else:
         yield split_data_train_validation_test(
             data,
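With these changes, split_data_train_validation_test always returns four datasets; operational_score_data is empty for a back test and a full copy of the input in the operational case (where test_fraction is forced to 0). A brief usage sketch against the new return signature, with made-up 15-minute data and stratification disabled to keep the example short:

import numpy as np
import pandas as pd

from openstef.model_selection.model_selection import split_data_train_validation_test

# Illustrative 15-minute data; the column names and values are invented for this example.
index = pd.date_range("2023-01-01", periods=4 * 96, freq="15min")
data = pd.DataFrame(
    {"load": np.random.rand(len(index)), "feature_1": np.random.rand(len(index))},
    index=index,
)

# Back test: a real test set is held out, operational_score_data stays empty.
train, val, test, op_score = split_data_train_validation_test(
    data, test_fraction=0.1, back_test=True, stratification_min_max=False
)
print(len(test), len(op_score))

# Operational setting: (virtually) no test set; operational_score_data is a full copy
# of the input, later used to compare a newly trained model against the previous one.
train, val, test, op_score = split_data_train_validation_test(
    data, test_fraction=0.1, back_test=False, stratification_min_max=False
)
print(len(test), len(op_score))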
openstef/pipeline/train_create_forecast_backtest.py
CHANGED
@@ -82,10 +82,14 @@ def train_model_and_forecast_back_test(
     ) = zip(
         *(
             train_model_and_forecast_test_core(
-                pj,
+                pj,
+                modelspecs,
+                train_data,
+                validation_data,
+                test_data,
             )
             + (train_data, validation_data, test_data)
-            for train_data, validation_data, test_data in backtest_split_func(
+            for train_data, validation_data, test_data, _ in backtest_split_func(
                 data_with_features, n_folds, **backtest_split_args
             )
         )
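Because each fold is now unpacked into four values, a custom backtest_split_func passed to the back-test pipeline has to yield four items per fold; the fourth is ignored here. A hedged sketch of such a generator (the function name and the simple sequential folding are illustrative assumptions, not part of the package):

import numpy as np
import pandas as pd


def sequential_backtest_split(data: pd.DataFrame, n_folds: int, **kwargs):
    """Illustrative generator yielding the four items the pipeline now unpacks per fold."""
    fold_edges = np.linspace(0, len(data), n_folds + 1, dtype=int)
    for ifold in range(n_folds):
        test_data = data.iloc[fold_edges[ifold] : fold_edges[ifold + 1]]
        rest = data.drop(test_data.index)
        n_val = int(round(0.15 * len(rest)))
        validation_data = rest.iloc[:n_val]
        train_data = rest.iloc[n_val:]
        # Fourth item: empty placeholder, since a back test is not an operational setting.
        yield train_data, validation_data, test_data, pd.DataFrame()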
openstef/pipeline/train_model.py
CHANGED
@@ -176,7 +176,14 @@ def train_model_pipeline_core(
     logger = structlog.get_logger(__name__)
 
     # Call common pipeline
-
+    (
+        model,
+        report,
+        train_data,
+        validation_data,
+        test_data,
+        operational_score_data,
+    ) = train_pipeline_common(
         pj,
         model_specs,
         input_data,
@@ -192,8 +199,8 @@ def train_model_pipeline_core(
         combined = combined.iloc[:, :-1]
 
         x_data, y_data = (
-
-
+            operational_score_data.iloc[:, 1:-1],
+            operational_score_data.iloc[:, 0],
         )
 
         # Score method always returns R^2
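train_model_pipeline_core now scores the previous and the newly trained model on operational_score_data rather than on the held-out frames. A hedged sketch of that kind of comparison, assuming sklearn-style regressors and the pipeline's target-first, horizon-last column layout; the helper name, the penalty argument, and the retention rule are illustrative, not the package's exact logic:

import pandas as pd
from sklearn.base import RegressorMixin


def new_model_is_better(
    new_model: RegressorMixin,
    old_model: RegressorMixin,
    operational_score_data: pd.DataFrame,
    penalty: float = 1.0,
) -> bool:
    """Compare the old and new model on the operational score set (illustrative helper)."""
    x_data, y_data = (
        operational_score_data.iloc[:, 1:-1],  # feature columns
        operational_score_data.iloc[:, 0],  # target column
    )
    # score() on sklearn-style regressors always returns R^2.
    score_new_model = new_model.score(x_data, y_data)
    score_old_model = old_model.score(x_data, y_data)
    return score_new_model * penalty >= score_old_model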
@@ -260,7 +267,12 @@ def train_pipeline_common(
         horizons=horizons,
     )
 
-
+    (
+        train_data,
+        validation_data,
+        test_data,
+        operational_score_data,
+    ) = train_pipeline_step_split_data(
         data_with_features=data_with_features,
         pj=pj,
         test_fraction=test_fraction,
@@ -284,7 +296,7 @@ def train_pipeline_common(
     validation_data["forecast"] = model.predict(validation_data.iloc[:, 1:-1])
     test_data["forecast"] = model.predict(test_data.iloc[:, 1:-1])
 
-    return model, report, train_data, validation_data, test_data
+    return model, report, train_data, validation_data, test_data, operational_score_data
 
 
 def train_pipeline_step_load_model(
@@ -515,7 +527,7 @@ def train_pipeline_step_split_data(
         required_arguments=["data", "test_fraction"]
    )
 
-    train_data, validation_data, test_data = split_func(
+    train_data, validation_data, test_data, operational_score_data = split_func(
         data_with_features, test_fraction, **split_args
     )
 
@@ -523,4 +535,4 @@ def train_pipeline_step_split_data(
     if not test_data_predefined.empty:
         test_data = test_data_predefined
 
-    return train_data, validation_data, test_data
+    return train_data, validation_data, test_data, operational_score_data
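train_pipeline_step_split_data validates a configurable split_func (requiring at least data and test_fraction arguments) and now unpacks four return values from it, so a custom split function has to follow the same contract as split_data_train_validation_test. A minimal sketch of such a function; the name and the plain chronological split are illustrative assumptions:

import pandas as pd


def chronological_split(
    data: pd.DataFrame,
    test_fraction: float,
    validation_fraction: float = 0.15,
    back_test: bool = False,
    **kwargs,
):
    """Illustrative custom split_func returning the four datasets the pipeline now expects."""
    # Mirror the package behaviour: no test set is held out in an operational setting.
    test_fraction = test_fraction if back_test else 0
    n = len(data)
    n_test = int(round(n * test_fraction))
    n_val = int(round(n * validation_fraction))

    test_data = data.iloc[n - n_test :]
    validation_data = data.iloc[n - n_test - n_val : n - n_test]
    train_data = data.iloc[: n - n_test - n_val]
    # Empty for a back test; a full copy operationally, used to score the old vs. new model.
    operational_score_data = (
        pd.DataFrame() if back_test else data.copy(deep=True).reset_index(drop=True)
    )
    return train_data, validation_data, test_data, operational_score_data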
{openstef-3.2.70.dist-info → openstef-3.2.71.dist-info}/RECORD
CHANGED
@@ -32,7 +32,7 @@ openstef/model/basecase.py,sha256=caI6Q-8y0ymlxGK9Js_H3Vh0q6ruNHlGD5RG0_kE5M0,28
 openstef/model/confidence_interval_applicator.py,sha256=7E1_JFLZ4-hyEhleacMvp5szdmYZS4tpKAjfhGvXXvg,8602
 openstef/model/fallback.py,sha256=VV9ehgnoMZtWzqKk9H1t8wnERFh5CyC4TvDIuRP_ZDI,2861
 openstef/model/model_creator.py,sha256=U1Lw4HFyajfxQ2o5lEnCxnmRC62DEu5PBHXrm_jnEJU,5582
-openstef/model/objective.py,sha256=
+openstef/model/objective.py,sha256=85CWxLOw8eDe1Waj81H_f8Rm5YaS-AlhCfzcMT4yFyM,15434
 openstef/model/objective_creator.py,sha256=OiPPFSiSu7z9K_983ib5iqhhu6_9tt7iyTyKNZ2Iz68,2057
 openstef/model/serializer.py,sha256=mfa8VZOXKK05zhDKBpXFw4E_UKndHl-fUuMQJiGPGjI,16909
 openstef/model/standard_deviation_generator.py,sha256=WCgZwerAEURUnSNW-DzpvJHC-3piD8TMZiOI60-HfZ8,2913
@@ -50,7 +50,7 @@ openstef/model/regressors/regressor.py,sha256=uJcx59AyCPE9f_yPcAQ59h2ZS7eNsDpIHJ
 openstef/model/regressors/xgb.py,sha256=HggA1U10srzdysjV560BMMX66kfaxCKAnOZB3JyyT_Y,808
 openstef/model/regressors/xgb_quantile.py,sha256=pjtG0WxEEPnKlL63iDHhUqydx_UVK_9w49uhlp0WS6c,7458
 openstef/model_selection/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
-openstef/model_selection/model_selection.py,sha256=
+openstef/model_selection/model_selection.py,sha256=oGloQBP_FPdNyCs9wzS3l8zFNJxMs1P5XPjVN9qUOsw,11081
 openstef/monitoring/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/monitoring/performance_meter.py,sha256=mMQKpDNv_-RcNYdEvEFPvB76lkG8V9gJOKYQqnH5BX4,2851
 openstef/monitoring/teams.py,sha256=fnZScPD55z9yC0q3YavWj40GEZmL7tsSGhWzG_sMPws,6401
@@ -59,8 +59,8 @@ openstef/pipeline/create_basecase_forecast.py,sha256=BPxf2MSvJyfbNCQGCr1Rol5ShqC
 openstef/pipeline/create_component_forecast.py,sha256=HgByae6ruVhy6TuGIJEuPyLyx7g4zSvJfk6Dynlqjl4,5030
 openstef/pipeline/create_forecast.py,sha256=2vK2cH_VeRcoDWPXR06zFmwQ043FPA9uPvg5_OyxUfU,5008
 openstef/pipeline/optimize_hyperparameters.py,sha256=qptTlg6v0hvHt1ocA7nueLwYRXAB82VI6bQ9ATmBVKQ,10824
-openstef/pipeline/train_create_forecast_backtest.py,sha256=
-openstef/pipeline/train_model.py,sha256=
+openstef/pipeline/train_create_forecast_backtest.py,sha256=upuoiE01vjjxUu_sY0tANPqdOtpGKrQQ3azhVDnBJdc,5512
+openstef/pipeline/train_model.py,sha256=tYC8xh6eKpea0CutHNoSGYvt6hoQt4vDWqXmZs6ejbk,18567
 openstef/pipeline/utils.py,sha256=fkc-oNirJ-JiyuOAL08RFrnPYPwudWal_N-BO6Cw980,2086
 openstef/postprocessing/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/postprocessing/postprocessing.py,sha256=nehd0tDpkdIaWFJggQ-fDizIKdfmqJ3IOGfk0sDnrzk,8409
@@ -83,8 +83,8 @@ openstef/tasks/utils/predictionjobloop.py,sha256=u4WQjvqBM6z9T7VFUZ-9JqgdepNJO0Z
 openstef/tasks/utils/taskcontext.py,sha256=yI6TntOkZcW8JiNVuw4uJIigEBL0_iIrkPklF4ZeCX4,5401
 openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/validation/validation.py,sha256=AYQJBXwbFhpq34bqEhybw0lTIJ8Td4vr2-AbWxGxm3M,16917
-openstef-3.2.
-openstef-3.2.
-openstef-3.2.
-openstef-3.2.
-openstef-3.2.
+openstef-3.2.71.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
+openstef-3.2.71.dist-info/METADATA,sha256=Uou71qcVT-bsF0YLEHWasre0rbhX68vvkfqF-W0OBxM,6972
+openstef-3.2.71.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+openstef-3.2.71.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
+openstef-3.2.71.dist-info/RECORD,,
{openstef-3.2.70.dist-info → openstef-3.2.71.dist-info}/LICENSE
File without changes
{openstef-3.2.70.dist-info → openstef-3.2.71.dist-info}/WHEEL
File without changes
{openstef-3.2.70.dist-info → openstef-3.2.71.dist-info}/top_level.txt
File without changes