openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/app_settings.py +19 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
- openstef/data/dutch_holidays.csv +1759 -0
- openstef/data_classes/data_prep.py +1 -1
- openstef/data_classes/prediction_job.py +15 -9
- openstef/enums.py +108 -9
- openstef/exceptions.py +1 -1
- openstef/feature_engineering/apply_features.py +25 -6
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
- openstef/feature_engineering/cyclic_features.py +102 -0
- openstef/feature_engineering/data_preparation.py +12 -5
- openstef/feature_engineering/feature_applicator.py +1 -5
- openstef/feature_engineering/general.py +14 -0
- openstef/feature_engineering/holiday_features.py +35 -26
- openstef/feature_engineering/missing_values_transformer.py +141 -0
- openstef/feature_engineering/weather_features.py +7 -0
- openstef/metrics/figure.py +3 -0
- openstef/metrics/metrics.py +58 -1
- openstef/metrics/reporter.py +7 -0
- openstef/model/confidence_interval_applicator.py +28 -3
- openstef/model/model_creator.py +54 -41
- openstef/model/objective.py +17 -34
- openstef/model/objective_creator.py +13 -12
- openstef/model/regressors/arima.py +1 -1
- openstef/model/regressors/dazls.py +35 -96
- openstef/model/regressors/flatliner.py +95 -0
- openstef/model/regressors/linear_quantile.py +296 -0
- openstef/model/regressors/xgb.py +23 -0
- openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
- openstef/model/regressors/xgb_quantile.py +3 -0
- openstef/model/serializer.py +10 -0
- openstef/model_selection/model_selection.py +4 -1
- openstef/monitoring/performance_meter.py +1 -2
- openstef/monitoring/teams.py +11 -0
- openstef/pipeline/create_basecase_forecast.py +11 -1
- openstef/pipeline/create_component_forecast.py +24 -28
- openstef/pipeline/create_forecast.py +20 -1
- openstef/pipeline/optimize_hyperparameters.py +18 -16
- openstef/pipeline/train_create_forecast_backtest.py +11 -1
- openstef/pipeline/train_model.py +31 -12
- openstef/pipeline/utils.py +3 -0
- openstef/postprocessing/postprocessing.py +29 -0
- openstef/settings.py +15 -0
- openstef/tasks/calculate_kpi.py +23 -20
- openstef/tasks/create_basecase_forecast.py +15 -7
- openstef/tasks/create_components_forecast.py +24 -8
- openstef/tasks/create_forecast.py +9 -6
- openstef/tasks/create_solar_forecast.py +4 -4
- openstef/tasks/optimize_hyperparameters.py +2 -2
- openstef/tasks/split_forecast.py +9 -2
- openstef/tasks/train_model.py +9 -7
- openstef/tasks/utils/taskcontext.py +7 -0
- openstef/validation/validation.py +28 -3
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
- openstef-3.4.44.dist-info/RECORD +97 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
- openstef/data/dutch_holidays_2020-2022.csv +0 -831
- openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
- openstef/feature_engineering/historic_features.py +0 -40
- openstef/model/regressors/proloaf.py +0 -281
- openstef/tasks/run_tracy.py +0 -145
- openstef-3.4.10.dist-info/RECORD +0 -104
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
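Before the per-file hunks: the most visible API change in this range is the rename of `openstef.enums.MLModelType` to `ModelType`, together with the removal of the ProLoaf regressor (`proloaf.py`), the Tracy task (`run_tracy.py`), and the DAZLS 3.4.0 model artifacts. Downstream code that imports the enum needs a one-line change; a small compatibility shim (the try/except fallback is our own suggestion, not part of the package) could look like:

```python
# Hypothetical migration shim for code that must run against both
# openstef 3.4.10 and 3.4.44; only the import locations are taken from this diff.
try:
    from openstef.enums import ModelType  # openstef 3.4.44
except ImportError:
    from openstef.enums import MLModelType as ModelType  # openstef <= 3.4.10

# Iterating the enum still yields the model-type strings used by the tasks.
model_types = [ml.value for ml in ModelType]
```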
openstef/tasks/calculate_kpi.py
CHANGED
```diff
@@ -18,6 +18,8 @@ Example:
         $ python calculate_kpi.py
 
 """
+import logging
+
 # Import builtins
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -27,9 +29,10 @@ import pandas as pd
 import structlog
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.exceptions import NoPredictedLoadError, NoRealisedLoadError
 from openstef.metrics import metrics
+from openstef.settings import Settings
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
 from openstef.tasks.utils.taskcontext import TaskContext
 from openstef.validation import validation
@@ -39,7 +42,7 @@ THRESHOLD_RETRAINING = 0.25
 THRESHOLD_OPTIMIZING = 0.50
 
 
-def main(model_type: MLModelType = None, config=None, database=None) -> None:
+def main(model_type: ModelType = None, config=None, database=None) -> None:
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -49,7 +52,7 @@ def main(model_type: MLModelType = None, config=None, database=None) -> None:
         )
 
     if model_type is None:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
     with TaskContext(taskname, config, database) as context:
         # Set start and end time
@@ -69,6 +72,8 @@ def check_kpi_task(
     context: TaskContext,
     start_time: datetime,
     end_time: datetime,
+    threshold_optimizing=THRESHOLD_OPTIMIZING,
+    threshold_retraining=THRESHOLD_RETRAINING,
 ) -> None:
     # Apply default parameters if none are provided
     if start_time is None:
@@ -99,28 +104,21 @@ def check_kpi_task(
 
     # Add pid to the list of pids that should be retrained or optimized if
     # performance is insufficient
-    if kpis["47.0h"]["rMAE"] > THRESHOLD_RETRAINING:
+    if kpis["47.0h"]["rMAE"] > threshold_retraining:
         context.logger.warning(
             "Need to retrain model, retraining threshold rMAE 47h exceeded",
             t_ahead="47.0h",
             rMAE=kpis["47.0h"]["rMAE"],
-            retraining_threshold=THRESHOLD_RETRAINING,
+            retraining_threshold=threshold_retraining,
         )
-        function_name = "train_model"
-
-        context.logger.info("Adding tracy job", function=function_name)
-        context.database.ktp_api.add_tracy_job(pj["id"], function=function_name)
 
-    if kpis["47.0h"]["rMAE"] > THRESHOLD_OPTIMIZING:
+    if kpis["47.0h"]["rMAE"] > threshold_optimizing:
         context.logger.warning(
             "Need to optimize hyperparameters, optimizing threshold rMAE 47h exceeded",
             t_ahead="47.0h",
             rMAE=kpis["47.0h"]["rMAE"],
-            optimizing_threshold=THRESHOLD_OPTIMIZING,
+            optimizing_threshold=threshold_optimizing,
         )
-        function_name = "optimize_hyperparameters"
-        context.logger.info("Adding tracy job", function=function_name)
-        context.database.ktp_api.add_tracy_job(pj["id"], function=function_name)
 
 
 def calc_kpi_for_specific_pid(
@@ -160,7 +158,12 @@ def calc_kpi_for_specific_pid(
     COMPLETENESS_REALISED_THRESHOLDS = 0.7
    COMPLETENESS_PREDICTED_LOAD_THRESHOLD = 0.7
 
-    logger = structlog.get_logger(__name__)
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
+    logger = structlog.get_logger(__name__)
 
     # If predicted is empty
     if len(predicted_load) == 0:
@@ -194,9 +197,9 @@ def calc_kpi_for_specific_pid(
 
     # Raise exception in case of constant load
     if combined.load.nunique() == 1:
-        logger.warning(
+        logger.warning(
             "The load is constant! KPIs will still be calculated, but relative metrics"
-            " will be nan"
+            " will be nan."
         )
 
     # Define output dictonary
@@ -213,7 +216,7 @@ def calc_kpi_for_specific_pid(
     date = pd.to_datetime(end_time)
 
     # Calculate model metrics and add them to the output dictionary
-    logger.info("Start calculating kpis")
+    logger.info("Start calculating kpis")
     for hor_cols in hor_list:
         t_ahead_h = hor_cols[0].split("_")[1]
         fc = combined[hor_cols[0]]  # load predictions
@@ -272,7 +275,7 @@ def calc_kpi_for_specific_pid(
     )
 
     if completeness_realised < COMPLETENESS_REALISED_THRESHOLDS:
-        logger.warning(
+        logger.warning(
             "Completeness realised load too low",
             prediction_id=pid,
             start_time=start_time,
@@ -282,7 +285,7 @@ def calc_kpi_for_specific_pid(
         )
         set_incomplete_kpi_to_nan(kpis, t_ahead_h)
     if completeness_predicted_load.any() < COMPLETENESS_PREDICTED_LOAD_THRESHOLD:
-        logger.warning(
+        logger.warning(
             "Completeness predicted load of specific horizon too low",
             prediction_id=pid,
             horizon=t_ahead_h,
```
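Besides the enum rename, this file loses the Tracy follow-up jobs (`context.database.ktp_api.add_tracy_job`) and gains `threshold_retraining`/`threshold_optimizing` as keyword parameters, so the 0.25/0.50 module defaults can be overridden per call. A hedged sketch, assuming a `pj` and `context` are already in scope:

```python
# The threshold overrides below are illustrative values only.
from openstef.tasks.calculate_kpi import check_kpi_task

check_kpi_task(
    pj,                # PredictionJobDataClass, assumed available
    context,           # TaskContext, assumed available
    start_time=None,   # None lets the task apply its own defaults
    end_time=None,
    threshold_retraining=0.3,  # default THRESHOLD_RETRAINING is 0.25
    threshold_optimizing=0.6,  # default THRESHOLD_OPTIMIZING is 0.50
)
```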
openstef/tasks/create_basecase_forecast.py
CHANGED
```diff
@@ -32,7 +32,10 @@ T_AHEAD_DAYS: int = 14
 
 
 def create_basecase_forecast_task(
-    pj: PredictionJobDataClass, context: TaskContext
+    pj: PredictionJobDataClass,
+    context: TaskContext,
+    t_behind_days=T_BEHIND_DAYS,
+    t_ahead_days=T_AHEAD_DAYS,
 ) -> None:
     """Top level task that creates a basecase forecast.
 
@@ -41,6 +44,8 @@ def create_basecase_forecast_task(
     Args:
         pj: Prediction job
         context: Contect object that holds a config manager and a database connection
+        t_behind_days: number of days included as history. This is used to generated lagged features for the to-be-forecasted period
+        t_ahead_days: number of days a basecase forecast is created for
 
     """
     # Check pipeline types
@@ -63,8 +68,8 @@ def create_basecase_forecast_task(
         return
 
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
-    datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS)
+    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
+    datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
 
     # Retrieve input data
     input_data = context.database.get_model_input(
@@ -77,11 +82,14 @@ def create_basecase_forecast_task(
     # Make basecase forecast using the corresponding pipeline
     basecase_forecast = create_basecase_forecast_pipeline(pj, input_data)
 
-    # Do not store basecase forecasts for moments within 48 hours.
+    # Do not store basecase forecasts for moments within the prediction job's horizon.
     # Those should be updated by regular forecast process.
     basecase_forecast = basecase_forecast.loc[
         basecase_forecast.index
-        > (pd.to_datetime(datetime.utcnow(), utc=True) + timedelta(hours=48)),
+        > (
+            pd.to_datetime(datetime.utcnow(), utc=True)
+            + timedelta(minutes=pj.horizon_minutes)
+        ),
         :,
     ]
 
@@ -89,7 +97,7 @@ def create_basecase_forecast_task(
     context.database.write_forecast(basecase_forecast, t_ahead_series=True)
 
 
-def main(config: object = None, database: object = None):
+def main(config: object = None, database: object = None, **kwargs):
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -102,7 +110,7 @@ def main(config: object = None, database: object = None):
         model_type = ["xgb", "xgb_quantile", "lgb"]
 
     PredictionJobLoop(context, model_type=model_type).map(
-        create_basecase_forecast_task, context
+        create_basecase_forecast_task, context, **kwargs
     )
 
 
```
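The `**kwargs` added to `main` are forwarded through `PredictionJobLoop.map` into every `create_basecase_forecast_task` call, so the data windows become run-time options instead of module constants. A sketch, with `config` and `database` standing in for real objects:

```python
# Hedged sketch of the new kwargs plumbing; the window sizes are illustrative.
from openstef.tasks import create_basecase_forecast

create_basecase_forecast.main(
    config=config,      # your configuration object (assumed)
    database=database,  # your database connection (assumed)
    t_behind_days=15,   # history used to build lagged features
    t_ahead_days=14,    # how far ahead the basecase forecast extends
)
```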
openstef/tasks/create_components_forecast.py
CHANGED
```diff
@@ -21,18 +21,20 @@ Example:
         $ python create_components_forecast.py
 
 """
+import logging
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
 
-import structlog
 import pandas as pd
+import structlog
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
 from openstef.exceptions import ComponentForecastTooShortHorizonError
 from openstef.pipeline.create_component_forecast import (
     create_components_forecast_pipeline,
 )
+from openstef.settings import Settings
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
 from openstef.tasks.utils.taskcontext import TaskContext
 
@@ -41,7 +43,10 @@ T_AHEAD_DAYS = 3
 
 
 def create_components_forecast_task(
-    pj: PredictionJobDataClass, context: TaskContext
+    pj: PredictionJobDataClass,
+    context: TaskContext,
+    t_behind_days: int = T_BEHIND_DAYS,
+    t_ahead_days: int = T_AHEAD_DAYS,
 ) -> None:
     """Top level task that creates a components forecast.
 
@@ -50,8 +55,19 @@ def create_components_forecast_task(
     Args:
         pj: Prediction job
         context: Contect object that holds a config manager and a database connection
+        t_behind_days: number of days in the past that the component forecast is created for
+        t_ahead_days: number of days in the future that the component forecast is created for
+
+    Raises:
+        ComponentForecastTooShortHorizonError: If the forecast horizon is too short
+            (less than 30 minutes in advance)
 
     """
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)
     if pj["train_components"] == 0:
         context.logger.info(
@@ -60,8 +76,8 @@ def create_components_forecast_task(
         return
 
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
-    datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS)
+    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
+    datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
 
     logger.info(
         "Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end
@@ -124,7 +140,7 @@ def create_components_forecast_task(
     )
 
 
-def main(config: object = None, database: object = None):
+def main(config: object = None, database: object = None, **kwargs):
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -134,12 +150,12 @@ def main(config: object = None, database: object = None):
         )
 
     with TaskContext(taskname, config, database) as context:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(
             context,
             model_type=model_type,
-        ).map(create_components_forecast_task, context)
+        ).map(create_components_forecast_task, context, **kwargs)
 
 
 if __name__ == "__main__":
```
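The `structlog.configure(...)` block that 3.4.44 repeats across tasks filters log records below the level named by the new `openstef.settings.Settings`. A standalone sketch of the same pattern, with `"INFO"` standing in for `Settings.log_level`:

```python
# Minimal sketch of the logging setup this release inserts into each task.
import logging

import structlog

log_level = "INFO"  # stand-in for Settings.log_level
structlog.configure(
    wrapper_class=structlog.make_filtering_bound_logger(
        logging.getLevelName(log_level)  # "INFO" -> 20
    )
)
logger = structlog.get_logger(__name__)
logger.debug("dropped")       # below the configured level, filtered out
logger.info("kept", pid=307)  # emitted with structured key-value context
```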
openstef/tasks/create_forecast.py
CHANGED
```diff
@@ -24,7 +24,7 @@ from datetime import datetime, timedelta
 from pathlib import Path
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType, PipelineType
+from openstef.enums import ModelType, PipelineType
 from openstef.exceptions import InputDataOngoingZeroFlatlinerError
 from openstef.pipeline.create_forecast import create_forecast_pipeline
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
@@ -34,7 +34,9 @@ from openstef.validation.validation import detect_ongoing_zero_flatliner
 T_BEHIND_DAYS: int = 14
 
 
-def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> None:
+def create_forecast_task(
+    pj: PredictionJobDataClass, context: TaskContext, t_behind_days: int = T_BEHIND_DAYS
+) -> None:
     """Top level task that creates a forecast.
 
     On this task level all database and context manager dependencies are resolved.
@@ -45,6 +47,7 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> None:
     Args:
         pj: Prediction job
         context: Contect object that holds a config manager and a database connection
+        t_behind_days: number of days included as history. This is used to generated lagged features for the to-be-forecasted period
 
     """
     # Check pipeline types
@@ -70,7 +73,7 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> None:
     mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri
 
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
+    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
     datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60)
 
     # Retrieve input data
@@ -115,7 +118,7 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> None:
     context.database.write_forecast(forecast, t_ahead_series=True)
 
 
-def main(model_type=None, config=None, database=None):
+def main(model_type=None, config=None, database=None, **kwargs):
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -126,10 +129,10 @@ def main(model_type=None, config=None, database=None):
 
     with TaskContext(taskname, config, database) as context:
         if model_type is None:
-            model_type = [ml.value for ml in MLModelType]
+            model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(context, model_type=model_type).map(
-            create_forecast_task, context
+            create_forecast_task, context, **kwargs
         )
 
 
```
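Only the history side of the input window became a parameter here; the forward edge is still derived from the prediction job's `horizon_minutes`. The arithmetic, with an illustrative horizon:

```python
# Sketch of the input window create_forecast_task builds; horizon_minutes
# stands in for pj.horizon_minutes and is illustrative.
from datetime import datetime, timedelta

t_behind_days = 14      # module default T_BEHIND_DAYS
horizon_minutes = 2880  # e.g. a prediction job looking 48 hours ahead

datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
datetime_end = datetime.utcnow() + timedelta(seconds=horizon_minutes * 60)
```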
openstef/tasks/create_solar_forecast.py
CHANGED
```diff
@@ -186,7 +186,7 @@ def fides(data: pd.DataFrame, all_forecasts: bool = False):
     data = pd.DataFrame(index = index,
                         data = dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7,1.7, 300)))
     data['insolation'] = data.load * np.random.uniform(0.8, 1.2, len(index)) + 0.1
-    data.loc[int(len(index)/3*2):,"load"] = np.NaN
+    data.loc[int(len(index)/3*2):,"load"] = np.nan
 
     """
     insolation_forecast = apply_fit_insol(data, add_to_df=False)
@@ -216,7 +216,7 @@ def fides(data: pd.DataFrame, all_forecasts: bool = False):
     return forecast
 
 
-def main(config=None, database=None):
+def main(config=None, database=None, **kwargs):
     taskname = Path(__file__).name.replace(".py", "")
 
     if database is None or config is None:
@@ -245,7 +245,7 @@ def main(config=None, database=None):
     )
 
     PredictionJobLoop(context, prediction_jobs=prediction_jobs).map(
-        make_solar_prediction_pj, context
+        make_solar_prediction_pj, context, kwargs=kwargs
    )
 
 
@@ -357,7 +357,7 @@ def apply_fit_insol(data, add_to_df=True, hours_delta=None, polynomial=False):
     data = pd.DataFrame(index = index,
                         data = dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7,1.7, len(index))))
     data['insolation'] = data.load * np.random.uniform(0.8, 1.2, len(index)) + 0.1
-    data.loc[int(len(index)/3*2):,"load"] = np.NaN
+    data.loc[int(len(index)/3*2):,"load"] = np.nan
 
     """
     colname = list(data)[0]
```
openstef/tasks/optimize_hyperparameters.py
CHANGED
```diff
@@ -20,7 +20,7 @@ from datetime import datetime, timedelta
 from pathlib import Path
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType, PipelineType
+from openstef.enums import ModelType, PipelineType
 from openstef.model.serializer import MLflowSerializer
 from openstef.monitoring import teams
 from openstef.pipeline.optimize_hyperparameters import optimize_hyperparameters_pipeline
@@ -124,7 +124,7 @@ def main(config=None, database=None):
     )
 
     with TaskContext(taskname, config, database) as context:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(context, model_type=model_type).map(
             optimize_hyperparameters_task, context
```
openstef/tasks/split_forecast.py
CHANGED
```diff
@@ -22,6 +22,7 @@ Example:
         $ python split_forecast.py
 
 """
+import logging
 from datetime import datetime
 from pathlib import Path
 
@@ -32,7 +33,8 @@ import structlog
 
 import openstef.monitoring.teams as monitoring
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import MLModelType
+from openstef.enums import ModelType
+from openstef.settings import Settings
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
 from openstef.tasks.utils.taskcontext import TaskContext
 
@@ -49,7 +51,7 @@ def main(config=None, database=None):
         )
 
     with TaskContext(taskname, config, database) as context:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
         PredictionJobLoop(
             context,
@@ -70,6 +72,11 @@ def split_forecast_task(
         Energy splitting coefficients.
 
     """
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)
 
     logger.info("Start splitting energy", pid=pj["id"])
```
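A caveat with the `logging.getLevelName(Settings.log_level)` idiom used throughout this release: `getLevelName` only resolves registered, uppercase level names to numbers; anything else comes back as a string that `make_filtering_bound_logger` cannot filter on:

```python
import logging

print(logging.getLevelName("WARNING"))  # 30, usable as a filter level
print(logging.getLevelName(30))         # "WARNING", the reverse mapping
print(logging.getLevelName("warning"))  # "Level warning", not a number!
```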
openstef/tasks/train_model.py
CHANGED
```diff
@@ -23,22 +23,20 @@ from datetime import datetime, timedelta
 from pathlib import Path
 
 from openstef.data_classes.prediction_job import PredictionJobDataClass
-
-from openstef.enums import MLModelType, PipelineType
+from openstef.enums import ModelType, PipelineType
 from openstef.exceptions import (
-    SkipSaveTrainingForecasts,
     InputDataOngoingZeroFlatlinerError,
+    SkipSaveTrainingForecasts,
 )
+from openstef.model.serializer import MLflowSerializer
 from openstef.pipeline.train_model import (
+    MAXIMUM_MODEL_AGE,
     train_model_pipeline,
     train_pipeline_step_load_model,
-    MAXIMUM_MODEL_AGE,
 )
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
 from openstef.tasks.utils.taskcontext import TaskContext
 
-from openstef.model.serializer import MLflowSerializer
-
 TRAINING_PERIOD_DAYS: int = 120
 DEFAULT_CHECK_MODEL_AGE: bool = True
 
@@ -65,6 +63,10 @@ def train_model_task(
         datetime_start: Start
         datetime_end: End
 
+    Raises:
+        SkipSaveTrainingForecasts: If old model is better or too young, you don't need to save the traing forcast.
+        InputDataOngoingZeroFlatlinerError: If all recent load measurements are zero.
+
     """
     # Check pipeline types
     if PipelineType.TRAIN not in pj.pipelines_to_run:
@@ -177,7 +179,7 @@ def main(model_type=None, config=None, database=None):
     )
 
     if model_type is None:
-        model_type = [ml.value for ml in MLModelType]
+        model_type = [ml.value for ml in ModelType]
 
     taskname = Path(__file__).name.replace(".py", "")
     datetime_now = datetime.utcnow()
```
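`train_model_task` now documents its exceptions. A hedged sketch of a caller telling the benign skip apart from a data problem (`pj` and `context` are assumed to exist):

```python
from openstef.exceptions import (
    InputDataOngoingZeroFlatlinerError,
    SkipSaveTrainingForecasts,
)
from openstef.tasks.train_model import train_model_task

try:
    train_model_task(pj, context)
except SkipSaveTrainingForecasts:
    # The old model is better or too young; nothing needs saving.
    context.logger.info("Training forecasts not saved", pid=pj["id"])
except InputDataOngoingZeroFlatlinerError:
    # All recent load measurements are zero; the input data is unusable.
    context.logger.warning("Ongoing zero flatliner detected", pid=pj["id"])
```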
openstef/tasks/utils/taskcontext.py
CHANGED
```diff
@@ -1,6 +1,7 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
 #
 # SPDX-License-Identifier: MPL-2.0
+import logging
 import traceback
 from typing import Callable
 
@@ -9,6 +10,7 @@ import structlog
 from openstef.exceptions import PredictionJobException
 from openstef.monitoring.performance_meter import PerformanceMeter
 from openstef.monitoring.teams import post_teams
+from openstef.settings import Settings
 
 
 class TaskContext:
@@ -62,6 +64,11 @@ class TaskContext:
         self.database = database
 
     def __enter__(self):
+        structlog.configure(
+            wrapper_class=structlog.make_filtering_bound_logger(
+                logging.getLevelName(Settings.log_level)
+            )
+        )
         self.logger = structlog.get_logger(__name__).bind(task=self.name)
 
         self.perf_meter = PerformanceMeter(self.logger)
```
openstef/validation/validation.py
CHANGED
```diff
@@ -1,17 +1,19 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
 #
 # SPDX-License-Identifier: MPL-2.0
+import logging
+import math
 from datetime import datetime, timedelta
 from typing import Union
 
-import math
 import numpy as np
 import pandas as pd
 import structlog
 
 from openstef.exceptions import InputDataOngoingZeroFlatlinerError
-from openstef.preprocessing.preprocessing import replace_repeated_values_with_nan
 from openstef.model.regressors.regressor import OpenstfRegressor
+from openstef.preprocessing.preprocessing import replace_repeated_values_with_nan
+from openstef.settings import Settings
 
 
 def validate(
@@ -37,7 +39,15 @@ def validate(
     Returns:
         Dataframe where repeated values are set to None
 
+    Raises:
+        InputDataOngoingZeroFlatlinerError: If all recent load measurements are zero.
+
     """
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)
 
     if not isinstance(data.index, pd.DatetimeIndex):
@@ -81,6 +91,11 @@ def validate(
 
 
 def drop_target_na(data: pd.DataFrame) -> pd.DataFrame:
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)
     len_original = len(data)
     # Remove where load is NA, NaN features are preserved
@@ -119,6 +134,11 @@ def is_data_sufficient(
     else:
         weights = model.feature_importance_dataframe
 
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)
     # Set output variable
     is_sufficient = True
@@ -224,7 +244,7 @@ def detect_ongoing_zero_flatliner(
     """
     # remove all timestamps in the future
     load = load[load.index.tz_localize(None) <= datetime.utcnow()]
-    latest_measurement_time = load.index.max()
+    latest_measurement_time = load.dropna().index.max()
     latest_measurements = load[
         latest_measurement_time - timedelta(minutes=duration_threshold_minutes) :
     ].dropna()
@@ -251,6 +271,11 @@ def calc_completeness_dataframe(
         Dataframe with fraction of completeness per column
 
     """
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)
 
     if homogenise and isinstance(df.index, pd.DatetimeIndex) and len(df) > 0:
```