openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. openstef/app_settings.py +19 -0
  2. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  3. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
  4. openstef/data/dutch_holidays.csv +1759 -0
  5. openstef/data_classes/data_prep.py +1 -1
  6. openstef/data_classes/prediction_job.py +15 -9
  7. openstef/enums.py +108 -9
  8. openstef/exceptions.py +1 -1
  9. openstef/feature_engineering/apply_features.py +25 -6
  10. openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
  11. openstef/feature_engineering/cyclic_features.py +102 -0
  12. openstef/feature_engineering/data_preparation.py +12 -5
  13. openstef/feature_engineering/feature_applicator.py +1 -5
  14. openstef/feature_engineering/general.py +14 -0
  15. openstef/feature_engineering/holiday_features.py +35 -26
  16. openstef/feature_engineering/missing_values_transformer.py +141 -0
  17. openstef/feature_engineering/weather_features.py +7 -0
  18. openstef/metrics/figure.py +3 -0
  19. openstef/metrics/metrics.py +58 -1
  20. openstef/metrics/reporter.py +7 -0
  21. openstef/model/confidence_interval_applicator.py +28 -3
  22. openstef/model/model_creator.py +54 -41
  23. openstef/model/objective.py +17 -34
  24. openstef/model/objective_creator.py +13 -12
  25. openstef/model/regressors/arima.py +1 -1
  26. openstef/model/regressors/dazls.py +35 -96
  27. openstef/model/regressors/flatliner.py +95 -0
  28. openstef/model/regressors/linear_quantile.py +296 -0
  29. openstef/model/regressors/xgb.py +23 -0
  30. openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
  31. openstef/model/regressors/xgb_quantile.py +3 -0
  32. openstef/model/serializer.py +10 -0
  33. openstef/model_selection/model_selection.py +4 -1
  34. openstef/monitoring/performance_meter.py +1 -2
  35. openstef/monitoring/teams.py +11 -0
  36. openstef/pipeline/create_basecase_forecast.py +11 -1
  37. openstef/pipeline/create_component_forecast.py +24 -28
  38. openstef/pipeline/create_forecast.py +20 -1
  39. openstef/pipeline/optimize_hyperparameters.py +18 -16
  40. openstef/pipeline/train_create_forecast_backtest.py +11 -1
  41. openstef/pipeline/train_model.py +31 -12
  42. openstef/pipeline/utils.py +3 -0
  43. openstef/postprocessing/postprocessing.py +29 -0
  44. openstef/settings.py +15 -0
  45. openstef/tasks/calculate_kpi.py +23 -20
  46. openstef/tasks/create_basecase_forecast.py +15 -7
  47. openstef/tasks/create_components_forecast.py +24 -8
  48. openstef/tasks/create_forecast.py +9 -6
  49. openstef/tasks/create_solar_forecast.py +4 -4
  50. openstef/tasks/optimize_hyperparameters.py +2 -2
  51. openstef/tasks/split_forecast.py +9 -2
  52. openstef/tasks/train_model.py +9 -7
  53. openstef/tasks/utils/taskcontext.py +7 -0
  54. openstef/validation/validation.py +28 -3
  55. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
  56. openstef-3.4.44.dist-info/RECORD +97 -0
  57. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
  58. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
  59. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
  60. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
  61. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
  62. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
  63. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
  64. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
  65. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
  66. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
  67. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
  68. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
  69. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
  70. openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
  71. openstef/data/dutch_holidays_2020-2022.csv +0 -831
  72. openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
  73. openstef/feature_engineering/historic_features.py +0 -40
  74. openstef/model/regressors/proloaf.py +0 -281
  75. openstef/tasks/run_tracy.py +0 -145
  76. openstef-3.4.10.dist-info/RECORD +0 -104
  77. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
  78. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
  79. /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
  80. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
  81. {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,8 @@ Example:
18
18
  $ python calculate_kpi.py
19
19
 
20
20
  """
21
+ import logging
22
+
21
23
  # Import builtins
22
24
  from datetime import datetime, timedelta
23
25
  from pathlib import Path
@@ -27,9 +29,10 @@ import pandas as pd
27
29
  import structlog
28
30
 
29
31
  from openstef.data_classes.prediction_job import PredictionJobDataClass
30
- from openstef.enums import MLModelType
32
+ from openstef.enums import ModelType
31
33
  from openstef.exceptions import NoPredictedLoadError, NoRealisedLoadError
32
34
  from openstef.metrics import metrics
35
+ from openstef.settings import Settings
33
36
  from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
34
37
  from openstef.tasks.utils.taskcontext import TaskContext
35
38
  from openstef.validation import validation
@@ -39,7 +42,7 @@ THRESHOLD_RETRAINING = 0.25
39
42
  THRESHOLD_OPTIMIZING = 0.50
40
43
 
41
44
 
42
- def main(model_type: MLModelType = None, config=None, database=None) -> None:
45
+ def main(model_type: ModelType = None, config=None, database=None) -> None:
43
46
  taskname = Path(__file__).name.replace(".py", "")
44
47
 
45
48
  if database is None or config is None:
@@ -49,7 +52,7 @@ def main(model_type: MLModelType = None, config=None, database=None) -> None:
49
52
  )
50
53
 
51
54
  if model_type is None:
52
- model_type = [ml.value for ml in MLModelType]
55
+ model_type = [ml.value for ml in ModelType]
53
56
 
54
57
  with TaskContext(taskname, config, database) as context:
55
58
  # Set start and end time
@@ -69,6 +72,8 @@ def check_kpi_task(
69
72
  context: TaskContext,
70
73
  start_time: datetime,
71
74
  end_time: datetime,
75
+ threshold_optimizing=THRESHOLD_OPTIMIZING,
76
+ threshold_retraining=THRESHOLD_RETRAINING,
72
77
  ) -> None:
73
78
  # Apply default parameters if none are provided
74
79
  if start_time is None:
@@ -99,28 +104,21 @@ def check_kpi_task(
99
104
 
100
105
  # Add pid to the list of pids that should be retrained or optimized if
101
106
  # performance is insufficient
102
- if kpis["47.0h"]["rMAE"] > THRESHOLD_RETRAINING:
107
+ if kpis["47.0h"]["rMAE"] > threshold_retraining:
103
108
  context.logger.warning(
104
109
  "Need to retrain model, retraining threshold rMAE 47h exceeded",
105
110
  t_ahead="47.0h",
106
111
  rMAE=kpis["47.0h"]["rMAE"],
107
- retraining_threshold=THRESHOLD_RETRAINING,
112
+ retraining_threshold=threshold_retraining,
108
113
  )
109
- function_name = "train_model"
110
-
111
- context.logger.info("Adding tracy job", function=function_name)
112
- context.database.ktp_api.add_tracy_job(pj["id"], function=function_name)
113
114
 
114
- if kpis["47.0h"]["rMAE"] > THRESHOLD_OPTIMIZING:
115
+ if kpis["47.0h"]["rMAE"] > threshold_optimizing:
115
116
  context.logger.warning(
116
117
  "Need to optimize hyperparameters, optimizing threshold rMAE 47h exceeded",
117
118
  t_ahead="47.0h",
118
119
  rMAE=kpis["47.0h"]["rMAE"],
119
- optimizing_threshold=THRESHOLD_OPTIMIZING,
120
+ optimizing_threshold=threshold_optimizing,
120
121
  )
121
- function_name = "optimize_hyperparameters"
122
- context.logger.info("Adding tracy job", function=function_name)
123
- context.database.ktp_api.add_tracy_job(pj["id"], function=function_name)
124
122
 
125
123
 
126
124
  def calc_kpi_for_specific_pid(
@@ -160,7 +158,12 @@ def calc_kpi_for_specific_pid(
160
158
  COMPLETENESS_REALISED_THRESHOLDS = 0.7
161
159
  COMPLETENESS_PREDICTED_LOAD_THRESHOLD = 0.7
162
160
 
163
- log = structlog.get_logger(__name__)
161
+ structlog.configure(
162
+ wrapper_class=structlog.make_filtering_bound_logger(
163
+ logging.getLevelName(Settings.log_level)
164
+ )
165
+ )
166
+ logger = structlog.get_logger(__name__)
164
167
 
165
168
  # If predicted is empty
166
169
  if len(predicted_load) == 0:
@@ -194,9 +197,9 @@ def calc_kpi_for_specific_pid(
194
197
 
195
198
  # Raise exception in case of constant load
196
199
  if combined.load.nunique() == 1:
197
- structlog.get_logger(__name__).warning(
200
+ logger.warning(
198
201
  "The load is constant! KPIs will still be calculated, but relative metrics"
199
- " will be nan"
202
+ " will be nan."
200
203
  )
201
204
 
202
205
  # Define output dictionary
@@ -213,7 +216,7 @@ def calc_kpi_for_specific_pid(
213
216
  date = pd.to_datetime(end_time)
214
217
 
215
218
  # Calculate model metrics and add them to the output dictionary
216
- log.info("Start calculating kpis")
219
+ logger.info("Start calculating kpis")
217
220
  for hor_cols in hor_list:
218
221
  t_ahead_h = hor_cols[0].split("_")[1]
219
222
  fc = combined[hor_cols[0]] # load predictions
@@ -272,7 +275,7 @@ def calc_kpi_for_specific_pid(
272
275
  )
273
276
 
274
277
  if completeness_realised < COMPLETENESS_REALISED_THRESHOLDS:
275
- log.warning(
278
+ logger.warning(
276
279
  "Completeness realised load too low",
277
280
  prediction_id=pid,
278
281
  start_time=start_time,
@@ -282,7 +285,7 @@ def calc_kpi_for_specific_pid(
282
285
  )
283
286
  set_incomplete_kpi_to_nan(kpis, t_ahead_h)
284
287
  if completeness_predicted_load.any() < COMPLETENESS_PREDICTED_LOAD_THRESHOLD:
285
- log.warning(
288
+ logger.warning(
286
289
  "Completeness predicted load of specific horizon too low",
287
290
  prediction_id=pid,
288
291
  horizon=t_ahead_h,
@@ -32,7 +32,10 @@ T_AHEAD_DAYS: int = 14
32
32
 
33
33
 
34
34
  def create_basecase_forecast_task(
35
- pj: PredictionJobDataClass, context: TaskContext
35
+ pj: PredictionJobDataClass,
36
+ context: TaskContext,
37
+ t_behind_days=T_BEHIND_DAYS,
38
+ t_ahead_days=T_AHEAD_DAYS,
36
39
  ) -> None:
37
40
  """Top level task that creates a basecase forecast.
38
41
 
@@ -41,6 +44,8 @@ def create_basecase_forecast_task(
41
44
  Args:
42
45
  pj: Prediction job
43
46
  context: Context object that holds a config manager and a database connection
47
+ t_behind_days: number of days included as history. This is used to generate lagged features for the to-be-forecasted period
48
+ t_ahead_days: number of days a basecase forecast is created for
44
49
 
45
50
  """
46
51
  # Check pipeline types
@@ -63,8 +68,8 @@ def create_basecase_forecast_task(
63
68
  return
64
69
 
65
70
  # Define datetime range for input data
66
- datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
67
- datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS)
71
+ datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
72
+ datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
68
73
 
69
74
  # Retrieve input data
70
75
  input_data = context.database.get_model_input(
@@ -77,11 +82,14 @@ def create_basecase_forecast_task(
77
82
  # Make basecase forecast using the corresponding pipeline
78
83
  basecase_forecast = create_basecase_forecast_pipeline(pj, input_data)
79
84
 
80
- # Do not store basecase forecasts for moments within next 48 hours.
85
+ # Do not store basecase forecasts for moments within the prediction job's horizon.
81
86
  # Those should be updated by regular forecast process.
82
87
  basecase_forecast = basecase_forecast.loc[
83
88
  basecase_forecast.index
84
- > (pd.to_datetime(datetime.utcnow(), utc=True) + timedelta(hours=48)),
89
+ > (
90
+ pd.to_datetime(datetime.utcnow(), utc=True)
91
+ + timedelta(minutes=pj.horizon_minutes)
92
+ ),
85
93
  :,
86
94
  ]
87
95
 
@@ -89,7 +97,7 @@ def create_basecase_forecast_task(
89
97
  context.database.write_forecast(basecase_forecast, t_ahead_series=True)
90
98
 
91
99
 
92
- def main(config: object = None, database: object = None):
100
+ def main(config: object = None, database: object = None, **kwargs):
93
101
  taskname = Path(__file__).name.replace(".py", "")
94
102
 
95
103
  if database is None or config is None:
@@ -102,7 +110,7 @@ def main(config: object = None, database: object = None):
102
110
  model_type = ["xgb", "xgb_quantile", "lgb"]
103
111
 
104
112
  PredictionJobLoop(context, model_type=model_type).map(
105
- create_basecase_forecast_task, context
113
+ create_basecase_forecast_task, context, **kwargs
106
114
  )
107
115
 
108
116
 
@@ -21,18 +21,20 @@ Example:
21
21
  $ python create_components_forecast.py
22
22
 
23
23
  """
24
+ import logging
24
25
  from datetime import datetime, timedelta, timezone
25
26
  from pathlib import Path
26
27
 
27
- import structlog
28
28
  import pandas as pd
29
+ import structlog
29
30
 
30
31
  from openstef.data_classes.prediction_job import PredictionJobDataClass
31
- from openstef.enums import MLModelType
32
+ from openstef.enums import ModelType
32
33
  from openstef.exceptions import ComponentForecastTooShortHorizonError
33
34
  from openstef.pipeline.create_component_forecast import (
34
35
  create_components_forecast_pipeline,
35
36
  )
37
+ from openstef.settings import Settings
36
38
  from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
37
39
  from openstef.tasks.utils.taskcontext import TaskContext
38
40
 
@@ -41,7 +43,10 @@ T_AHEAD_DAYS = 3
41
43
 
42
44
 
43
45
  def create_components_forecast_task(
44
- pj: PredictionJobDataClass, context: TaskContext
46
+ pj: PredictionJobDataClass,
47
+ context: TaskContext,
48
+ t_behind_days: int = T_BEHIND_DAYS,
49
+ t_ahead_days: int = T_AHEAD_DAYS,
45
50
  ) -> None:
46
51
  """Top level task that creates a components forecast.
47
52
 
@@ -50,8 +55,19 @@ def create_components_forecast_task(
50
55
  Args:
51
56
  pj: Prediction job
52
57
  context: Context object that holds a config manager and a database connection
58
+ t_behind_days: number of days in the past that the component forecast is created for
59
+ t_ahead_days: number of days in the future that the component forecast is created for
60
+
61
+ Raises:
62
+ ComponentForecastTooShortHorizonError: If the forecast horizon is too short
63
+ (less than 30 minutes in advance)
53
64
 
54
65
  """
66
+ structlog.configure(
67
+ wrapper_class=structlog.make_filtering_bound_logger(
68
+ logging.getLevelName(Settings.log_level)
69
+ )
70
+ )
55
71
  logger = structlog.get_logger(__name__)
56
72
  if pj["train_components"] == 0:
57
73
  context.logger.info(
@@ -60,8 +76,8 @@ def create_components_forecast_task(
60
76
  return
61
77
 
62
78
  # Define datetime range for input data
63
- datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
64
- datetime_end = datetime.utcnow() + timedelta(days=T_AHEAD_DAYS)
79
+ datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
80
+ datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
65
81
 
66
82
  logger.info(
67
83
  "Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end
@@ -124,7 +140,7 @@ def create_components_forecast_task(
124
140
  )
125
141
 
126
142
 
127
- def main(config: object = None, database: object = None):
143
+ def main(config: object = None, database: object = None, **kwargs):
128
144
  taskname = Path(__file__).name.replace(".py", "")
129
145
 
130
146
  if database is None or config is None:
@@ -134,12 +150,12 @@ def main(config: object = None, database: object = None):
134
150
  )
135
151
 
136
152
  with TaskContext(taskname, config, database) as context:
137
- model_type = [ml.value for ml in MLModelType]
153
+ model_type = [ml.value for ml in ModelType]
138
154
 
139
155
  PredictionJobLoop(
140
156
  context,
141
157
  model_type=model_type,
142
- ).map(create_components_forecast_task, context)
158
+ ).map(create_components_forecast_task, context, **kwargs)
143
159
 
144
160
 
145
161
  if __name__ == "__main__":
@@ -24,7 +24,7 @@ from datetime import datetime, timedelta
24
24
  from pathlib import Path
25
25
 
26
26
  from openstef.data_classes.prediction_job import PredictionJobDataClass
27
- from openstef.enums import MLModelType, PipelineType
27
+ from openstef.enums import ModelType, PipelineType
28
28
  from openstef.exceptions import InputDataOngoingZeroFlatlinerError
29
29
  from openstef.pipeline.create_forecast import create_forecast_pipeline
30
30
  from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
@@ -34,7 +34,9 @@ from openstef.validation.validation import detect_ongoing_zero_flatliner
34
34
  T_BEHIND_DAYS: int = 14
35
35
 
36
36
 
37
- def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> None:
37
+ def create_forecast_task(
38
+ pj: PredictionJobDataClass, context: TaskContext, t_behind_days: int = T_BEHIND_DAYS
39
+ ) -> None:
38
40
  """Top level task that creates a forecast.
39
41
 
40
42
  On this task level all database and context manager dependencies are resolved.
@@ -45,6 +47,7 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> No
45
47
  Args:
46
48
  pj: Prediction job
47
49
  context: Context object that holds a config manager and a database connection
50
+ t_behind_days: number of days included as history. This is used to generate lagged features for the to-be-forecasted period
48
51
 
49
52
  """
50
53
  # Check pipeline types
@@ -70,7 +73,7 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> No
70
73
  mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri
71
74
 
72
75
  # Define datetime range for input data
73
- datetime_start = datetime.utcnow() - timedelta(days=T_BEHIND_DAYS)
76
+ datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
74
77
  datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60)
75
78
 
76
79
  # Retrieve input data
@@ -115,7 +118,7 @@ def create_forecast_task(pj: PredictionJobDataClass, context: TaskContext) -> No
115
118
  context.database.write_forecast(forecast, t_ahead_series=True)
116
119
 
117
120
 
118
- def main(model_type=None, config=None, database=None):
121
+ def main(model_type=None, config=None, database=None, **kwargs):
119
122
  taskname = Path(__file__).name.replace(".py", "")
120
123
 
121
124
  if database is None or config is None:
@@ -126,10 +129,10 @@ def main(model_type=None, config=None, database=None):
126
129
 
127
130
  with TaskContext(taskname, config, database) as context:
128
131
  if model_type is None:
129
- model_type = [ml.value for ml in MLModelType]
132
+ model_type = [ml.value for ml in ModelType]
130
133
 
131
134
  PredictionJobLoop(context, model_type=model_type).map(
132
- create_forecast_task, context
135
+ create_forecast_task, context, **kwargs
133
136
  )
134
137
 
135
138
 
@@ -186,7 +186,7 @@ def fides(data: pd.DataFrame, all_forecasts: bool = False):
186
186
  data = pd.DataFrame(index = index,
187
187
  data = dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7,1.7, 300)))
188
188
  data['insolation'] = data.load * np.random.uniform(0.8, 1.2, len(index)) + 0.1
189
- data.loc[int(len(index)/3*2):,"load"] = np.NaN
189
+ data.loc[int(len(index)/3*2):,"load"] = np.nan
190
190
 
191
191
  """
192
192
  insolation_forecast = apply_fit_insol(data, add_to_df=False)
@@ -216,7 +216,7 @@ def fides(data: pd.DataFrame, all_forecasts: bool = False):
216
216
  return forecast
217
217
 
218
218
 
219
- def main(config=None, database=None):
219
+ def main(config=None, database=None, **kwargs):
220
220
  taskname = Path(__file__).name.replace(".py", "")
221
221
 
222
222
  if database is None or config is None:
@@ -245,7 +245,7 @@ def main(config=None, database=None):
245
245
  )
246
246
 
247
247
  PredictionJobLoop(context, prediction_jobs=prediction_jobs).map(
248
- make_solar_prediction_pj, context
248
+ make_solar_prediction_pj, context, kwargs=kwargs
249
249
  )
250
250
 
251
251
 
@@ -357,7 +357,7 @@ def apply_fit_insol(data, add_to_df=True, hours_delta=None, polynomial=False):
357
357
  data = pd.DataFrame(index = index,
358
358
  data = dict(load=np.sin(index.hour/24*np.pi)*np.random.uniform(0.7,1.7, len(index))))
359
359
  data['insolation'] = data.load * np.random.uniform(0.8, 1.2, len(index)) + 0.1
360
- data.loc[int(len(index)/3*2):,"load"] = np.NaN
360
+ data.loc[int(len(index)/3*2):,"load"] = np.nan
361
361
 
362
362
  """
363
363
  colname = list(data)[0]
@@ -20,7 +20,7 @@ from datetime import datetime, timedelta
20
20
  from pathlib import Path
21
21
 
22
22
  from openstef.data_classes.prediction_job import PredictionJobDataClass
23
- from openstef.enums import MLModelType, PipelineType
23
+ from openstef.enums import ModelType, PipelineType
24
24
  from openstef.model.serializer import MLflowSerializer
25
25
  from openstef.monitoring import teams
26
26
  from openstef.pipeline.optimize_hyperparameters import optimize_hyperparameters_pipeline
@@ -124,7 +124,7 @@ def main(config=None, database=None):
124
124
  )
125
125
 
126
126
  with TaskContext(taskname, config, database) as context:
127
- model_type = [ml.value for ml in MLModelType]
127
+ model_type = [ml.value for ml in ModelType]
128
128
 
129
129
  PredictionJobLoop(context, model_type=model_type).map(
130
130
  optimize_hyperparameters_task, context
@@ -22,6 +22,7 @@ Example:
22
22
  $ python split_forecast.py
23
23
 
24
24
  """
25
+ import logging
25
26
  from datetime import datetime
26
27
  from pathlib import Path
27
28
 
@@ -32,7 +33,8 @@ import structlog
32
33
 
33
34
  import openstef.monitoring.teams as monitoring
34
35
  from openstef.data_classes.prediction_job import PredictionJobDataClass
35
- from openstef.enums import MLModelType
36
+ from openstef.enums import ModelType
37
+ from openstef.settings import Settings
36
38
  from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
37
39
  from openstef.tasks.utils.taskcontext import TaskContext
38
40
 
@@ -49,7 +51,7 @@ def main(config=None, database=None):
49
51
  )
50
52
 
51
53
  with TaskContext(taskname, config, database) as context:
52
- model_type = [ml.value for ml in MLModelType]
54
+ model_type = [ml.value for ml in ModelType]
53
55
 
54
56
  PredictionJobLoop(
55
57
  context,
@@ -70,6 +72,11 @@ def split_forecast_task(
70
72
  Energy splitting coefficients.
71
73
 
72
74
  """
75
+ structlog.configure(
76
+ wrapper_class=structlog.make_filtering_bound_logger(
77
+ logging.getLevelName(Settings.log_level)
78
+ )
79
+ )
73
80
  logger = structlog.get_logger(__name__)
74
81
 
75
82
  logger.info("Start splitting energy", pid=pj["id"])
@@ -23,22 +23,20 @@ from datetime import datetime, timedelta
23
23
  from pathlib import Path
24
24
 
25
25
  from openstef.data_classes.prediction_job import PredictionJobDataClass
26
-
27
- from openstef.enums import MLModelType, PipelineType
26
+ from openstef.enums import ModelType, PipelineType
28
27
  from openstef.exceptions import (
29
- SkipSaveTrainingForecasts,
30
28
  InputDataOngoingZeroFlatlinerError,
29
+ SkipSaveTrainingForecasts,
31
30
  )
31
+ from openstef.model.serializer import MLflowSerializer
32
32
  from openstef.pipeline.train_model import (
33
+ MAXIMUM_MODEL_AGE,
33
34
  train_model_pipeline,
34
35
  train_pipeline_step_load_model,
35
- MAXIMUM_MODEL_AGE,
36
36
  )
37
37
  from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
38
38
  from openstef.tasks.utils.taskcontext import TaskContext
39
39
 
40
- from openstef.model.serializer import MLflowSerializer
41
-
42
40
  TRAINING_PERIOD_DAYS: int = 120
43
41
  DEFAULT_CHECK_MODEL_AGE: bool = True
44
42
 
@@ -65,6 +63,10 @@ def train_model_task(
65
63
  datetime_start: Start
66
64
  datetime_end: End
67
65
 
66
+ Raises:
67
+ SkipSaveTrainingForecasts: If old model is better or too young, you don't need to save the training forecast.
68
+ InputDataOngoingZeroFlatlinerError: If all recent load measurements are zero.
69
+
68
70
  """
69
71
  # Check pipeline types
70
72
  if PipelineType.TRAIN not in pj.pipelines_to_run:
@@ -177,7 +179,7 @@ def main(model_type=None, config=None, database=None):
177
179
  )
178
180
 
179
181
  if model_type is None:
180
- model_type = [ml.value for ml in MLModelType]
182
+ model_type = [ml.value for ml in ModelType]
181
183
 
182
184
  taskname = Path(__file__).name.replace(".py", "")
183
185
  datetime_now = datetime.utcnow()
@@ -1,6 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
4
5
  import traceback
5
6
  from typing import Callable
6
7
 
@@ -9,6 +10,7 @@ import structlog
9
10
  from openstef.exceptions import PredictionJobException
10
11
  from openstef.monitoring.performance_meter import PerformanceMeter
11
12
  from openstef.monitoring.teams import post_teams
13
+ from openstef.settings import Settings
12
14
 
13
15
 
14
16
  class TaskContext:
@@ -62,6 +64,11 @@ class TaskContext:
62
64
  self.database = database
63
65
 
64
66
  def __enter__(self):
67
+ structlog.configure(
68
+ wrapper_class=structlog.make_filtering_bound_logger(
69
+ logging.getLevelName(Settings.log_level)
70
+ )
71
+ )
65
72
  self.logger = structlog.get_logger(__name__).bind(task=self.name)
66
73
 
67
74
  self.perf_meter = PerformanceMeter(self.logger)
@@ -1,17 +1,19 @@
1
1
  # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
2
  #
3
3
  # SPDX-License-Identifier: MPL-2.0
4
+ import logging
5
+ import math
4
6
  from datetime import datetime, timedelta
5
7
  from typing import Union
6
8
 
7
- import math
8
9
  import numpy as np
9
10
  import pandas as pd
10
11
  import structlog
11
12
 
12
13
  from openstef.exceptions import InputDataOngoingZeroFlatlinerError
13
- from openstef.preprocessing.preprocessing import replace_repeated_values_with_nan
14
14
  from openstef.model.regressors.regressor import OpenstfRegressor
15
+ from openstef.preprocessing.preprocessing import replace_repeated_values_with_nan
16
+ from openstef.settings import Settings
15
17
 
16
18
 
17
19
  def validate(
@@ -37,7 +39,15 @@ def validate(
37
39
  Returns:
38
40
  Dataframe where repeated values are set to None
39
41
 
42
+ Raises:
43
+ InputDataOngoingZeroFlatlinerError: If all recent load measurements are zero.
44
+
40
45
  """
46
+ structlog.configure(
47
+ wrapper_class=structlog.make_filtering_bound_logger(
48
+ logging.getLevelName(Settings.log_level)
49
+ )
50
+ )
41
51
  logger = structlog.get_logger(__name__)
42
52
 
43
53
  if not isinstance(data.index, pd.DatetimeIndex):
@@ -81,6 +91,11 @@ def validate(
81
91
 
82
92
 
83
93
  def drop_target_na(data: pd.DataFrame) -> pd.DataFrame:
94
+ structlog.configure(
95
+ wrapper_class=structlog.make_filtering_bound_logger(
96
+ logging.getLevelName(Settings.log_level)
97
+ )
98
+ )
84
99
  logger = structlog.get_logger(__name__)
85
100
  len_original = len(data)
86
101
  # Remove where load is NA, NaN features are preserved
@@ -119,6 +134,11 @@ def is_data_sufficient(
119
134
  else:
120
135
  weights = model.feature_importance_dataframe
121
136
 
137
+ structlog.configure(
138
+ wrapper_class=structlog.make_filtering_bound_logger(
139
+ logging.getLevelName(Settings.log_level)
140
+ )
141
+ )
122
142
  logger = structlog.get_logger(__name__)
123
143
  # Set output variable
124
144
  is_sufficient = True
@@ -224,7 +244,7 @@ def detect_ongoing_zero_flatliner(
224
244
  """
225
245
  # remove all timestamps in the future
226
246
  load = load[load.index.tz_localize(None) <= datetime.utcnow()]
227
- latest_measurement_time = load.index.max()
247
+ latest_measurement_time = load.dropna().index.max()
228
248
  latest_measurements = load[
229
249
  latest_measurement_time - timedelta(minutes=duration_threshold_minutes) :
230
250
  ].dropna()
@@ -251,6 +271,11 @@ def calc_completeness_dataframe(
251
271
  Dataframe with fraction of completeness per column
252
272
 
253
273
  """
274
+ structlog.configure(
275
+ wrapper_class=structlog.make_filtering_bound_logger(
276
+ logging.getLevelName(Settings.log_level)
277
+ )
278
+ )
254
279
  logger = structlog.get_logger(__name__)
255
280
 
256
281
  if homogenise and isinstance(df.index, pd.DatetimeIndex) and len(df) > 0: