openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef-4.0.0a3.dist-info/METADATA +177 -0
- openstef-4.0.0a3.dist-info/RECORD +4 -0
- {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
- openstef/__init__.py +0 -14
- openstef/__main__.py +0 -3
- openstef/app_settings.py +0 -19
- openstef/data/NL_terrestrial_radiation.csv +0 -25585
- openstef/data/NL_terrestrial_radiation.csv.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
- openstef/data/dutch_holidays.csv +0 -1759
- openstef/data/dutch_holidays.csv.license +0 -3
- openstef/data/pv_single_coefs.csv +0 -601
- openstef/data/pv_single_coefs.csv.license +0 -3
- openstef/data_classes/__init__.py +0 -3
- openstef/data_classes/data_prep.py +0 -99
- openstef/data_classes/model_specifications.py +0 -30
- openstef/data_classes/prediction_job.py +0 -135
- openstef/data_classes/split_function.py +0 -97
- openstef/enums.py +0 -140
- openstef/exceptions.py +0 -74
- openstef/feature_engineering/__init__.py +0 -3
- openstef/feature_engineering/apply_features.py +0 -138
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
- openstef/feature_engineering/cyclic_features.py +0 -161
- openstef/feature_engineering/data_preparation.py +0 -152
- openstef/feature_engineering/feature_adder.py +0 -206
- openstef/feature_engineering/feature_applicator.py +0 -202
- openstef/feature_engineering/general.py +0 -141
- openstef/feature_engineering/holiday_features.py +0 -231
- openstef/feature_engineering/lag_features.py +0 -165
- openstef/feature_engineering/missing_values_transformer.py +0 -141
- openstef/feature_engineering/rolling_features.py +0 -58
- openstef/feature_engineering/weather_features.py +0 -492
- openstef/metrics/__init__.py +0 -3
- openstef/metrics/figure.py +0 -303
- openstef/metrics/metrics.py +0 -486
- openstef/metrics/reporter.py +0 -222
- openstef/model/__init__.py +0 -3
- openstef/model/basecase.py +0 -82
- openstef/model/confidence_interval_applicator.py +0 -242
- openstef/model/fallback.py +0 -77
- openstef/model/metamodels/__init__.py +0 -3
- openstef/model/metamodels/feature_clipper.py +0 -90
- openstef/model/metamodels/grouped_regressor.py +0 -222
- openstef/model/metamodels/missing_values_handler.py +0 -138
- openstef/model/model_creator.py +0 -214
- openstef/model/objective.py +0 -426
- openstef/model/objective_creator.py +0 -65
- openstef/model/regressors/__init__.py +0 -3
- openstef/model/regressors/arima.py +0 -197
- openstef/model/regressors/custom_regressor.py +0 -64
- openstef/model/regressors/dazls.py +0 -116
- openstef/model/regressors/flatliner.py +0 -95
- openstef/model/regressors/gblinear_quantile.py +0 -334
- openstef/model/regressors/lgbm.py +0 -29
- openstef/model/regressors/linear.py +0 -90
- openstef/model/regressors/linear_quantile.py +0 -305
- openstef/model/regressors/regressor.py +0 -114
- openstef/model/regressors/xgb.py +0 -52
- openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
- openstef/model/regressors/xgb_quantile.py +0 -228
- openstef/model/serializer.py +0 -431
- openstef/model/standard_deviation_generator.py +0 -81
- openstef/model_selection/__init__.py +0 -3
- openstef/model_selection/model_selection.py +0 -311
- openstef/monitoring/__init__.py +0 -3
- openstef/monitoring/performance_meter.py +0 -92
- openstef/monitoring/teams.py +0 -203
- openstef/pipeline/__init__.py +0 -3
- openstef/pipeline/create_basecase_forecast.py +0 -133
- openstef/pipeline/create_component_forecast.py +0 -168
- openstef/pipeline/create_forecast.py +0 -171
- openstef/pipeline/optimize_hyperparameters.py +0 -317
- openstef/pipeline/train_create_forecast_backtest.py +0 -163
- openstef/pipeline/train_model.py +0 -561
- openstef/pipeline/utils.py +0 -52
- openstef/postprocessing/__init__.py +0 -3
- openstef/postprocessing/postprocessing.py +0 -275
- openstef/preprocessing/__init__.py +0 -3
- openstef/preprocessing/preprocessing.py +0 -42
- openstef/settings.py +0 -15
- openstef/tasks/__init__.py +0 -3
- openstef/tasks/calculate_kpi.py +0 -324
- openstef/tasks/create_basecase_forecast.py +0 -118
- openstef/tasks/create_components_forecast.py +0 -162
- openstef/tasks/create_forecast.py +0 -145
- openstef/tasks/create_solar_forecast.py +0 -420
- openstef/tasks/create_wind_forecast.py +0 -80
- openstef/tasks/optimize_hyperparameters.py +0 -135
- openstef/tasks/split_forecast.py +0 -273
- openstef/tasks/train_model.py +0 -224
- openstef/tasks/utils/__init__.py +0 -3
- openstef/tasks/utils/dependencies.py +0 -107
- openstef/tasks/utils/predictionjobloop.py +0 -243
- openstef/tasks/utils/taskcontext.py +0 -160
- openstef/validation/__init__.py +0 -3
- openstef/validation/validation.py +0 -322
- openstef-3.4.56.dist-info/METADATA +0 -154
- openstef-3.4.56.dist-info/RECORD +0 -102
- openstef-3.4.56.dist-info/top_level.txt +0 -1
- /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
@@ -1,311 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-import random
-import secrets
-from datetime import timedelta
-from itertools import accumulate
-from typing import Iterable
-
-import numpy as np
-import pandas as pd
-
-AMOUNT_DAY = 96  # Duration of the periods (in T-15) that are in a day (default = 96)
-PERIOD_TIMEDELTA = 1  # Duration of the periods (in days) that will be sampled as validation data for each split.
-PEAK_FRACTION = 0.15
-
-
-def group_kfold(
-    input_data: pd.DataFrame, n_folds: int, randomize_fold_split: bool = True
-) -> pd.DataFrame:
-    """Function to group data into groups, according to the date and the number of folds.
-
-    Each date gets assigned a number between 0 and n_folds.
-
-    Args:
-        input_data: Input data
-        n_folds: Number of folds
-        randomize_fold_split: Indicates if random split needs to be applied
-
-    Returns:
-        Grouped data
-
-    """
-    unique_dates = input_data["dates"].unique()  # dates defines the day (Y-M-D)
-    # Group separators
-    len_data = len(unique_dates)  # number of indices that can be used for splitting
-    size = len_data // n_folds  # size of each fold set
-    rem = (
-        len_data % n_folds
-    )  # remaining number of indices when divided in fold sets of equal size
-    separators = list(
-        accumulate([0] + [size + 1] * rem + [size] * (n_folds - rem))
-    )  # location of seperators
-
-    items = list(unique_dates)
-
-    if randomize_fold_split:
-        random.shuffle(items)  # if random, shuffle the days
-
-    for i, s in enumerate(zip(separators, separators[1:])):
-        group = items[slice(*s)]
-        input_data.loc[
-            input_data[input_data["dates"].isin(group)].index, "random_fold"
-        ] = i
-    return input_data
-
-
-def sample_indices_train_val(
-    data: pd.DataFrame, peaks: pd.DataFrame
-) -> tuple[np.array, np.array]:
-    """Sample indices of given period length assuming the peaks are evenly spreaded.
-
-    Args:
-        data: Clean data with features
-        peaks: Data frame of selected peaks to sample the dates from
-
-    Returns:
-        - List with the start point of each peak
-        - Sorted list with the indices corresponding to the peak
-
-    """
-    sampled = set()
-    peaks_val = []
-
-    for peak in peaks:
-        sampled |= set(data[data.index.date == peak].index)
-        peaks_val.append(peak)
-    return peaks_val, np.sort(list(sampled))
-
-
-def random_sample(all_peaks: np.array, k: int) -> np.array:
-    """Random sampling of numbers out of a np.array.
-
-    Implemented due to security sonar cloud not accepting the random built-in functions.
-
-    Args:
-        all_peaks: List with numbers to sample from
-        k: Number of wanted samples
-
-    Returns:
-        Sorted array with the random samples (dates from the peaks)
-
-    """
-    random_peaks = []
-    all_peaks_list = all_peaks.tolist()
-    for _ in range(k):
-        element_random = secrets.choice(all_peaks_list)
-        all_peaks_list.remove(element_random)
-        random_peaks.append(element_random)
-    return np.array(random_peaks)
-
-
-def split_data_train_validation_test(
-    data_: pd.DataFrame,
-    test_fraction: float = 0.1,
-    validation_fraction: float = 0.15,
-    back_test: bool = False,
-    stratification_min_max: bool = True,
-) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-    """Split input data into train, test and validation set.
-
-    Function for splitting data with features in a train, test and
-    validation dataset. In an operational setting the following sequence is
-    returned (when using stratification):
-
-    Train >> Validation (and the test is the Train and Validation combined.)
-
-    For a back test (indicated with argument "back_test") the following sequence
-    is returned:
-
-    Train >> Validation >> Test
-
-    The ratios of the different types can be set with test_fraction and
-    validation fraction.
-
-    Args:
-        data_: Cleaned data with features
-        test_fraction : Number between 0 and 1 that indicates the desired
-            fraction of test data.
-        validation_fraction: Number between 0 and 1 that indicates the
-            desired fraction of validation data.
-        back_test: Indicates if data is intended for a back test.
-        stratification_min_max: Indicates if validation data must be sampled as
-            periods, using stratification on min and max values per day.
-            If True, 'extreme days' are ensured to be included in the validation and train sets,
-            ensuring the validation set to be representative of the train set.
-
-    Returns:
-        - Train data.
-        - Validation data.
-        - Test data.
-
-    Raises:
-        ValueError: When the test and validation fractions are too high.
-
-    """
-    test_fraction = test_fraction if back_test else 0
-    train_fraction = 1 - (test_fraction + validation_fraction)
-    if train_fraction < 0:
-        raise ValueError(
-            "Test ({test_fraction}) and validation fraction ({validation_fraction}) too"
-            " high."
-        )
-
-    # Define constants
-    min_days_for_stratification = 4
-    extreme_values_fraction = (
-        0.15  # 15percent of highest and lowest values are considered extremes
-    )
-
-    # Get start date from the index
-    start_date = data_.index.min().to_pydatetime()
-    end_date = data_.index.max().to_pydatetime()
-
-    # Calculate total of quarter hours (PTU's) in input data
-    number_indices = len(data_.index.unique())  # Total number of unique timepoints
-    delta = (
-        data_.index.unique().sort_values()[1] - data_.index.unique().sort_values()[0]
-    )  # Delta t, assumed to be constant throughout DataFrame
-    delta = timedelta(
-        seconds=delta.seconds
-    )  # Convert from pandas timedelta to original python timedelta
-
-    # Determine which dates are in testset
-    if back_test:
-        start_date_test = end_date - np.round(number_indices * test_fraction) * delta
-        test_data = data_[start_date_test:]
-        train_val_data = data_[:start_date_test]
-        operational_score_data = (
-            pd.DataFrame()
-        )  # Empty because a backtest is no operational setting.
-    else:
-        start_date_val = start_date + np.round(number_indices * test_fraction) * delta
-        test_data = data_[
-            :start_date_val
-        ]  # Empty as all data is used for training in an operational setting.
-        train_val_data = data_[start_date_val:]
-        operational_score_data = data_.copy(deep=True).reset_index(
-            drop=True
-        )  # Used to check wether a new operationally train model is better than the old one.
-
-    if stratification_min_max and (
-        len(set(train_val_data.index.date)) >= min_days_for_stratification
-    ):
-        # First how many dates are considers min or max.
-        # Note that this should be at least to, so one can go to the train set
-        # and another to the testset
-        train_val_dates = list(set(train_val_data.index.date))
-        n_days_per_min_max_subset = int(
-            max(extreme_values_fraction * len(set(data_.index.date)), 2)
-        )
-        # Find max_dates
-        max_dates = (
-            train_val_data[["load"]]
-            .resample("1D")
-            .max()
-            .sort_values(by="load", ascending=False)
-            .dropna()
-            .index[:n_days_per_min_max_subset]
-        ).date
-        # Find min_dates, but do not consider the max_dates
-        min_dates_subset = train_val_data.loc[
-            ~np.isin(train_val_data.index.date, max_dates), ["load"]
-        ]
-        min_dates = (
-            min_dates_subset[["load"]]
-            .resample("1D")
-            .min()
-            .sort_values(by="load", ascending=True)
-            .dropna()
-            .index[:n_days_per_min_max_subset]
-        ).date
-        other_dates = [
-            x for x in train_val_dates if x not in min_dates and x not in max_dates
-        ]
-
-        # Divide min, max and other dates fairly over validation and train set, with at least 1 min and max in train and validation
-        val_dates = []
-        train_dates = []
-        for date_set in [max_dates, min_dates, other_dates]:
-            n_days_val = max(1, int(validation_fraction * len(date_set)))
-            val_dates += list(
-                np.random.choice(list(date_set), n_days_val, replace=False)
-            )
-            train_dates += [x for x in date_set if x not in val_dates]
-
-        validation_data = train_val_data[np.isin(train_val_data.index.date, val_dates)]
-        train_data = train_val_data[np.isin(train_val_data.index.date, train_dates)]
-
-    # Default sampling, take a one single validation set.
-    else:
-        if back_test:
-            start_date_train = (
-                start_date + np.round(number_indices * validation_fraction) * delta
-            )
-            end_date_train = end_date - np.round(number_indices * test_fraction) * delta
-            validation_data = data_[:start_date_train]
-            train_data = data_[start_date_train:end_date_train]
-        else:
-            start_date_val = (
-                start_date + np.round(number_indices * test_fraction) * delta
-            )
-            start_date_train = (
-                start_date_val + np.round(number_indices * validation_fraction) * delta
-            )
-            train_data = data_[start_date_train:None]
-            validation_data = data_[start_date_val:start_date_train]
-
-    train_data = train_data.sort_index()
-    validation_data = validation_data.sort_index()
-    test_data = test_data.sort_index()
-
-    return (train_data, validation_data, test_data, operational_score_data)
-
-
-def backtest_split_default(
-    data: pd.DataFrame,
-    n_folds: int,
-    test_fraction: float = 0.15,
-    stratification_min_max: bool = True,
-    randomize_fold_split: bool = False,
-) -> Iterable[tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]]:
-    """Default cross validation strategy.
-
-    Args:
-        data:
-        n_folds:
-        test_fraction:
-        stratification_min_max:
-        randomize_fold_split:
-
-    Returns:
-        Iterable on train, val, test splits
-
-    Notes:
-        We use a generator in order to have lazy estimation and avoid multiple copy of the data.
-
-    """
-    if n_folds > 1:
-        data.index = pd.to_datetime(data.index)
-        data["dates"] = data.index
-        data = group_kfold(data, n_folds, randomize_fold_split)
-
-        for ifold in range(n_folds):
-            test_data = data[data["random_fold"] == ifold].sort_index()
-
-            (train_data, validation_data, _, _) = split_data_train_validation_test(
-                data[data["random_fold"] != ifold].iloc[:, :-2],
-                test_fraction=0,
-                back_test=True,
-                stratification_min_max=stratification_min_max,
-            )
-
-            yield train_data, validation_data, test_data.iloc[:, :-2], pd.DataFrame()
-    else:
-        yield split_data_train_validation_test(
-            data,
-            back_test=True,
-            test_fraction=test_fraction,
-            stratification_min_max=stratification_min_max,
-        )
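The hunk above removes openstef/model_selection/model_selection.py in its entirety. As a minimal sketch of how its entry points were typically called in 3.4.x (not part of the diff; the synthetic quarter-hourly "load" frame and the printed lengths are illustrative only, and the import path assumes openstef 3.4.x is still installed):

import numpy as np
import pandas as pd

from openstef.model_selection.model_selection import split_data_train_validation_test

# 30 days of quarter-hourly (PTU) load values; "load" is the column the
# stratification logic above resamples per day.
index = pd.date_range("2023-01-01", periods=96 * 30, freq="15min")
data = pd.DataFrame(
    {"load": np.random.default_rng(0).normal(100, 25, len(index))}, index=index
)

# back_test=True gives the Train >> Validation >> Test ordering described in the docstring.
train, val, test, _ = split_data_train_validation_test(
    data, test_fraction=0.1, validation_fraction=0.15, back_test=True
)
print(len(train), len(val), len(test))

backtest_split_default wraps the same function into an n-fold generator and is removed along with it.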
openstef/monitoring/__init__.py
DELETED
openstef/monitoring/performance_meter.py
DELETED
@@ -1,92 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-
-from collections import OrderedDict
-from time import perf_counter
-
-
-class PerformanceMeter:
-    def __init__(self, logger):
-        self.logger = logger
-        self.levels = OrderedDict()
-        self.level_timers = []
-        self.checkpoint_timers = []
-
-    def start_level(self, level_label: str, level_name: str, **kwargs):
-        """Enters a new level in the performance meter and logs it.
-
-        This function also creates a checkpoint on the newly created level.
-
-        Args:
-            level_label: The label of the new level. This could i.e. be 'task'
-            level_name: The name of the specified level.
-            **kwargs: Any other kwargs are appended to the logging.
-
-        Returns:
-            self
-
-        """
-        self.levels[level_label] = level_name
-
-        self.logger.info(
-            f"{level_label.capitalize()} started",
-            **self.levels,
-            **kwargs,
-        )
-        time = perf_counter()
-        self.level_timers.append(time)
-        self.checkpoint_timers.append(time)
-
-        return self
-
-    def checkpoint(self, name_checkpoint: str, **kwargs):
-        """Creates a timing checkpoint and logs the runtime from the previous one.
-
-        Args:
-            name_checkpoint: The name of the checkpoint. This will be logged as
-                "checkpoint: name_checkpoint"
-            **kwargs: Any other kwargs are appended to the logging.
-
-        Returns:
-            self
-
-        """
-        runtime = round(perf_counter() - self.checkpoint_timers.pop(), ndigits=3)
-        self.logger.info(
-            f"{name_checkpoint.capitalize()} completed",
-            **self.levels,
-            ktp_checkpoint=name_checkpoint,
-            ktp_runtime=runtime,
-            **kwargs,
-        )
-        self.checkpoint_timers.append(perf_counter())
-
-        return self
-
-    def complete_level(self, successful: bool = True, **kwargs):
-        """Completes the most inner level and logs the total runtime of that level.
-
-        Args:
-            successful: Whether the level was successful. Defaults to True.
-            **kwargs: Any other kwargs are appended to the logging.
-
-        Returns:
-            self
-
-        """
-        runtime = round(perf_counter() - self.level_timers.pop(), ndigits=3)
-        self.checkpoint_timers.pop()
-
-        level_label, level_name = self.levels.popitem()
-
-        self.logger.info(
-            f"{level_label.capitalize()} completed",
-            **self.levels,
-            **{level_label: level_name},
-            ktp_runtime=runtime,
-            ktp_successful=int(successful),
-            **kwargs,
-        )
-
-        return self
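A minimal usage sketch of the removed PerformanceMeter (not part of the diff), assuming a structlog-style logger whose info() accepts keyword arguments, which is what the class binds its level and checkpoint fields to; the task label, name, and pid are hypothetical:

import structlog

from openstef.monitoring.performance_meter import PerformanceMeter

meter = PerformanceMeter(structlog.get_logger(__name__))

meter.start_level("task", "train_model", pid=307)  # opens a level and a checkpoint
meter.checkpoint("load data")    # logs runtime since the level started
meter.checkpoint("fit model")    # logs runtime since the previous checkpoint
meter.complete_level(successful=True)  # logs the total runtime of the "task" level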
openstef/monitoring/teams.py
DELETED
@@ -1,203 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-import logging
-from typing import Union
-
-import pandas as pd
-import pymsteams
-import structlog
-from pymsteams import cardsection
-
-from openstef.settings import Settings
-
-
-def post_teams(
-    msg: Union[str, dict],
-    invalid_coefficients: pd.DataFrame = None,
-    coefficients_df: pd.DataFrame = None,
-    url: str = None,
-    proxies: dict = None,
-) -> None:
-    """Post a message to Teams - KTP.
-
-    Note that currently no authentication occurs.
-    Security is given by keeping the URL secret.
-    One should therefore refrain from using more enhanced features such as
-    action buttons.
-
-    Args:
-        msg: For simple messages a string can be passed. For more
-            complex messages pass a dict. The following keys are supported:
-            text, links, sections. Each section can contain the following keys:
-            text, title, images, facts, markdown. Also see:
-            https://docs.microsoft.com/en-us/outlook/actionable-messages/send-via-connectors
-        invalid_coefficients: df of information of invalid
-            coefficients. Defaults to None.
-        coefficients_df: df of new coefficients. Defaults to None.
-        url: webhook url, monitoring by default
-        proxies: Optinonal proxy settings.
-
-    Note:
-        This function is namespace-specific.
-    """
-    if not Settings.post_teams_messages:
-        return
-
-    structlog.configure(
-        wrapper_class=structlog.make_filtering_bound_logger(
-            logging.getLevelName(Settings.log_level)
-        )
-    )
-    logger = structlog.get_logger(__name__)
-    # If no url is passed, give warning and don't send teams message
-    if url is None:
-        logger.warning("Can't post Teams message, no url given.")
-        return
-
-    # Add invalid coefficients and manual coefficients-query to message
-    if invalid_coefficients is not None and coefficients_df is not None:
-        # add invalid coefficient information to message in dict-format
-        invalid_coefficients_text = "".join(
-            [
-                f"\n* **{row.coef_name}**: {round(row.coef_value_new, 2)}, "
-                f"(previous: {round(row.coef_value_last, 2)})"
-                for index, row in invalid_coefficients.iterrows()
-            ]
-        )
-        query = build_sql_query_string(coefficients_df, "energy_split_coefs")
-        query_text = (
-            "If you would like to update the coefficients manually in the "
-            + "database, use this query:"
-        )
-        msg = {
-            "fallback": msg,
-            "title": "Invalid energy splitting coefficients",
-            "text": msg,
-            "sections": [
-                {
-                    "text": invalid_coefficients_text,
-                    "markdown": True,
-                },
-                {
-                    "title": "Manual query",
-                    "text": query_text,
-                    "markdown": True,
-                },
-                {
-                    "text": query,
-                    "markdown": True,
-                },
-            ],
-        }
-
-    card = pymsteams.connectorcard(url)
-
-    # add proxies
-    # NOTE the connectorcard.proxy is passed to the requests library under the hood
-    card.proxies = proxies
-
-    # if msg is string, convert to dict
-    if type(msg) is str:
-        msg = dict(text=msg)
-    card.text(msg.get("text"))
-    card.summary(msg.get("fallback", "-"))
-
-    # set title, color, ...
-    card.color(msg.get("color", "white"))
-    card.title(msg.get("title"))
-
-    link_dicts = msg.get("links", [])  # link_dicts can be single dict or list of dicts
-    if isinstance(link_dicts, dict):  # if single dict
-        card.addLinkButton(link_dicts["buttontext"], link_dicts["buttonurl"])
-    elif isinstance(link_dicts, list):  # if list of dicts
-        for link_dict in link_dicts:
-            card.addLinkButton(link_dict["buttontext"], link_dict["buttonurl"])
-
-    # Add sections
-    for section_dict in msg.get("sections", []):
-        card_section = get_card_section(section_dict=section_dict)
-        card.addSection(card_section)
-
-    card.send()
-
-
-def get_card_section(section_dict: dict) -> cardsection:
-    """Get card section for teams message from dictionary."""
-    card_section = cardsection()
-    card_section.text(section_dict.get("text"))
-    card_section.title(section_dict.get("title"))
-    for image in section_dict.get("images", []):
-        card_section.addImage(image)
-    for fact in section_dict.get("facts", []):
-        card_section.addFact(*fact)
-    if not section_dict.get("markdown", True):
-        card_section.disableMarkdown()
-    if "link" in section_dict:
-        card_section.linkButton(
-            section_dict.get("link").get("buttontext"),
-            section_dict.get("link").get("buttonurl"),
-        )
-    return card_section
-
-
-def build_sql_query_string(df: pd.DataFrame, table: str) -> str:
-    """Build sql insert query string for Teams message output from df.
-
-    Args:
-        df: Df of table values to insert in sql.
-        table: Table to insert df into.
-
-    Returns:
-        Sql query of insert statement.
-
-    """
-    # round all values to two decimals
-    df = df.round(2)
-    # convert datetime to string format
-    datetime_columns = df.columns[
-        df.columns.isin(["date_start", "date_end", "created"])
-    ]
-    for col in datetime_columns:
-        df[col] = df[col].astype("str")
-
-    sql_texts = [
-        "``` \nINSERT INTO "
-        + table
-        + " ("
-        + str(", ".join(df.columns))
-        + ") VALUES \n"
-    ]
-    for index, row in df.iterrows():
-        if index != df.index[0]:
-            sql_texts.append(", \n")  # 2 spaces and \n create a new line
-        sql_texts.append(str(tuple(row.values)))
-    sql_texts.append(" \n```")
-    query = "".join(sql_texts)
-    return query
-
-
-def format_message(title: str, params: dict, fallback=None, color=None) -> dict:
-    if color is None:
-        color = "#046b00"  # green
-    if fallback is None:
-        fallback = title
-
-    # format allparams using limited precision for floats
-    # make keys bold (**key**)
-    values = []
-    for k, v in params.items():
-        if type(v) is float:
-            values.append(f"**{k}**: {v:0.3f}")
-            continue
-        values.append(f"**{k}**: {v}")
-    # join all {params}: {value} pairs with a new line
-    text = "".join([f"* {v} \n" for v in values])
-
-    msg = {
-        "fallback": fallback,
-        "title": title,
-        "text": text,
-        "color": color,
-    }
-    return msg
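A minimal sketch of how the removed helpers were combined in 3.4.x (not part of the diff): format_message builds the card dictionary and post_teams delivers it to a webhook. The event name, parameters, and URL below are placeholders, and nothing is sent unless Settings.post_teams_messages is enabled and a real webhook url is supplied:

from openstef.monitoring.teams import format_message, post_teams

msg = format_message(
    title="Model trained",  # hypothetical event title
    params={"prediction_job": 307, "rMAE": 0.0314},  # hypothetical values
)
post_teams(msg, url="https://example.webhook.office.com/...")  # placeholder webhook url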
openstef/pipeline/__init__.py
DELETED