openstef 3.3.14__py3-none-any.whl → 3.3.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,21 +7,36 @@ import pandas as pd
7
7
 
8
8
 
9
9
  def replace_repeated_values_with_nan(
10
- df: pd.DataFrame, max_length: int, column_name: str
10
+ df: pd.DataFrame, threshold: int, column_name: str
11
11
  ) -> pd.DataFrame:
12
12
  """Replace sequentially repeated values with NaN.
13
13
 
14
14
  Args:
15
15
  df: Data with potential repeating values.
16
- max_length: Maximum length of sequence. Above are set to NaN.
16
+ threshold: The minimum number of sequentially repeated values needed to trigger the replacement with NaN.
17
17
  column_name: Column name of input dataframe with repeating values.
18
18
 
19
19
  Returns:
20
20
  DataFrame, similar to df, with the desired values set to NaN.
21
21
 
22
22
  """
23
- data = df.copy(deep=True)
24
- sequentials = data[column_name].diff().ne(0).cumsum()
25
- grouped_sequentials_over_max = sequentials.groupby(sequentials).head(max_length)
26
- data.loc[~data.index.isin(grouped_sequentials_over_max.index), column_name] = np.nan
23
+ data = df.copy()
24
+
25
+ # Add a boolean column to mark sequential duplicates
26
+ data["temp_is_duplicate"] = data[column_name].eq(data[column_name].shift(1))
27
+
28
+ # Create a unique identifier for each sequence with the same value, so we can easily remove the correct sequences
29
+ data["temp_repeated_group"] = (~data["temp_is_duplicate"]).cumsum()
30
+
31
+ # Create mask of sequences larger than or equal to the threshold value
32
+ mask = (
33
+ data.groupby("temp_repeated_group")[column_name].transform("count") >= threshold
34
+ )
35
+
36
+ # Replace the masked values with NaN
37
+ data.loc[mask, column_name] = np.nan
38
+
39
+ # Drop temporary columns
40
+ data = data.drop(["temp_is_duplicate", "temp_repeated_group"], axis=1)
41
+
27
42
  return data
@@ -62,7 +62,7 @@ def validate(
62
62
 
63
63
  # Drop 'false' measurements. e.g. where load appears to be constant.
64
64
  data = replace_repeated_values_with_nan(
65
- data, max_length=flatliner_threshold_repetitions, column_name=data.columns[0]
65
+ data, threshold=flatliner_threshold_repetitions, column_name=data.columns[0]
66
66
  )
67
67
  num_repeated_values = len(data) - len(data.iloc[:, 0].dropna())
68
68
  if num_repeated_values > 0:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openstef
3
- Version: 3.3.14
3
+ Version: 3.3.15
4
4
  Summary: Open short term energy forecaster
5
5
  Home-page: https://github.com/OpenSTEF/openstef
6
6
  Author: Alliander N.V
@@ -67,7 +67,7 @@ openstef/pipeline/utils.py,sha256=fkc-oNirJ-JiyuOAL08RFrnPYPwudWal_N-BO6Cw980,20
67
67
  openstef/postprocessing/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
68
68
  openstef/postprocessing/postprocessing.py,sha256=nehd0tDpkdIaWFJggQ-fDizIKdfmqJ3IOGfk0sDnrzk,8409
69
69
  openstef/preprocessing/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
70
- openstef/preprocessing/preprocessing.py,sha256=S7gsQZJ0Kj9mq7qKN_4-VlYfU5MYre-mb9yQB05bt74,949
70
+ openstef/preprocessing/preprocessing.py,sha256=bM_cSSSb2vGTD79RGzUrI6KoELbzlCyJwc7jqQGNEsE,1454
71
71
  openstef/tasks/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
72
72
  openstef/tasks/calculate_kpi.py,sha256=pC8CJ0UqhySpVVewMN0GIe0ELEmYOf1Wc9xElUe0Q5M,11985
73
73
  openstef/tasks/create_basecase_forecast.py,sha256=Hk9fDljXvo5TfeS3nWHrerWi7y-lQzoJEaqWbqaxHOs,3852
@@ -84,9 +84,9 @@ openstef/tasks/utils/dependencies.py,sha256=Jy9dtV_G7lTEa5Cdy--wvMxJuAb0adb3R0X4
84
84
  openstef/tasks/utils/predictionjobloop.py,sha256=Ysy3zF5lzPMz_asYDKeF5m0qgVT3tCtwSPihqMjnI5Q,9580
85
85
  openstef/tasks/utils/taskcontext.py,sha256=yI6TntOkZcW8JiNVuw4uJIigEBL0_iIrkPklF4ZeCX4,5401
86
86
  openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
87
- openstef/validation/validation.py,sha256=Mhgizxn-zBGjf5m6WdTLO-WAppFJCPQGD29OsmmelAU,10347
88
- openstef-3.3.14.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
89
- openstef-3.3.14.dist-info/METADATA,sha256=5iGOfb-oZp202KyzleLN_3Ap9qlYhR-hAtKE65qObek,7840
90
- openstef-3.3.14.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
91
- openstef-3.3.14.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
92
- openstef-3.3.14.dist-info/RECORD,,
87
+ openstef/validation/validation.py,sha256=SaI-Mff9UOHQPnQ2jodXzZAVZilc-2AXZsPpSjDRqAg,10346
88
+ openstef-3.3.15.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
89
+ openstef-3.3.15.dist-info/METADATA,sha256=dZzwkwBP5JTrSGCwiKFCpkzy1Jb_1g4GLhgg_-lf-YM,7840
90
+ openstef-3.3.15.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
91
+ openstef-3.3.15.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
92
+ openstef-3.3.15.dist-info/RECORD,,