pthelma 1.1.0__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

Sign up to get free protection for your applications and to get access to all the features.
rocc/calculation.pyx ADDED
@@ -0,0 +1,182 @@
1
+ # cython: language_level=3
2
+ import datetime as dt
3
+
4
+ cimport numpy as np
5
+ from cpython cimport array
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
+
11
class Rocc:
    """Check a time series against a set of change thresholds.

    The pandas data is converted to plain numpy arrays, handed to the
    module-level ``_perform_rocc`` function, and converted back to a
    ``DataFrame`` afterwards.

    Parameters:
        timeseries: object whose ``data`` attribute is a ``DataFrame`` with
            "value" and "flags" columns; ``data`` is replaced by ``execute``.
        thresholds: iterable of objects with ``delta_t`` (a string such as
            "10min" or "h") and ``allowed_diff`` (a number) attributes.
        symmetric: passed through unchanged to ``_perform_rocc``.
        flag: text appended to the flags of failing records; ``None`` or an
            empty string disables flagging.
    """

    def __init__(self, timeseries, thresholds, symmetric, flag):
        self.htimeseries = timeseries
        self.thresholds = thresholds
        self.symmetric = symmetric
        self.flag = flag or ""

    def execute(self):
        """Run the check and return what ``_perform_rocc`` returns."""
        self._transform_thresholds()
        self._transform_to_plain_numpy()
        failures = self._do_actual_job()
        self._transform_to_pandas()
        return failures

    def _transform_thresholds(self):
        """Unpack the thresholds into two parallel typed arrays."""
        # "q" is a signed 64-bit integer on every platform; "l" is only 32
        # bits wide on LLP64 systems, which a delta in nanoseconds overflows.
        threshold_deltas = array.array("q")
        threshold_allowed_diffs = array.array("d")

        for threshold in self.thresholds:
            delta_t = int(self._get_delta_t_transformed(threshold.delta_t))
            threshold_deltas.append(delta_t)
            threshold_allowed_diffs.append(threshold.allowed_diff)
        self.threshold_deltas = threshold_deltas
        self.threshold_allowed_diffs = threshold_allowed_diffs

    def _get_delta_t_transformed(self, delta_t):
        """Return ``delta_t`` as a ``numpy.timedelta64`` in nanoseconds.

        A ``delta_t`` that does not start with a digit (e.g. "h") gets a
        multiplier of 1 prepended before parsing.
        """
        if not delta_t[0].isdigit():
            delta_t = "1" + delta_t
        # Pin the unit so that int() on the result is always a nanosecond
        # count, the unit the timestamps are converted to.
        return pd.Timedelta(delta_t).to_timedelta64().astype("timedelta64[ns]")

    def _transform_to_plain_numpy(self):
        """Extract index, values, flags and UTC offset as plain numpy/int."""
        data = self.htimeseries.data
        flag_lengths = data["flags"].str.len()
        max_flag_length = 0 if flag_lengths.empty else int(flag_lengths.max())
        # Leave room for a separating space plus the flag that may be added.
        flags_dtype = "U" + str(max_flag_length + 1 + len(self.flag))
        # Normalise to nanoseconds and to a fixed 64-bit integer, whatever
        # the resolution of the index and the width of the platform integer.
        self.ts_index = data.index.values.astype("datetime64[ns]").astype(np.int64)
        self.ts_values = data["value"].values
        self.ts_flags = data["flags"].values.astype(flags_dtype)
        try:
            utc_offset = data.index.tz.utcoffset(dt.datetime.now())
        except AttributeError:
            # No usable tz on the index (e.g. it is None): use a zero offset.
            utc_offset = dt.timedelta(0)
        self.ts_utc_offset_minutes = int(utc_offset.total_seconds() / 60)

    def _do_actual_job(self):
        """Call ``_perform_rocc`` with the prepared arrays."""
        return _perform_rocc(
            self.ts_index,
            self.ts_values,
            self.ts_flags,
            self.ts_utc_offset_minutes,
            list(self.thresholds),
            self.threshold_deltas,
            self.threshold_allowed_diffs,
            self.symmetric,
            self.flag,
        )

    def _transform_to_pandas(self):
        """Rebuild ``htimeseries.data`` from the numpy arrays."""
        # Build the columns separately; stacking values and flags into one
        # array would convert the floats to strings and back.
        self.htimeseries.data = pd.DataFrame(
            {
                "value": np.asarray(self.ts_values, dtype=np.float64),
                "flags": self.ts_flags,
            },
            index=self.htimeseries.data.index,
            columns=["value", "flags"],
        )
74
+
75
+
76
+ # IMPORTANT: There's some plain Python in the Cython below. Specifically, there are some
77
+ # Python lists and some places with undeclared variables. These are only used when a
78
+ # failure is found. Given that failures should be very few, this should not affect the
79
+ # overall speed. But I'm not really a Cython expert and I don't know exactly how it
80
+ # works.
81
+
82
+
83
def _perform_rocc(
    np.ndarray ts_index,
    np.ndarray ts_values,
    np.ndarray ts_flags,
    int ts_utc_offset_minutes,
    list thresholds,
    array.array threshold_deltas,
    array.array threshold_allowed_diffs,
    int symmetric,
    str flag,
):
    """Check every record and return the list of failure messages.

    Each record is passed to ``_record_fails_check``, which appends a
    message to the returned list when the record fails. If a record fails
    and ``flag`` is non-empty, ``flag`` is added to that record's entry in
    ``ts_flags`` (modified in place).
    """
    cdef int record_no, failed
    cdef list messages = []

    for record_no in range(ts_index.size):
        failed = _record_fails_check(
            record_no,
            ts_index,
            ts_values,
            ts_utc_offset_minutes,
            thresholds,
            threshold_deltas,
            threshold_allowed_diffs,
            symmetric,
            messages,
        )
        if not (failed and flag):
            continue
        _add_flag(record_no, ts_flags, flag)
    return messages
112
+
113
+
114
def _add_flag(int i, np.ndarray ts_flags, str flag):
    """Append ``flag`` to ``ts_flags[i]`` in place.

    A single space separates ``flag`` from any text already stored there.
    """
    existing = ts_flags[i]
    separator = " " if existing else ""
    ts_flags[i] = existing + separator + flag
118
+
119
+
120
def _record_fails_check(
    int record_index,
    np.ndarray ts_index,
    np.ndarray ts_values,
    int ts_utc_offset_minutes,
    list thresholds,
    array.array threshold_deltas,
    array.array threshold_allowed_diffs,
    int symmetric,
    list failures,
):
    """Test one record against each threshold in turn.

    Stops at the first threshold for which ``_record_fails_threshold``
    returns a non-zero difference, appends a message describing it to
    ``failures`` and returns True. Returns False if no threshold fails.
    """
    cdef int threshold_no
    cdef double diff

    for threshold_no in range(len(threshold_deltas)):
        diff = _record_fails_threshold(
            record_index,
            threshold_deltas[threshold_no],
            threshold_allowed_diffs[threshold_no],
            ts_index,
            ts_values,
            symmetric,
        )
        if not diff:
            continue

        # Plain Python from here on; reached only when a failure is found.
        timestamp = ts_index[record_index].item()
        local_time = np.datetime64(timestamp, "ns") + np.timedelta64(
            ts_utc_offset_minutes, "m"
        )
        # Keep the first 16 characters, i.e. the text down to the minutes.
        datestr = str(local_time)[:16]
        exceeded_upwards = diff > 0
        diffsign = "+" if exceeded_upwards else ""
        cmpsign = ">" if exceeded_upwards else "<"
        thresholdsign = "-" if diff < 0 else ""
        failures.append(
            f"{datestr} {diffsign}{diff} in {thresholds[threshold_no].delta_t} "
            f"({cmpsign} {thresholdsign}{threshold_allowed_diffs[threshold_no]})"
        )
        return True
    return False
157
+
158
+
159
def _record_fails_threshold(
    int record_index,
    long long threshold_delta,
    double threshold_allowed_diff,
    np.ndarray ts_index,
    np.ndarray ts_values,
    int symmetric,
):
    """Compare one record with the earlier records inside a time window.

    Walks backwards from the record just before ``record_index`` and stops
    at the first earlier record whose timestamp is more than
    ``threshold_delta`` before the current one. For each record visited,
    the difference ``current value - earlier value`` is tested: it fails if
    it is greater than ``threshold_allowed_diff`` or, when ``symmetric`` is
    true, if it is less than ``-threshold_allowed_diff``.

    Returns the first failing difference, or False if none fails.
    ``ts_index`` and ``threshold_delta`` must be in the same unit.
    """
    cdef double current_value = ts_values[record_index]
    # "long long" is at least 64 bits wide everywhere; a plain C "long" is
    # 32 bits on LLP64 platforms and cannot hold a nanosecond timestamp.
    cdef long long current_timestamp = ts_index[record_index]
    cdef int i, fails
    cdef double diff

    for i in range(record_index - 1, -1, -1):
        if current_timestamp - ts_index[i] > threshold_delta:
            return False
        diff = current_value - ts_values[i]
        fails = (
            diff > threshold_allowed_diff
            or (symmetric and diff < -threshold_allowed_diff)
        )
        if fails:
            return diff
    return False