pthelma 0.99.3.dev0__cp312-cp312-musllinux_1_2_i686.whl → 1.1.0__cp312-cp312-musllinux_1_2_i686.whl
Sign up to get free protection for your applications and to get access to all the features.
- haggregate/__init__.py +0 -3
- haggregate/regularize.c +123 -123
- haggregate/regularize.cpython-312-i386-linux-musl.so +0 -0
- pthelma/_version.py +2 -2
- {pthelma-0.99.3.dev0.dist-info → pthelma-1.1.0.dist-info}/METADATA +5 -8
- {pthelma-0.99.3.dev0.dist-info → pthelma-1.1.0.dist-info}/RECORD +14 -10
- {pthelma-0.99.3.dev0.dist-info → pthelma-1.1.0.dist-info}/top_level.txt +1 -0
- rocc/__init__.py +9 -0
- rocc/calculation.c +17231 -0
- rocc/calculation.cpython-312-i386-linux-musl.so +0 -0
- rocc/calculation.pyx +182 -0
- {pthelma-0.99.3.dev0.dist-info → pthelma-1.1.0.dist-info}/LICENSE.rst +0 -0
- {pthelma-0.99.3.dev0.dist-info → pthelma-1.1.0.dist-info}/WHEEL +0 -0
- {pthelma-0.99.3.dev0.dist-info → pthelma-1.1.0.dist-info}/entry_points.txt +0 -0
Binary file
|
rocc/calculation.pyx
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
# cython: language_level=3
|
2
|
+
import datetime as dt
|
3
|
+
|
4
|
+
cimport numpy as np
|
5
|
+
from cpython cimport array
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import pandas as pd
|
9
|
+
|
10
|
+
|
11
|
+
class Rocc:
    """Rate-of-change check for a time series.

    The time series is converted to plain arrays, every record is checked
    against the thresholds by ``_perform_rocc``, and the (possibly flagged)
    result is written back to ``timeseries.data``.

    Parameters:
        timeseries: Object with a ``data`` attribute holding a pandas
            DataFrame that has ``"value"`` and ``"flags"`` columns.
        thresholds: Iterable of objects with a ``delta_t`` string (e.g.
            ``"10min"``; a leading number is optional) and an
            ``allowed_diff`` number.
        symmetric: Passed through to the check; when true, negative
            differences are checked against ``-allowed_diff`` as well.
        flag: Text appended to the flags of failing records. ``None`` or
            ``""`` means that records are not flagged.
    """

    def __init__(self, timeseries, thresholds, symmetric, flag):
        self.htimeseries = timeseries
        self.thresholds = thresholds
        self.symmetric = symmetric
        # Normalize a missing flag to "" so that len() and truthiness work.
        self.flag = flag or ""

    def execute(self):
        """Run the check and return what ``_perform_rocc`` returns.

        As a side effect ``self.htimeseries.data`` is replaced by a new
        DataFrame built from the processed arrays.
        """
        self._transform_thresholds()
        self._transform_to_plain_numpy()
        failures = self._do_actual_job()
        self._transform_to_pandas()
        return failures

    def _transform_thresholds(self):
        """Store the thresholds as two parallel typed arrays.

        ``self.threshold_deltas`` holds the time spans in nanoseconds and
        ``self.threshold_allowed_diffs`` the matching allowed differences.
        """
        # "q" (signed long long) is at least 64 bits on every platform.
        # "l" (C long) is only 32 bits on 32-bit builds and on Windows, where
        # any span longer than about 2.1 seconds, expressed in nanoseconds,
        # raises OverflowError on append.
        threshold_deltas = array.array("q")
        threshold_allowed_diffs = array.array("d")

        for threshold in self.thresholds:
            delta = self._get_delta_t_transformed(threshold.delta_t)
            # Make the unit explicit: the timestamps these deltas are
            # compared with are nanoseconds.
            delta_ns = delta.astype("timedelta64[ns]").astype(np.int64)
            threshold_deltas.append(int(delta_ns))
            threshold_allowed_diffs.append(threshold.allowed_diff)
        self.threshold_deltas = threshold_deltas
        self.threshold_allowed_diffs = threshold_allowed_diffs

    def _get_delta_t_transformed(self, delta_t):
        """Return ``delta_t`` (e.g. ``"10min"`` or ``"h"``) as numpy.timedelta64.

        A specification without a leading number is taken to mean one unit.
        """
        if not delta_t[0].isdigit():
            delta_t = "1" + delta_t
        return pd.Timedelta(delta_t).to_timedelta64()

    def _transform_to_plain_numpy(self):
        """Extract index, values and flags from the DataFrame as numpy arrays.

        Sets ``ts_index`` (int64 nanoseconds), ``ts_values``, ``ts_flags``
        (fixed-width unicode) and ``ts_utc_offset_minutes``.
        """
        data = self.htimeseries.data

        # The flags array has a fixed width, so reserve room for the longest
        # existing flags string plus a separating space plus the new flag.
        flag_lengths = data["flags"].str.len()
        max_flag_length = 0 if flag_lengths.empty else int(flag_lengths.max())
        flags_dtype = "U" + str(max_flag_length + 1 + len(self.flag))

        # Always produce 64-bit nanosecond timestamps. The platform default
        # integer can be 32 bits wide, which cannot hold them, and the index
        # is not guaranteed to have nanosecond resolution.
        self.ts_index = data.index.values.astype("datetime64[ns]").astype(np.int64)
        self.ts_values = data["value"].values
        self.ts_flags = data["flags"].values.astype(flags_dtype)

        try:
            # NOTE(review): the offset is evaluated at the current time, which
            # is only right for time zones with a fixed UTC offset - confirm
            # that callers never use a DST-observing zone.
            utc_offset = data.index.tz.utcoffset(dt.datetime.now())
        except AttributeError:
            # Naive index (tz is None) or an index type without a tz.
            utc_offset = dt.timedelta(0)
        self.ts_utc_offset_minutes = int(utc_offset.total_seconds() / 60)

    def _do_actual_job(self):
        """Call ``_perform_rocc`` on the prepared arrays and return its result."""
        return _perform_rocc(
            self.ts_index,
            self.ts_values,
            self.ts_flags,
            self.ts_utc_offset_minutes,
            list(self.thresholds),
            self.threshold_deltas,
            self.threshold_allowed_diffs,
            self.symmetric,
            self.flag,
        )

    def _transform_to_pandas(self):
        """Rebuild ``self.htimeseries.data`` from the processed arrays.

        The original index is kept; ``"value"`` is float64.
        """
        # Build the frame column by column. Stacking the float values with
        # the string flags would turn the values into strings that then have
        # to be parsed back into floats.
        self.htimeseries.data = pd.DataFrame(
            {
                "value": np.asarray(self.ts_values, dtype=np.float64),
                "flags": self.ts_flags,
            },
            index=self.htimeseries.data.index,
            columns=["value", "flags"],
        )
|
74
|
+
|
75
|
+
|
76
|
+
# IMPORTANT: There's some plain Python in the Cython below. Specifically, there are some
|
77
|
+
# Python lists and some places with undeclared variables. These are only used when a
|
78
|
+
# failure is found. Given that failures should be very few, this should not affect the
|
79
|
+
# overall speed. But I'm not really a Cython expert and I don't know exactly how it
|
80
|
+
# works.
|
81
|
+
|
82
|
+
|
83
|
+
def _perform_rocc(
    np.ndarray ts_index,
    np.ndarray ts_values,
    np.ndarray ts_flags,
    int ts_utc_offset_minutes,
    list thresholds,
    array.array threshold_deltas,
    array.array threshold_allowed_diffs,
    int symmetric,
    str flag,
):
    """Check every record of the series and flag the ones that fail.

    Each record is passed to ``_record_fails_check``, which appends a
    message to the result list when the record fails. If ``flag`` is
    non-empty, ``flag`` is also added to ``ts_flags`` (in place) for every
    failing record.

    Returns the list of failure messages.
    """
    cdef int pos, failed
    cdef list messages = []

    for pos in range(ts_index.size):
        failed = _record_fails_check(
            pos,
            ts_index,
            ts_values,
            ts_utc_offset_minutes,
            thresholds,
            threshold_deltas,
            threshold_allowed_diffs,
            symmetric,
            messages,
        )
        # Nothing to mark when the record passed or no flag was requested.
        if not failed or not flag:
            continue
        _add_flag(pos, ts_flags, flag)
    return messages
|
112
|
+
|
113
|
+
|
114
|
+
def _add_flag(int i, np.ndarray ts_flags, str flag):
    """Append ``flag`` to ``ts_flags[i]`` in place.

    A single space separates ``flag`` from any flags already present; an
    empty entry simply becomes ``flag``.
    """
    existing = ts_flags[i]
    ts_flags[i] = existing + " " + flag if existing else flag
|
118
|
+
|
119
|
+
|
120
|
+
def _record_fails_check(
    int record_index,
    np.ndarray ts_index,
    np.ndarray ts_values,
    int ts_utc_offset_minutes,
    list thresholds,
    array.array threshold_deltas,
    array.array threshold_allowed_diffs,
    int symmetric,
    list failures,
):
    """Check one record against every threshold, in order.

    At the first threshold the record violates, a human-readable message is
    appended to ``failures`` and True is returned; later thresholds are not
    examined. Returns False when the record violates no threshold.

    The message starts with the record's timestamp shifted by
    ``ts_utc_offset_minutes`` and cut to minute precision.
    """
    cdef int ti
    cdef double diff

    for ti in range(len(threshold_deltas)):
        diff = _record_fails_threshold(
            record_index,
            threshold_deltas[ti],
            threshold_allowed_diffs[ti],
            ts_index,
            ts_values,
            symmetric,
        )
        if not diff:
            # Zero (or False) means this threshold was not violated.
            continue

        # ts_index holds nanosecond timestamps; shift by the UTC offset and
        # keep only "YYYY-MM-DDTHH:MM".
        moment = np.datetime64(ts_index[record_index].item(), "ns")
        shifted = moment + np.timedelta64(ts_utc_offset_minutes, "m")
        datestr = str(shifted)[:16]

        rising = diff > 0
        diffsign = "+" if rising else ""
        cmpsign = ">" if rising else "<"
        thresholdsign = "-" if diff < 0 else ""

        observed = f"{datestr} {diffsign}{diff} in {thresholds[ti].delta_t} "
        limit = f"({cmpsign} {thresholdsign}{threshold_allowed_diffs[ti]})"
        failures.append(observed + limit)
        return True
    return False
|
157
|
+
|
158
|
+
|
159
|
+
def _record_fails_threshold(
    int record_index,
    long long threshold_delta,
    double threshold_allowed_diff,
    np.ndarray ts_index,
    np.ndarray ts_values,
    int symmetric,
):
    """Check one record against one threshold.

    Walks backwards from the record preceding ``record_index`` and compares
    the current value with each earlier value whose timestamp is at most
    ``threshold_delta`` before the current one.

    Returns the first difference (current minus earlier) that is greater
    than ``threshold_allowed_diff`` or, when ``symmetric`` is true, less
    than ``-threshold_allowed_diff``. Returns False when no such difference
    is found.

    The scan stops at the first earlier record that lies outside the time
    window, so ``ts_index`` is presumably sorted ascending - TODO confirm
    against callers.
    """
    cdef double current_value = ts_values[record_index]
    # Timestamps and deltas are nanosecond counts and need 64 bits. A plain
    # C long is only 32 bits on 32-bit builds and on Windows, where the
    # conversion would overflow.
    cdef long long current_timestamp = ts_index[record_index]
    cdef int i, fails
    cdef double diff

    for i in range(record_index - 1, -1, -1):
        if current_timestamp - ts_index[i] > threshold_delta:
            # Left the time window; nothing further back is examined.
            return False
        diff = current_value - ts_values[i]
        fails = (
            diff > threshold_allowed_diff
            or (symmetric and diff < -threshold_allowed_diff)
        )
        if fails:
            return diff
    return False
|
File without changes
|
File without changes
|
File without changes
|