pattern-detector 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pattern_detector/__init__.py +36 -0
- pattern_detector/aoi_finder.py +185 -0
- pattern_detector/cosine_similarity_function.py +75 -0
- pattern_detector/sliding_window_cosine_similarity.py +45 -0
- {pattern_detector-0.1.0.dist-info → pattern_detector-0.2.0.dist-info}/METADATA +1 -1
- pattern_detector-0.2.0.dist-info/RECORD +12 -0
- pattern_detector-0.2.0.dist-info/top_level.txt +2 -0
- tests/test_detector.py +1 -1
- pattern_detection/__init__.py +0 -3
- pattern_detection/detector.py +0 -152
- pattern_detector-0.1.0.dist-info/RECORD +0 -10
- pattern_detector-0.1.0.dist-info/top_level.txt +0 -2
- {pattern_detection → pattern_detector}/utils.py +0 -0
- {pattern_detector-0.1.0.dist-info → pattern_detector-0.2.0.dist-info}/LICENSE +0 -0
- {pattern_detector-0.1.0.dist-info → pattern_detector-0.2.0.dist-info}/WHEEL +0 -0
pattern_detector/__init__.py ADDED
@@ -0,0 +1,36 @@
+try:
+    from .aoi_finder import run_area_of_interest_finder
+except ImportError:
+    raise ImportError("The PatternDetector module could not be imported. Ensure all dependencies are installed.")
+
+__version__ = "0.2.0"
+
+def pattern_detector(data, pattern, column_pattern):
+    """
+    A simplified interface for using the PatternDetector class.
+
+    Parameters:
+    - data (pd.DataFrame): The main data.
+    - pattern (pd.DataFrame): The pattern data.
+    - column_pattern (str): The column name for pattern matching.
+
+    Returns:
+    - pd.DataFrame: The resulting DataFrame with detected cycles.
+    """
+    detector = run_area_of_interest_finder(data, pattern, column_pattern)
+    return detector
+
+# Replace the module with a callable version
+import sys
+from types import ModuleType
+
+class CallableModule(ModuleType):
+    def __init__(self, name):
+        super().__init__(name)
+        self.__version__ = __version__
+
+    def __call__(self, data, pattern, column_pattern):
+        return pattern_detector(data, pattern, column_pattern)
+
+# Replace the current module in sys.modules with the callable version
+sys.modules[__name__] = CallableModule(__name__)
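Because `sys.modules[__name__]` is swapped for a `CallableModule`, the imported package itself can be called like a function. A minimal usage sketch, assuming toy single-column DataFrames (the column name "value" and the data below are illustrative, not from the package):

    import pandas as pd
    import pattern_detector

    # Hypothetical inputs: a long signal and a shorter template to search for.
    data = pd.DataFrame({"value": [0.0, 1.0, 2.0, 1.0] * 500})
    pattern = pd.DataFrame({"value": [0.0, 1.0, 2.0, 1.0] * 60})

    # The module object was replaced by CallableModule, so this call forwards
    # to run_area_of_interest_finder(data, pattern, "value").
    result = pattern_detector(data, pattern, "value")
    print(result["cycle"].dropna().unique())  # inspect any detected cycle labels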
pattern_detector/aoi_finder.py ADDED
@@ -0,0 +1,185 @@
+import numpy as np
+import pandas as pd
+
+import warnings
+warnings.filterwarnings("ignore")
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+#import warnings
+from scipy.stats import skew, kurtosis
+#warnings.filterwarnings("ignore")
+from scipy.fft import fft
+from scipy.interpolate import interp1d
+
+
+def compute_cosine_sim(data1, window_size, len_iter, pattern1, i, j):
+
+    window = data1[i:i + window_size - (len_iter // 2) + j, :]
+
+
+    max_pos = np.max(pattern1) + 0.1 * np.ptp(pattern1)  # np.ptp: calculates max-min difference
+    min_pos = np.min(pattern1) - 0.1 * np.ptp(pattern1)
+    mean_pos_upper = np.mean(pattern1) + 0.1 * np.ptp(pattern1)
+    mean_pos_lower = np.mean(pattern1) - 0.1 * np.ptp(pattern1)
+    pattern_skewness = skew(pattern1.flatten())
+    pattern_kurtosis = kurtosis(pattern1.flatten())
+    pattern_std = np.std(pattern1.flatten())
+    cross_correlation_threshold = 0.5  # Set a threshold for cross-correlation
+
+    starting_point_lower = pattern1[0] - 0.2 * np.ptp(pattern1)
+    starting_point_upper = pattern1[0] + 0.2 * np.ptp(pattern1)
+
+    ending_point_lower = pattern1[-1] - 0.2 * np.ptp(pattern1)
+    ending_point_upper = pattern1[-1] + 0.2 * np.ptp(pattern1)
+
+
+    x_original = np.linspace(0, 1, len(window))
+    x_target = np.linspace(0, 1, len(pattern1))
+
+    window = window.reshape(-1,)
+
+    interpolator = interp1d(x_original, window, kind='cubic')  # Cubic interpolation onto the pattern grid
+
+    window2 = interpolator(x_target)
+
+    corr_coef = np.corrcoef(window2, pattern1)[0][1]
+
+    sliding_window_max = np.max(window2)
+    sliding_window_min = np.min(window2)
+    sliding_window_skewness = skew(window2)
+    sliding_window_kurtosis = kurtosis(window2)
+    sliding_window_mean = np.mean(window2)
+    sliding_window_std = np.std(window2)
+
+
+    if (sliding_window_max <= max_pos and sliding_window_min >= min_pos and
+            mean_pos_upper >= sliding_window_mean >= mean_pos_lower and
+            abs(sliding_window_skewness - pattern_skewness) < 0.5 and
+            abs(sliding_window_kurtosis - pattern_kurtosis) < 1.0 and
+            pattern_std * 0.9 <= sliding_window_std <= pattern_std * 1.1 and
+            corr_coef >= cross_correlation_threshold and
+            starting_point_lower <= window2[0] <= starting_point_upper and
+            ending_point_lower <= window2[-1] <= ending_point_upper):
+
+        fft_pattern = fft(pattern1)
+        fft_window = fft(window2)
+
+        magnitude_pattern = np.abs(fft_pattern)
+        magnitude_window = np.abs(fft_window)
+
+        dot_product = np.dot(magnitude_pattern, magnitude_window)
+        norm_1 = np.linalg.norm(magnitude_pattern)
+        norm_2 = np.linalg.norm(magnitude_window)
+
+        cosine_similarity = dot_product / (norm_1 * norm_2)
+
+
+        return i, j, cosine_similarity
+
+    else:
+        return i, j, 0
+
+
+def sliding_window_cosine_similarity(data, pattern, column_pattern):
+
+    len_iter = 400 if len(pattern) >= 400 else 200
+    bin_parser = 3 if len_iter == 400 else 2
+
+    pattern[column_pattern] = pattern[column_pattern].astype(float)
+    pattern.reset_index(drop=True, inplace=True)
+    pattern['bin'] = pattern.index // bin_parser
+    pattern1 = pattern.groupby('bin').agg({column_pattern: 'mean'}).reset_index()
+    pattern1 = np.array(pattern1[column_pattern]).reshape(-1,)
+
+    # Prepare data
+    data.reset_index(drop=True, inplace=True)
+    data[column_pattern] = data[column_pattern].astype(float)
+    data['bin'] = data.index // bin_parser
+    data1 = data.groupby('bin').agg({column_pattern: 'mean'}).reset_index()
+    data1 = np.array(data1[column_pattern]).reshape(-1, 1)
+
+    window_size = len(pattern1)
+    step_size = 2
+
+    similarity_dict = {}
+
+
+    with ThreadPoolExecutor() as executor:  # Adjust max_workers based on your CPU cores
+        futures = [executor.submit(compute_cosine_sim, data1, window_size, len_iter, pattern1, i, j)
+                   for i in range(0, len(data1) - window_size, step_size)
+                   for j in range(0, len_iter, (len_iter // 40))]
+        for future in as_completed(futures):
+            i, j, similarity = future.result()
+            if i not in similarity_dict:
+                similarity_dict[i] = {}
+            similarity_dict[i][j] = similarity
+
+    return similarity_dict, pattern1, window_size, bin_parser, len_iter
+
+
+def run_area_of_interest_finder(df, pattern, column_pattern):
+
+    similarity_dict, pattern1, window_size, bin_parser, len_iter = sliding_window_cosine_similarity(df, pattern, column_pattern)
+
+    approx_cycle_length = len(pattern1) * 0.95
+
+    results = []
+    for key1, value in similarity_dict.items():
+
+        max_key = max(value, key=value.get)
+        max_value = value[max_key]
+        results.append({'key': key1, 'max_key': max_key, 'max_value': max_value})
+
+    df_dist = pd.DataFrame(results)
+
+    df_dist.reset_index(inplace=True)
+    df_dist['app_cycle'] = df_dist["key"] // approx_cycle_length
+    df_dist["app_cycle"] = df_dist["app_cycle"].astype(int)
+
+    yig = tuple(df_dist.groupby("app_cycle"))
+    cyc_dict = {x: y for x, y in yig}
+
+    idx_cyc = 0
+    cyc_concat_df = pd.DataFrame()
+
+    for k in cyc_dict.keys():
+        df_cyc = cyc_dict[k]
+        df_cyc = df_cyc[df_cyc["max_value"] != 0]
+
+        key_min_df = df_cyc[["key", "max_key", "max_value"]][df_cyc["max_value"] == np.max(df_cyc['max_value'])]
+        key_min_df["cycle"] = idx_cyc
+        if len(key_min_df) != 0:
+            cyc_concat_df = pd.concat([cyc_concat_df, key_min_df], ignore_index=True, axis="index")
+            idx_cyc += 1
+        else:
+            continue
+
+    cyc_concat_df["start_index"] = cyc_concat_df["key"]
+    cyc_concat_df["end_index"] = cyc_concat_df["start_index"] + window_size + cyc_concat_df["max_key"] - (len_iter // 2)
+    cyc_concat_df["shift_start"] = cyc_concat_df["start_index"].shift(1)
+
+    cyc_concat_df["diff"] = cyc_concat_df["shift_start"] - cyc_concat_df["start_index"]
+    cyc_concat_df["shift_start"].iloc[0] = len(pattern1)
+    cyc_concat_df["diff"].iloc[0] = -len(pattern1)
+    limit = len(pattern1) * .7
+    cyc_concat_df = cyc_concat_df[cyc_concat_df["diff"] < -limit]
+    cyc_concat_df.reset_index(inplace=True, drop=True)
+    cyc_concat_df["cycle"] = cyc_concat_df.index
+
+    cyc_concat_df["shift_end"] = cyc_concat_df["end_index"].shift(1)
+    ######## Done to prevent overlaps between consecutive windows
+    cyc_concat_df["shift_end"].iloc[0] = cyc_concat_df["diff"].iloc[0]
+    cyc_concat_df["diff_end"] = cyc_concat_df["shift_end"] - cyc_concat_df["start_index"]
+    cyc_concat_df["start_index"][cyc_concat_df["diff_end"] > 0] = cyc_concat_df["start_index"][cyc_concat_df["diff_end"] > 0] + cyc_concat_df["diff_end"] + 1
+
+
+    #df = data.copy()
+    df.reset_index(drop=True, inplace=True)
+    for i in cyc_concat_df["cycle"].unique():
+        start = cyc_concat_df["start_index"][cyc_concat_df["cycle"] == i].values[0] * bin_parser
+        stop = cyc_concat_df["end_index"][cyc_concat_df["cycle"] == i].values[0] * bin_parser
+        #print(start, stop, i, stop-start)
+        df.loc[start:stop, "cycle"] = int(i)
+
+
+    return df
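To make the window geometry concrete: each candidate window starts at bin `i` and has raw length `window_size - len_iter//2 + j`, so the `j` sweep tries lengths from half a template shorter to almost half a template longer than the pattern before cubic resampling onto the template's grid. A small sketch of that sweep under assumed sizes (a 300-row pattern, which takes the small-pattern branch: `len_iter = 200`, `bin_parser = 2`, `window_size = 150` bins):

    # Sketch of the length sweep inside compute_cosine_sim; sizes are assumed.
    window_size, len_iter = 150, 200

    lengths = [window_size - (len_iter // 2) + j
               for j in range(0, len_iter, len_iter // 40)]
    print(lengths[0], lengths[-1])  # 50 245: half a pattern short .. half long
    # Every candidate is cubic-resampled to window_size points before the
    # statistical gates and the FFT-magnitude cosine similarity are applied.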
pattern_detector/cosine_similarity_function.py ADDED
@@ -0,0 +1,75 @@
+import numpy as np
+import pandas as pd
+#import warnings
+from scipy.stats import skew, kurtosis
+#warnings.filterwarnings("ignore")
+from scipy.fft import fft
+from scipy.interpolate import interp1d
+
+
+def compute_cosine_sim(data1, window_size, len_iter, pattern1, i, j):
+
+    window = data1[i:i + window_size - (len_iter // 2) + j, :]
+
+
+    max_pos = np.max(pattern1) + 0.1 * np.ptp(pattern1)  # np.ptp: calculates max-min difference
+    min_pos = np.min(pattern1) - 0.1 * np.ptp(pattern1)
+    mean_pos_upper = np.mean(pattern1) + 0.1 * np.ptp(pattern1)
+    mean_pos_lower = np.mean(pattern1) - 0.1 * np.ptp(pattern1)
+    pattern_skewness = skew(pattern1.flatten())
+    pattern_kurtosis = kurtosis(pattern1.flatten())
+    pattern_std = np.std(pattern1.flatten())
+    cross_correlation_threshold = 0.5  # Set a threshold for cross-correlation
+
+    starting_point_lower = pattern1[0] - 0.2 * np.ptp(pattern1)
+    starting_point_upper = pattern1[0] + 0.2 * np.ptp(pattern1)
+
+    ending_point_lower = pattern1[-1] - 0.2 * np.ptp(pattern1)
+    ending_point_upper = pattern1[-1] + 0.2 * np.ptp(pattern1)
+
+
+    x_original = np.linspace(0, 1, len(window))
+    x_target = np.linspace(0, 1, len(pattern1))
+
+    window = window.reshape(-1,)
+
+    interpolator = interp1d(x_original, window, kind='cubic')  # Cubic interpolation onto the pattern grid
+
+    window2 = interpolator(x_target)
+
+    corr_coef = np.corrcoef(window2, pattern1)[0][1]
+
+    sliding_window_max = np.max(window2)
+    sliding_window_min = np.min(window2)
+    sliding_window_skewness = skew(window2)
+    sliding_window_kurtosis = kurtosis(window2)
+    sliding_window_mean = np.mean(window2)
+    sliding_window_std = np.std(window2)
+
+
+    if (sliding_window_max <= max_pos and sliding_window_min >= min_pos and
+            mean_pos_upper >= sliding_window_mean >= mean_pos_lower and
+            abs(sliding_window_skewness - pattern_skewness) < 0.5 and
+            abs(sliding_window_kurtosis - pattern_kurtosis) < 1.0 and
+            pattern_std * 0.9 <= sliding_window_std <= pattern_std * 1.1 and
+            corr_coef >= cross_correlation_threshold and
+            starting_point_lower <= window2[0] <= starting_point_upper and
+            ending_point_lower <= window2[-1] <= ending_point_upper):
+
+        fft_pattern = fft(pattern1)
+        fft_window = fft(window2)
+
+        magnitude_pattern = np.abs(fft_pattern)
+        magnitude_window = np.abs(fft_window)
+
+        dot_product = np.dot(magnitude_pattern, magnitude_window)
+        norm_1 = np.linalg.norm(magnitude_pattern)
+        norm_2 = np.linalg.norm(magnitude_window)
+
+        cosine_similarity = dot_product / (norm_1 * norm_2)
+
+
+        return i, j, cosine_similarity
+
+    else:
+        return i, j, 0
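The score itself is the cosine of the angle between the two FFT magnitude spectra, which makes it insensitive to time shifts of the window's content while staying sensitive to its frequency mix. A standalone sketch of just that metric on two synthetic signals (the helper name `fft_cosine` is illustrative):

    import numpy as np
    from scipy.fft import fft

    def fft_cosine(a, b):
        # Cosine similarity between FFT magnitude spectra, as in compute_cosine_sim.
        ma, mb = np.abs(fft(a)), np.abs(fft(b))
        return np.dot(ma, mb) / (np.linalg.norm(ma) * np.linalg.norm(mb))

    t = np.linspace(0, 2 * np.pi, 200)
    print(fft_cosine(np.sin(t), np.sin(t + 0.5)))  # near 1: same spectrum, shifted
    print(fft_cosine(np.sin(t), np.sin(5 * t)))    # much lower: different frequency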
pattern_detector/sliding_window_cosine_similarity.py ADDED
@@ -0,0 +1,45 @@
+import numpy as np
+import pandas as pd
+from Functions.cosine_similarity_function import compute_cosine_sim
+import warnings
+warnings.filterwarnings("ignore")
+from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor
+
+
+
+def sliding_window_cosine_similarity(data, pattern, column_pattern):
+
+    len_iter = 400 if len(pattern) >= 400 else 200
+    bin_parser = 3 if len_iter == 400 else 2
+
+    pattern[column_pattern] = pattern[column_pattern].astype(float)
+    pattern.reset_index(drop=True, inplace=True)
+    pattern['bin'] = pattern.index // bin_parser
+    pattern1 = pattern.groupby('bin').agg({column_pattern: 'mean'}).reset_index()
+    pattern1 = np.array(pattern1[column_pattern]).reshape(-1,)
+
+    # Prepare data
+    data.reset_index(drop=True, inplace=True)
+    data[column_pattern] = data[column_pattern].astype(float)
+    data['bin'] = data.index // bin_parser
+    data1 = data.groupby('bin').agg({column_pattern: 'mean'}).reset_index()
+    data1 = np.array(data1[column_pattern]).reshape(-1, 1)
+
+    window_size = len(pattern1)
+    step_size = 2
+
+    similarity_dict = {}
+
+
+    with ThreadPoolExecutor() as executor:  # Adjust max_workers based on your CPU cores
+        futures = [executor.submit(compute_cosine_sim, data1, window_size, len_iter, pattern1, i, j)
+                   for i in range(0, len(data1) - window_size, step_size)
+                   for j in range(0, len_iter, (len_iter // 40))]
+        for future in as_completed(futures):
+            i, j, similarity = future.result()
+            if i not in similarity_dict:
+                similarity_dict[i] = {}
+            similarity_dict[i][j] = similarity
+
+    return similarity_dict, pattern1, window_size, bin_parser, len_iter
+
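Note that this module imports `compute_cosine_sim` from a `Functions` package the wheel does not ship; per the 0.2.0 RECORD below, the function is packaged at `pattern_detector.cosine_similarity_function`, so importing this module as published would raise `ModuleNotFoundError`. A caller relying on the wheel's own layout would presumably import the shipped copy instead:

    # Sketch: importing the packaged copy of compute_cosine_sim rather than
    # the unshipped 'Functions' package referenced above.
    from pattern_detector.cosine_similarity_function import compute_cosine_sim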
{pattern_detector-0.1.0.dist-info → pattern_detector-0.2.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pattern_detector
-Version: 0.1.0
+Version: 0.2.0
 Summary: A library for detecting patterns in time-series data.
 Author: Yigit Utku Bulut and Ahmet Faruk Minareci
 Author-email: yigit.utku.bulut@gmail.com, ahmetfaruk.minareci@gmail.com
pattern_detector-0.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+pattern_detector/__init__.py,sha256=KdIGjRvrjByYpWexnCidgTpFpMcj0TJAqJXrh05VJTo,1159
+pattern_detector/aoi_finder.py,sha256=TwlXN54UWVdWl2tC-E48enCPiDvnkmFm_rwFhZkkS7c,7069
+pattern_detector/cosine_similarity_function.py,sha256=Vgh6Blm6kji54JfXLVoiesYYwO0qDAyCv6W--YBlU3o,2704
+pattern_detector/sliding_window_cosine_similarity.py,sha256=B-Zs0fHDCXlFcWlzEhKuTquavIWzUlMgc1dd3x2gkBk,1735
+pattern_detector/utils.py,sha256=jqkfeTqWEnrnvzKBxr_UdKj9cj7zPF15UhHPr9sBSa0,2646
+tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tests/test_detector.py,sha256=nJJRWlXy1jGM3gbynHJGVD7jO4Zp0eqCuQYNePWtjJw,972
+pattern_detector-0.2.0.dist-info/LICENSE,sha256=RslT26mCGxF9nQEyEQL8kFMFUgmYvCA0UivIWefxWmU,1098
+pattern_detector-0.2.0.dist-info/METADATA,sha256=N8bkT1WbLmP0CspHdwXrLVYmUcKk8ZMt9C4EYeH9Gsk,536
+pattern_detector-0.2.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+pattern_detector-0.2.0.dist-info/top_level.txt,sha256=zYjRoeMq6DYEzeVTU-JZKbeooXspQwn8M0uCF-naRww,23
+pattern_detector-0.2.0.dist-info/RECORD,,
tests/test_detector.py CHANGED
pattern_detection/__init__.py DELETED
pattern_detection/detector.py DELETED
@@ -1,152 +0,0 @@
-import numpy as np
-import pandas as pd
-from scipy.stats import skew, kurtosis
-from joblib import Parallel, delayed
-
-class PatternDetector:
-    def __init__(self, df, pattern, column_pattern):
-        self.df = df.copy()
-        self.pattern = pattern
-        self.column_pattern = column_pattern
-        self.similarity_dict = {}
-        self.pattern1 = None
-        self.window_size = None
-        self.bin_parser = None
-        self.len_iter = None
-        self.pattern_constraints = {}
-
-    def preprocess_pattern(self):
-        """Preprocess the pattern data."""
-        len_iter = 400 if len(self.pattern) >= 400 else 200
-        bin_parser = 3 if len_iter == 400 else 2
-
-        self.pattern['bin'] = self.pattern.index // bin_parser
-        self.pattern1 = self.pattern.groupby('bin')[self.column_pattern].mean().to_numpy()
-        self.len_iter = len_iter
-        self.bin_parser = bin_parser
-        self.window_size = len(self.pattern1)
-
-        # Compute constraints
-        self.pattern_constraints = {
-            "max_pos": np.max(self.pattern1) + 0.1 * np.ptp(self.pattern1),
-            "min_pos": np.min(self.pattern1) - 0.1 * np.ptp(self.pattern1),
-            "mean_pos_upper": np.mean(self.pattern1) + 0.1 * np.ptp(self.pattern1),
-            "mean_pos_lower": np.mean(self.pattern1) - 0.1 * np.ptp(self.pattern1),
-            "pattern_skewness": skew(self.pattern1),
-            "pattern_kurtosis": kurtosis(self.pattern1),
-            "pattern_std": np.std(self.pattern1),
-            "starting_point_lower": self.pattern1[0] - 0.2 * np.ptp(self.pattern1),
-            "starting_point_upper": self.pattern1[0] + 0.2 * np.ptp(self.pattern1),
-            "ending_point_lower": self.pattern1[-1] - 0.2 * np.ptp(self.pattern1),
-            "ending_point_upper": self.pattern1[-1] + 0.2 * np.ptp(self.pattern1),
-            "cross_correlation_threshold": 0.5,
-        }
-
-    def preprocess_data(self):
-        """Preprocess the main data."""
-        self.df['bin'] = self.df.index // self.bin_parser
-        return self.df.groupby('bin')[self.column_pattern].mean().to_numpy().reshape(-1, 1)
-
-    def apply_constraints(self, window):
-        """Apply constraints to filter valid windows."""
-        pc = self.pattern_constraints
-        corr_coef = np.corrcoef(window, self.pattern1)[0][1]
-
-        if (
-            np.max(window) <= pc["max_pos"]
-            and np.min(window) >= pc["min_pos"]
-            and pc["mean_pos_upper"] >= np.mean(window) >= pc["mean_pos_lower"]
-            and abs(skew(window) - pc["pattern_skewness"]) < 0.5
-            and abs(kurtosis(window) - pc["pattern_kurtosis"]) < 1.0
-            and pc["pattern_std"] * 0.9 <= np.std(window) <= pc["pattern_std"] * 1.1
-            and corr_coef >= pc["cross_correlation_threshold"]
-            and pc["starting_point_lower"] <= window[0] <= pc["starting_point_upper"]
-            and pc["ending_point_lower"] <= window[-1] <= pc["ending_point_upper"]
-        ):
-            return True
-        return False
-
-    def compute_cosine_sim(self, data1, i, j):
-        """Compute cosine similarity for a given sliding window."""
-        window = data1[i:i + self.window_size - (self.len_iter // 2) + j, :].reshape(-1,)
-
-        if len(window) != len(self.pattern1):  # Ensure dimensions match
-            return i, j, 0
-
-        # Apply constraints
-        if not self.apply_constraints(window):
-            return i, j, 0
-
-        fft_pattern = np.fft.fft(self.pattern1)
-        fft_window = np.fft.fft(window)
-
-        dot_product = np.dot(np.abs(fft_pattern), np.abs(fft_window))
-        norm_pattern = np.linalg.norm(np.abs(fft_pattern))
-        norm_window = np.linalg.norm(np.abs(fft_window))
-        similarity = dot_product / (norm_pattern * norm_window)
-
-        return i, j, similarity
-
-    def calculate_similarity(self):
-        """Calculate sliding window cosine similarity."""
-        data1 = self.preprocess_data()
-
-        results = Parallel(n_jobs=-1)(
-            delayed(self.compute_cosine_sim)(data1, i, j)
-            for i in range(0, len(data1) - self.window_size, 2)
-            for j in range(0, self.len_iter, self.len_iter // 40)
-        )
-
-        for i, j, similarity in results:
-            if similarity > 0:
-                self.similarity_dict.setdefault(i, {})[j] = similarity
-
-    def get_top_similarities(self):
-        """Extract top similarities from the similarity dictionary."""
-        results = [
-            {'key': key1, 'max_key': max(value, key=value.get), 'max_value': max(value.values())}
-            for key1, value in self.similarity_dict.items()
-        ]
-        return pd.DataFrame(results)
-
-    def find_area_of_interest(self):
-        """Find areas of interest in the data."""
-        self.preprocess_pattern()
-        self.calculate_similarity()
-        df_dist = self.get_top_similarities()
-
-        approx_cycle_length = len(self.pattern1) * 0.95
-        df_dist['app_cycle'] = (df_dist['key'] // approx_cycle_length).astype(int)
-        grouped = df_dist.groupby('app_cycle')
-
-        cyc_concat_df = pd.concat(
-            [
-                group.loc[group['max_value'].idxmax()].assign(cycle=idx_cyc)
-                for idx_cyc, (_, group) in enumerate(grouped)
-                if not group.empty and group['max_value'].max() != 0
-            ],
-            ignore_index=True
-        )
-
-        cyc_concat_df['start_index'] = cyc_concat_df['key']
-        cyc_concat_df['end_index'] = (
-            cyc_concat_df['start_index'] + self.window_size +
-            cyc_concat_df['max_key'] - (self.len_iter // 2)
-        )
-        cyc_concat_df['shift_start'] = cyc_concat_df['start_index'].shift(1, fill_value=len(self.pattern1))
-        cyc_concat_df['diff'] = cyc_concat_df['shift_start'] - cyc_concat_df['start_index']
-        limit = len(self.pattern1) * 0.7
-        cyc_concat_df = cyc_concat_df[cyc_concat_df['diff'] < -limit].reset_index(drop=True)
-        cyc_concat_df['cycle'] = cyc_concat_df.index
-
-        cyc_concat_df['shift_end'] = cyc_concat_df['end_index'].shift(1, fill_value=cyc_concat_df['diff'].iloc[0])
-        cyc_concat_df['diff_end'] = cyc_concat_df['shift_end'] - cyc_concat_df['start_index']
-        overlap = cyc_concat_df['diff_end'] > 0
-        cyc_concat_df.loc[overlap, 'start_index'] += cyc_concat_df.loc[overlap, 'diff_end'] + 1
-
-        self.df['cycle'] = np.nan
-        for _, row in cyc_concat_df.iterrows():
-            start, stop = int(row['start_index'] * self.bin_parser), int(row['end_index'] * self.bin_parser)
-            self.df.loc[start:stop, 'cycle'] = row['cycle']
-
-        return self.df
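For contrast, the removed 0.1.0 entry point was class-based. A usage sketch reconstructed from the deleted methods (the DataFrame names and column "value" are illustrative):

    from pattern_detection.detector import PatternDetector  # 0.1.0 package layout

    detector = PatternDetector(df, pattern, "value")  # df, pattern: pd.DataFrames
    labeled = detector.find_area_of_interest()        # returns df with a 'cycle' column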
pattern_detector-0.1.0.dist-info/RECORD DELETED
@@ -1,10 +0,0 @@
-pattern_detection/__init__.py,sha256=hzZhj16aof1dhcIOAI9Tfo5EoPJgexGChsnCLgXq-68,68
-pattern_detection/detector.py,sha256=XaYYRledSolgTPTYLRSWhjZhmnAAvZT2pD3h5g9kP9k,6585
-pattern_detection/utils.py,sha256=jqkfeTqWEnrnvzKBxr_UdKj9cj7zPF15UhHPr9sBSa0,2646
-tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/test_detector.py,sha256=biHGbqd6Uc8dFTtyK-BxYeqFYtJOO9lk_6rQMeQCbQs,972
-pattern_detector-0.1.0.dist-info/LICENSE,sha256=RslT26mCGxF9nQEyEQL8kFMFUgmYvCA0UivIWefxWmU,1098
-pattern_detector-0.1.0.dist-info/METADATA,sha256=9pgMSSHwImJHjA1AOAOJm-ou2MsYnNFwPa6AGvl-fow,536
-pattern_detector-0.1.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-pattern_detector-0.1.0.dist-info/top_level.txt,sha256=W_tzR9Ah3_TgQsiGDs46ELePkuxfcPmFOosXSYw2hl0,24
-pattern_detector-0.1.0.dist-info/RECORD,,
{pattern_detection → pattern_detector}/utils.py RENAMED
File without changes
{pattern_detector-0.1.0.dist-info → pattern_detector-0.2.0.dist-info}/LICENSE RENAMED
File without changes
{pattern_detector-0.1.0.dist-info → pattern_detector-0.2.0.dist-info}/WHEEL RENAMED
File without changes