pattern-detector 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ try:
2
+ from .aoi_finder import run_area_of_interest_finder
3
+ except ImportError:
4
+ raise ImportError("The PatternDetector module could not be imported. Ensure all dependencies are installed.")
5
+
6
+ __version__ = "0.2.1"
7
+
8
def pattern_detector(data, pattern, column_pattern):
    """
    Convenience entry point that forwards to ``run_area_of_interest_finder``.

    Parameters:
    - data (pd.DataFrame): The main data, passed through unchanged.
    - pattern (pd.DataFrame): The pattern data, passed through unchanged.
    - column_pattern (str): The column name for pattern matching.

    Returns:
    - Whatever ``run_area_of_interest_finder`` returns for these arguments.
    """
    return run_area_of_interest_finder(data, pattern, column_pattern)
22
+
23
# Make the package itself callable: ``pattern_detector(data, pattern, column)``.
import sys
from types import ModuleType


class CallableModule(ModuleType):
    """Module type whose instances can be called like a function.

    Calling the module forwards to ``pattern_detector`` with the same
    three arguments.
    """

    def __init__(self, name):
        # Only runs when a CallableModule is instantiated directly; kept so
        # existing ``CallableModule(name)`` callers keep working.
        super().__init__(name)
        self.__version__ = __version__

    def __call__(self, data, pattern, column_pattern):
        return pattern_detector(data, pattern, column_pattern)


# Change the class of the module object that is already registered instead of
# replacing it with a fresh, empty module. A replacement object would have no
# ``__path__`` (so later submodule imports fail with "is not a package") and
# none of the names defined above (``pattern_detector``, ``__version__``, ...).
sys.modules[__name__].__class__ = CallableModule
@@ -0,0 +1,185 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ import warnings
5
+ warnings.filterwarnings("ignore")
6
+ from concurrent.futures import ThreadPoolExecutor, as_completed
7
+
8
+ #import warnings
9
+ from scipy.stats import skew, kurtosis
10
+ #warnings.filterwarnings("ignore")
11
+ from scipy.fft import fft
12
+ from scipy.interpolate import interp1d
13
+
14
+
15
def compute_cosine_sim(data1, window_size, len_iter, pattern1, i, j):
    """
    Score one candidate window of ``data1`` against ``pattern1``.

    The candidate starts at row ``i`` and spans
    ``window_size - len_iter // 2 + j`` rows, so ``j`` stretches or shrinks
    the window relative to the pattern. The window is resampled to the
    pattern's length with a cubic interpolator, filtered by a set of
    statistical checks, and surviving candidates are scored by the cosine
    similarity of the FFT magnitude spectra.

    Parameters:
    - data1 (np.ndarray): 2-D array indexed as ``data1[rows, :]``.
    - window_size (int): Nominal window length in rows.
    - len_iter (int): Width of the length-variation range; half of it is
      subtracted from the nominal window length.
    - pattern1 (np.ndarray): 1-D reference pattern.
    - i (int): Start row of the candidate window.
    - j (int): Length offset added to the candidate window.

    Returns:
    - tuple: ``(i, j, similarity)``; ``similarity`` is 0 when the window is
      rejected or cannot be evaluated.
    """
    stop = i + window_size - (len_iter // 2) + j
    # A stop at or before the start is not a window. A negative stop would
    # also wrap around as a from-the-end index and select unrelated rows.
    if stop <= i:
        return i, j, 0

    rows = data1[i:stop, :]
    # The cubic interpolator needs at least four samples.
    if len(rows) < 4:
        return i, j, 0

    flat_pattern = pattern1.flatten()
    spread = np.ptp(pattern1)  # max - min of the pattern

    # Acceptance bands derived from the pattern.
    max_pos = np.max(pattern1) + 0.1 * spread
    min_pos = np.min(pattern1) - 0.1 * spread
    mean_pos_upper = np.mean(pattern1) + 0.1 * spread
    mean_pos_lower = np.mean(pattern1) - 0.1 * spread
    pattern_skewness = skew(flat_pattern)
    pattern_kurtosis = kurtosis(flat_pattern)
    pattern_std = np.std(flat_pattern)
    cross_correlation_threshold = 0.5

    starting_point_lower = pattern1[0] - 0.2 * spread
    starting_point_upper = pattern1[0] + 0.2 * spread
    ending_point_lower = pattern1[-1] - 0.2 * spread
    ending_point_upper = pattern1[-1] + 0.2 * spread

    # Resample the window onto the pattern's grid (cubic interpolation).
    x_original = np.linspace(0, 1, len(rows))
    x_target = np.linspace(0, 1, len(pattern1))
    window2 = interp1d(x_original, rows.reshape(-1), kind='cubic')(x_target)

    corr_coef = np.corrcoef(window2, pattern1)[0][1]
    window_std = np.std(window2)

    # Any NaN statistic makes its comparison False, i.e. rejects the window.
    accepted = (
        np.max(window2) <= max_pos
        and np.min(window2) >= min_pos
        and mean_pos_upper >= np.mean(window2) >= mean_pos_lower
        and abs(skew(window2) - pattern_skewness) < 0.5
        and abs(kurtosis(window2) - pattern_kurtosis) < 1.0
        and pattern_std * 0.9 <= window_std <= pattern_std * 1.1
        and corr_coef >= cross_correlation_threshold
        and starting_point_lower <= window2[0] <= starting_point_upper
        and ending_point_lower <= window2[-1] <= ending_point_upper
    )
    if not accepted:
        return i, j, 0

    magnitude_pattern = np.abs(fft(pattern1))
    magnitude_window = np.abs(fft(window2))

    denominator = np.linalg.norm(magnitude_pattern) * np.linalg.norm(magnitude_window)
    if denominator == 0:
        # All-zero spectrum: cosine similarity is undefined, treat as no match.
        return i, j, 0

    cosine_similarity = np.dot(magnitude_pattern, magnitude_window) / denominator
    return i, j, cosine_similarity
81
+
82
+
83
def sliding_window_cosine_similarity(data, pattern, column_pattern):
    """
    Score every sliding window of ``data`` against ``pattern``.

    Both inputs are averaged into bins of ``bin_parser`` consecutive rows.
    For every second binned start position and for 40 window-length offsets,
    ``compute_cosine_sim`` is run on a thread pool.

    NOTE: ``data`` and ``pattern`` are modified in place: their index is
    reset, ``column_pattern`` is cast to float and a ``'bin'`` column is
    added.

    Parameters:
    - data (pd.DataFrame): The main data.
    - pattern (pd.DataFrame): The pattern data.
    - column_pattern (str): The column name for pattern matching.

    Returns:
    - tuple: ``(similarity_dict, pattern1, window_size, bin_parser, len_iter)``
      where ``similarity_dict[i][j]`` is the score for start ``i`` and
      length offset ``j``.
    """
    len_iter = 400 if len(pattern) >= 400 else 200
    bin_parser = 3 if len_iter == 400 else 2

    # Bin-average the pattern into a 1-D array.
    pattern[column_pattern] = pattern[column_pattern].astype(float)
    pattern.reset_index(drop=True, inplace=True)
    pattern['bin'] = pattern.index // bin_parser
    pattern1 = pattern.groupby('bin')[column_pattern].mean().to_numpy().reshape(-1)

    # Bin-average the data into an (n, 1) column array.
    data.reset_index(drop=True, inplace=True)
    data[column_pattern] = data[column_pattern].astype(float)
    data['bin'] = data.index // bin_parser
    data1 = data.groupby('bin')[column_pattern].mean().to_numpy().reshape(-1, 1)

    window_size = len(pattern1)
    step_size = 2

    similarity_dict = {}

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(compute_cosine_sim, data1, window_size, len_iter, pattern1, i, j)
            for i in range(0, len(data1) - window_size, step_size)
            for j in range(0, len_iter, len_iter // 40)
        ]
        # Collect in submission order rather than completion order so the
        # dict layout, and any tie-breaking done on it by callers, does not
        # depend on thread scheduling.
        for future in futures:
            i, j, similarity = future.result()
            similarity_dict.setdefault(i, {})[j] = similarity

    return similarity_dict, pattern1, window_size, bin_parser, len_iter
118
+
119
+
120
def run_area_of_interest_finder(df, pattern, column_pattern):
    """
    Label the rows of ``df`` that belong to detected occurrences of ``pattern``.

    Runs ``sliding_window_cosine_similarity``, keeps the best-scoring
    candidate per approximate cycle, drops candidates that start too close
    to the previous one, pushes overlapping starts past the previous end and
    writes a running cycle number into ``df["cycle"]``.

    NOTE: ``df`` is modified in place (index reset, ``"cycle"`` column
    written) and is also the return value.

    Parameters:
    - df (pd.DataFrame): The main data.
    - pattern (pd.DataFrame): The pattern data.
    - column_pattern (str): The column name for pattern matching.

    Returns:
    - pd.DataFrame: ``df`` with a ``"cycle"`` column; rows outside every
      detected cycle hold NaN.
    """
    similarity_dict, pattern1, window_size, bin_parser, len_iter = (
        sliding_window_cosine_similarity(df, pattern, column_pattern)
    )

    df.reset_index(drop=True, inplace=True)
    # Create the column up front so it exists even when nothing matches.
    df["cycle"] = np.nan

    pattern_len = len(pattern1)
    approx_cycle_length = pattern_len * 0.95

    # Best length offset (and its score) for every start position.
    best_rows = []
    for start_key, scores in similarity_dict.items():
        best_offset = max(scores, key=scores.get)
        best_rows.append(
            {'key': start_key, 'max_key': best_offset, 'max_value': scores[best_offset]}
        )
    if not best_rows:
        return df

    df_dist = pd.DataFrame(best_rows)
    # Rejected windows score exactly 0; sort by start so the spacing logic
    # below always sees candidates in ascending order.
    df_dist = df_dist[df_dist["max_value"] != 0].sort_values("key").reset_index(drop=True)
    if df_dist.empty:
        return df

    # Keep, per approximate cycle, the candidate(s) with the highest score.
    df_dist["app_cycle"] = (df_dist["key"] // approx_cycle_length).astype(int)
    group_best = df_dist.groupby("app_cycle")["max_value"].transform("max")
    cycles = df_dist.loc[
        df_dist["max_value"] == group_best, ["key", "max_key", "max_value"]
    ].reset_index(drop=True)

    cycles["start_index"] = cycles["key"]
    cycles["end_index"] = (
        cycles["start_index"] + window_size + cycles["max_key"] - (len_iter // 2)
    )

    # Distance to the previous candidate's start (negative when ascending).
    # The first candidate has no predecessor and is always kept.
    gap = cycles["start_index"].shift(1) - cycles["start_index"]
    gap.iloc[0] = -pattern_len
    cycles["diff"] = gap
    limit = pattern_len * .7
    cycles = cycles[cycles["diff"] < -limit].reset_index(drop=True)

    # Done to prevent overlaps: a cycle that would start before the previous
    # one ended is moved to start right after that end.
    prev_end = cycles["end_index"].shift(1)
    prev_end.iloc[0] = cycles["diff"].iloc[0]
    diff_end = prev_end - cycles["start_index"]
    overlap = diff_end > 0
    starts = cycles["start_index"].where(
        ~overlap, cycles["start_index"] + diff_end + 1
    ).astype(int)

    for cycle_id, (start, end) in enumerate(zip(starts, cycles["end_index"])):
        # Convert binned positions back to row positions of ``df``.
        df.loc[int(start) * bin_parser:int(end) * bin_parser, "cycle"] = cycle_id

    return df
@@ -0,0 +1,75 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ #import warnings
4
+ from scipy.stats import skew, kurtosis
5
+ #warnings.filterwarnings("ignore")
6
+ from scipy.fft import fft
7
+ from scipy.interpolate import interp1d
8
+
9
+
10
def compute_cosine_sim(data1, window_size, len_iter, pattern1, i, j):
    """
    Score one candidate window of ``data1`` against ``pattern1``.

    The candidate starts at row ``i`` and spans
    ``window_size - len_iter // 2 + j`` rows, so ``j`` stretches or shrinks
    the window relative to the pattern. The window is resampled to the
    pattern's length with a cubic interpolator, filtered by a set of
    statistical checks, and surviving candidates are scored by the cosine
    similarity of the FFT magnitude spectra.

    Parameters:
    - data1 (np.ndarray): 2-D array indexed as ``data1[rows, :]``.
    - window_size (int): Nominal window length in rows.
    - len_iter (int): Width of the length-variation range; half of it is
      subtracted from the nominal window length.
    - pattern1 (np.ndarray): 1-D reference pattern.
    - i (int): Start row of the candidate window.
    - j (int): Length offset added to the candidate window.

    Returns:
    - tuple: ``(i, j, similarity)``; ``similarity`` is 0 when the window is
      rejected or cannot be evaluated.
    """
    stop = i + window_size - (len_iter // 2) + j
    # A stop at or before the start is not a window. A negative stop would
    # also wrap around as a from-the-end index and select unrelated rows.
    if stop <= i:
        return i, j, 0

    rows = data1[i:stop, :]
    # The cubic interpolator needs at least four samples.
    if len(rows) < 4:
        return i, j, 0

    flat_pattern = pattern1.flatten()
    spread = np.ptp(pattern1)  # max - min of the pattern

    # Acceptance bands derived from the pattern.
    max_pos = np.max(pattern1) + 0.1 * spread
    min_pos = np.min(pattern1) - 0.1 * spread
    mean_pos_upper = np.mean(pattern1) + 0.1 * spread
    mean_pos_lower = np.mean(pattern1) - 0.1 * spread
    pattern_skewness = skew(flat_pattern)
    pattern_kurtosis = kurtosis(flat_pattern)
    pattern_std = np.std(flat_pattern)
    cross_correlation_threshold = 0.5

    starting_point_lower = pattern1[0] - 0.2 * spread
    starting_point_upper = pattern1[0] + 0.2 * spread
    ending_point_lower = pattern1[-1] - 0.2 * spread
    ending_point_upper = pattern1[-1] + 0.2 * spread

    # Resample the window onto the pattern's grid (cubic interpolation).
    x_original = np.linspace(0, 1, len(rows))
    x_target = np.linspace(0, 1, len(pattern1))
    window2 = interp1d(x_original, rows.reshape(-1), kind='cubic')(x_target)

    corr_coef = np.corrcoef(window2, pattern1)[0][1]
    window_std = np.std(window2)

    # Any NaN statistic makes its comparison False, i.e. rejects the window.
    accepted = (
        np.max(window2) <= max_pos
        and np.min(window2) >= min_pos
        and mean_pos_upper >= np.mean(window2) >= mean_pos_lower
        and abs(skew(window2) - pattern_skewness) < 0.5
        and abs(kurtosis(window2) - pattern_kurtosis) < 1.0
        and pattern_std * 0.9 <= window_std <= pattern_std * 1.1
        and corr_coef >= cross_correlation_threshold
        and starting_point_lower <= window2[0] <= starting_point_upper
        and ending_point_lower <= window2[-1] <= ending_point_upper
    )
    if not accepted:
        return i, j, 0

    magnitude_pattern = np.abs(fft(pattern1))
    magnitude_window = np.abs(fft(window2))

    denominator = np.linalg.norm(magnitude_pattern) * np.linalg.norm(magnitude_window)
    if denominator == 0:
        # All-zero spectrum: cosine similarity is undefined, treat as no match.
        return i, j, 0

    cosine_similarity = np.dot(magnitude_pattern, magnitude_window) / denominator
    return i, j, cosine_similarity
@@ -0,0 +1,45 @@
1
import numpy as np
import pandas as pd
# The helper is a sibling module of this package; the distribution ships no
# top-level ``Functions`` package, so the absolute import could not resolve.
from .cosine_similarity_function import compute_cosine_sim
import warnings
warnings.filterwarnings("ignore")
from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor
7
+
8
+
9
+
10
def sliding_window_cosine_similarity(data, pattern, column_pattern):
    """
    Score every sliding window of ``data`` against ``pattern``.

    Both inputs are averaged into bins of ``bin_parser`` consecutive rows.
    For every second binned start position and for 40 window-length offsets,
    ``compute_cosine_sim`` is run on a thread pool.

    NOTE: ``data`` and ``pattern`` are modified in place: their index is
    reset, ``column_pattern`` is cast to float and a ``'bin'`` column is
    added.

    Parameters:
    - data (pd.DataFrame): The main data.
    - pattern (pd.DataFrame): The pattern data.
    - column_pattern (str): The column name for pattern matching.

    Returns:
    - tuple: ``(similarity_dict, pattern1, window_size, bin_parser, len_iter)``
      where ``similarity_dict[i][j]`` is the score for start ``i`` and
      length offset ``j``.
    """
    len_iter = 400 if len(pattern) >= 400 else 200
    bin_parser = 3 if len_iter == 400 else 2

    # Bin-average the pattern into a 1-D array.
    pattern[column_pattern] = pattern[column_pattern].astype(float)
    pattern.reset_index(drop=True, inplace=True)
    pattern['bin'] = pattern.index // bin_parser
    pattern1 = pattern.groupby('bin')[column_pattern].mean().to_numpy().reshape(-1)

    # Bin-average the data into an (n, 1) column array.
    data.reset_index(drop=True, inplace=True)
    data[column_pattern] = data[column_pattern].astype(float)
    data['bin'] = data.index // bin_parser
    data1 = data.groupby('bin')[column_pattern].mean().to_numpy().reshape(-1, 1)

    window_size = len(pattern1)
    step_size = 2

    similarity_dict = {}

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(compute_cosine_sim, data1, window_size, len_iter, pattern1, i, j)
            for i in range(0, len(data1) - window_size, step_size)
            for j in range(0, len_iter, len_iter // 40)
        ]
        # Collect in submission order rather than completion order so the
        # dict layout, and any tie-breaking done on it by callers, does not
        # depend on thread scheduling.
        for future in futures:
            i, j, similarity = future.result()
            similarity_dict.setdefault(i, {})[j] = similarity

    return similarity_dict, pattern1, window_size, bin_parser, len_iter
45
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pattern_detector
3
- Version: 0.1.0
3
+ Version: 0.2.1
4
4
  Summary: A library for detecting patterns in time-series data.
5
5
  Author: Yigit Utku Bulut and Ahmet Faruk Minareci
6
6
  Author-email: yigit.utku.bulut@gmail.com, ahmetfaruk.minareci@gmail.com
@@ -0,0 +1,12 @@
1
+ pattern_detector/__init__.py,sha256=Fuk6OrcI-ryoFxzIqFQEM2F6L3R0vZYSX7y_hHxHufI,1159
2
+ pattern_detector/aoi_finder.py,sha256=TwlXN54UWVdWl2tC-E48enCPiDvnkmFm_rwFhZkkS7c,7069
3
+ pattern_detector/cosine_similarity_function.py,sha256=Vgh6Blm6kji54JfXLVoiesYYwO0qDAyCv6W--YBlU3o,2704
4
+ pattern_detector/sliding_window_cosine_similarity.py,sha256=B-Zs0fHDCXlFcWlzEhKuTquavIWzUlMgc1dd3x2gkBk,1735
5
+ pattern_detector/utils.py,sha256=jqkfeTqWEnrnvzKBxr_UdKj9cj7zPF15UhHPr9sBSa0,2646
6
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ tests/test_detector.py,sha256=mSNuqaHEkRiGcFnvvwNWa1UWKjhF6l8xBrFhGHQ1S0A,1021
8
+ pattern_detector-0.2.1.dist-info/LICENSE,sha256=RslT26mCGxF9nQEyEQL8kFMFUgmYvCA0UivIWefxWmU,1098
9
+ pattern_detector-0.2.1.dist-info/METADATA,sha256=5VuZn23JkdZryRW5UKPGpJFJ_lnKs6_fqObUBn-0Dd8,536
10
+ pattern_detector-0.2.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
11
+ pattern_detector-0.2.1.dist-info/top_level.txt,sha256=zYjRoeMq6DYEzeVTU-JZKbeooXspQwn8M0uCF-naRww,23
12
+ pattern_detector-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ pattern_detector
2
+ tests
tests/test_detector.py CHANGED
@@ -1,11 +1,11 @@
1
1
  import pytest
2
2
  import numpy as np
3
3
  import pandas as pd
4
- from pattern_detection.detector import PatternDetector
4
+ from pattern_detector.aoi_finder import run_area_of_interest_finder
5
5
 
6
6
  def test_preprocess_pattern():
7
7
  pattern = pd.DataFrame({"value": range(10)})
8
- detector = PatternDetector(None, pattern, "value")
8
+ detector = run_area_of_interest_finder(None, pattern, "value")
9
9
  detector.preprocess_pattern()
10
10
  assert detector.pattern1 is not None
11
11
  assert len(detector.pattern1) > 0
@@ -13,7 +13,7 @@ def test_preprocess_pattern():
13
13
  def test_calculate_similarity():
14
14
  data = pd.DataFrame({"value": range(100)})
15
15
  pattern = pd.DataFrame({"value": range(10)})
16
- detector = PatternDetector(data, pattern, "value")
16
+ detector = run_area_of_interest_finder(data, pattern, "value")
17
17
  detector.preprocess_pattern()
18
18
  detector.calculate_similarity()
19
19
  assert len(detector.similarity_dict) > 0
@@ -21,7 +21,7 @@ def test_calculate_similarity():
21
21
  def test_find_area_of_interest():
22
22
  data = pd.DataFrame({"value": range(100)})
23
23
  pattern = pd.DataFrame({"value": range(10)})
24
- detector = PatternDetector(data, pattern, "value")
24
+ detector = run_area_of_interest_finder(data, pattern, "value")
25
25
  result = detector.find_area_of_interest()
26
26
  assert "cycle" in result.columns
27
27
  assert not result["cycle"].isnull().all()
@@ -1,3 +0,0 @@
1
- from .detector import PatternDetector
2
-
3
- __all__ = ["PatternDetector"]
@@ -1,152 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- from scipy.stats import skew, kurtosis
4
- from joblib import Parallel, delayed
5
-
6
class PatternDetector:
    """Detect repeated occurrences of a reference pattern in one DataFrame column.

    Typical use is ``PatternDetector(df, pattern, column).find_area_of_interest()``,
    which returns a copy of ``df`` with a ``'cycle'`` column.
    """

    def __init__(self, df, pattern, column_pattern):
        # ``df`` may be None when only the pattern is going to be preprocessed.
        self.df = df.copy() if df is not None else None
        self.pattern = pattern
        self.column_pattern = column_pattern
        self.similarity_dict = {}
        self.pattern1 = None
        self.window_size = None
        self.bin_parser = None
        self.len_iter = None
        self.pattern_constraints = {}

    def preprocess_pattern(self):
        """Bin-average the pattern and derive the acceptance bands from it."""
        len_iter = 400 if len(self.pattern) >= 400 else 200
        bin_parser = 3 if len_iter == 400 else 2

        # Group by a separate bin array so the caller's frame is not given
        # an extra 'bin' column.
        bins = np.asarray(self.pattern.index // bin_parser)
        self.pattern1 = self.pattern.groupby(bins)[self.column_pattern].mean().to_numpy()
        self.len_iter = len_iter
        self.bin_parser = bin_parser
        self.window_size = len(self.pattern1)

        spread = np.ptp(self.pattern1)  # max - min of the binned pattern
        self.pattern_constraints = {
            "max_pos": np.max(self.pattern1) + 0.1 * spread,
            "min_pos": np.min(self.pattern1) - 0.1 * spread,
            "mean_pos_upper": np.mean(self.pattern1) + 0.1 * spread,
            "mean_pos_lower": np.mean(self.pattern1) - 0.1 * spread,
            "pattern_skewness": skew(self.pattern1),
            "pattern_kurtosis": kurtosis(self.pattern1),
            "pattern_std": np.std(self.pattern1),
            "starting_point_lower": self.pattern1[0] - 0.2 * spread,
            "starting_point_upper": self.pattern1[0] + 0.2 * spread,
            "ending_point_lower": self.pattern1[-1] - 0.2 * spread,
            "ending_point_upper": self.pattern1[-1] + 0.2 * spread,
            "cross_correlation_threshold": 0.5,
        }

    def preprocess_data(self):
        """Bin-average the main data into an (n, 1) array.

        Requires ``preprocess_pattern`` to have set ``bin_parser``. Adds a
        ``'bin'`` column to the detector's own copy of the data.
        """
        if self.df is None:
            raise ValueError("PatternDetector was created without data (df is None).")
        self.df['bin'] = self.df.index // self.bin_parser
        return self.df.groupby('bin')[self.column_pattern].mean().to_numpy().reshape(-1, 1)

    def apply_constraints(self, window):
        """Return True when ``window`` lies inside every acceptance band."""
        pc = self.pattern_constraints
        corr_coef = np.corrcoef(window, self.pattern1)[0][1]

        # Any NaN statistic makes its comparison False, i.e. rejects the window.
        return bool(
            np.max(window) <= pc["max_pos"]
            and np.min(window) >= pc["min_pos"]
            and pc["mean_pos_upper"] >= np.mean(window) >= pc["mean_pos_lower"]
            and abs(skew(window) - pc["pattern_skewness"]) < 0.5
            and abs(kurtosis(window) - pc["pattern_kurtosis"]) < 1.0
            and pc["pattern_std"] * 0.9 <= np.std(window) <= pc["pattern_std"] * 1.1
            and corr_coef >= pc["cross_correlation_threshold"]
            and pc["starting_point_lower"] <= window[0] <= pc["starting_point_upper"]
            and pc["ending_point_lower"] <= window[-1] <= pc["ending_point_upper"]
        )

    def compute_cosine_sim(self, data1, i, j):
        """Compute cosine similarity of FFT magnitudes for one sliding window.

        Returns ``(i, j, similarity)``; ``similarity`` is 0 when the window
        has the wrong length or fails the constraints.
        """
        stop = i + self.window_size - (self.len_iter // 2) + j
        # A negative stop would wrap around as a from-the-end index and
        # select unrelated rows; such a span is not a window at all.
        if stop <= i:
            return i, j, 0

        window = data1[i:stop, :].reshape(-1,)

        if len(window) != len(self.pattern1):  # Ensure dimensions match
            return i, j, 0

        if not self.apply_constraints(window):
            return i, j, 0

        magnitude_pattern = np.abs(np.fft.fft(self.pattern1))
        magnitude_window = np.abs(np.fft.fft(window))

        denominator = np.linalg.norm(magnitude_pattern) * np.linalg.norm(magnitude_window)
        if denominator == 0:
            # All-zero spectrum: cosine similarity is undefined.
            return i, j, 0

        return i, j, np.dot(magnitude_pattern, magnitude_window) / denominator

    def calculate_similarity(self):
        """Calculate sliding window cosine similarity and keep positive scores."""
        data1 = self.preprocess_data()

        results = Parallel(n_jobs=-1)(
            delayed(self.compute_cosine_sim)(data1, i, j)
            for i in range(0, len(data1) - self.window_size, 2)
            for j in range(0, self.len_iter, self.len_iter // 40)
        )

        for i, j, similarity in results:
            if similarity > 0:
                self.similarity_dict.setdefault(i, {})[j] = similarity

    def get_top_similarities(self):
        """Return one row per start position with its best offset and score."""
        results = [
            {'key': key1, 'max_key': max(value, key=value.get), 'max_value': max(value.values())}
            for key1, value in self.similarity_dict.items()
        ]
        # Fixed columns so an empty result can still be inspected by name.
        return pd.DataFrame(results, columns=['key', 'max_key', 'max_value'])

    def _assign_cycles(self, df_dist):
        """Write cycle numbers into ``self.df`` from the per-start best scores."""
        self.df['cycle'] = np.nan
        df_dist = df_dist[df_dist['max_value'] != 0]
        if df_dist.empty:
            return self.df

        pattern_len = len(self.pattern1)
        approx_cycle_length = pattern_len * 0.95
        df_dist = df_dist.assign(
            app_cycle=(df_dist['key'] // approx_cycle_length).astype(int)
        )

        # Best-scoring candidate of every approximate cycle, as whole rows.
        best_labels = df_dist.groupby('app_cycle')['max_value'].idxmax()
        cyc_concat_df = df_dist.loc[best_labels].reset_index(drop=True)

        cyc_concat_df['start_index'] = cyc_concat_df['key']
        cyc_concat_df['end_index'] = (
            cyc_concat_df['start_index'] + self.window_size +
            cyc_concat_df['max_key'] - (self.len_iter // 2)
        )

        # Distance to the previous candidate's start. The first candidate
        # has no predecessor and must never be removed by the spacing filter.
        gap = cyc_concat_df['start_index'].shift(1) - cyc_concat_df['start_index']
        gap.iloc[0] = -pattern_len
        cyc_concat_df['diff'] = gap
        limit = pattern_len * 0.7
        cyc_concat_df = cyc_concat_df[cyc_concat_df['diff'] < -limit].reset_index(drop=True)
        cyc_concat_df['cycle'] = cyc_concat_df.index

        # Move a start that overlaps the previous cycle to just after its end.
        prev_end = cyc_concat_df['end_index'].shift(1)
        prev_end.iloc[0] = cyc_concat_df['diff'].iloc[0]
        diff_end = prev_end - cyc_concat_df['start_index']
        starts = cyc_concat_df['start_index'].where(
            ~(diff_end > 0), cyc_concat_df['start_index'] + diff_end + 1
        ).astype(int)

        for cycle_id, start, end in zip(cyc_concat_df['cycle'], starts, cyc_concat_df['end_index']):
            # Convert binned positions back to index labels of the data.
            self.df.loc[int(start) * self.bin_parser:int(end) * self.bin_parser, 'cycle'] = cycle_id

        return self.df

    def find_area_of_interest(self):
        """Find areas of interest in the data.

        Returns the detector's copy of the data with a ``'cycle'`` column;
        rows outside every detected cycle hold NaN.
        """
        self.preprocess_pattern()
        self.calculate_similarity()
        return self._assign_cycles(self.get_top_similarities())
@@ -1,10 +0,0 @@
1
- pattern_detection/__init__.py,sha256=hzZhj16aof1dhcIOAI9Tfo5EoPJgexGChsnCLgXq-68,68
2
- pattern_detection/detector.py,sha256=XaYYRledSolgTPTYLRSWhjZhmnAAvZT2pD3h5g9kP9k,6585
3
- pattern_detection/utils.py,sha256=jqkfeTqWEnrnvzKBxr_UdKj9cj7zPF15UhHPr9sBSa0,2646
4
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- tests/test_detector.py,sha256=biHGbqd6Uc8dFTtyK-BxYeqFYtJOO9lk_6rQMeQCbQs,972
6
- pattern_detector-0.1.0.dist-info/LICENSE,sha256=RslT26mCGxF9nQEyEQL8kFMFUgmYvCA0UivIWefxWmU,1098
7
- pattern_detector-0.1.0.dist-info/METADATA,sha256=9pgMSSHwImJHjA1AOAOJm-ou2MsYnNFwPa6AGvl-fow,536
8
- pattern_detector-0.1.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
9
- pattern_detector-0.1.0.dist-info/top_level.txt,sha256=W_tzR9Ah3_TgQsiGDs46ELePkuxfcPmFOosXSYw2hl0,24
10
- pattern_detector-0.1.0.dist-info/RECORD,,
@@ -1,2 +0,0 @@
1
- pattern_detection
2
- tests
File without changes