pattern_detector-0.1.0-py3-none-any.whl

pattern_detection/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .detector import PatternDetector
+
+ __all__ = ["PatternDetector"]
pattern_detection/detector.py ADDED
@@ -0,0 +1,152 @@
+ import numpy as np
+ import pandas as pd
+ from scipy.stats import skew, kurtosis
+ from joblib import Parallel, delayed
+
+ class PatternDetector:
+     def __init__(self, df, pattern, column_pattern):
+         self.df = df.copy()
+         self.pattern = pattern.copy()  # copy so preprocess_pattern cannot mutate the caller's frame
+         self.column_pattern = column_pattern
+         self.similarity_dict = {}
+         self.pattern1 = None
+         self.window_size = None
+         self.bin_parser = None
+         self.len_iter = None
+         self.pattern_constraints = {}
+
+     def preprocess_pattern(self):
+         """Preprocess the pattern data."""
+         len_iter = 400 if len(self.pattern) >= 400 else 200
+         bin_parser = 3 if len_iter == 400 else 2
+
+         self.pattern['bin'] = self.pattern.index // bin_parser
+         self.pattern1 = self.pattern.groupby('bin')[self.column_pattern].mean().to_numpy()
+         self.len_iter = len_iter
+         self.bin_parser = bin_parser
+         self.window_size = len(self.pattern1)
+
+         # Compute constraints
+         self.pattern_constraints = {
+             "max_pos": np.max(self.pattern1) + 0.1 * np.ptp(self.pattern1),
+             "min_pos": np.min(self.pattern1) - 0.1 * np.ptp(self.pattern1),
+             "mean_pos_upper": np.mean(self.pattern1) + 0.1 * np.ptp(self.pattern1),
+             "mean_pos_lower": np.mean(self.pattern1) - 0.1 * np.ptp(self.pattern1),
+             "pattern_skewness": skew(self.pattern1),
+             "pattern_kurtosis": kurtosis(self.pattern1),
+             "pattern_std": np.std(self.pattern1),
+             "starting_point_lower": self.pattern1[0] - 0.2 * np.ptp(self.pattern1),
+             "starting_point_upper": self.pattern1[0] + 0.2 * np.ptp(self.pattern1),
+             "ending_point_lower": self.pattern1[-1] - 0.2 * np.ptp(self.pattern1),
+             "ending_point_upper": self.pattern1[-1] + 0.2 * np.ptp(self.pattern1),
+             "cross_correlation_threshold": 0.5,
+         }
+
+     def preprocess_data(self):
+         """Preprocess the main data."""
+         self.df['bin'] = self.df.index // self.bin_parser
+         return self.df.groupby('bin')[self.column_pattern].mean().to_numpy().reshape(-1, 1)
+
+     def apply_constraints(self, window):
+         """Apply constraints to filter valid windows."""
+         pc = self.pattern_constraints
+         corr_coef = np.corrcoef(window, self.pattern1)[0][1]
+
+         if (
+             np.max(window) <= pc["max_pos"]
+             and np.min(window) >= pc["min_pos"]
+             and pc["mean_pos_upper"] >= np.mean(window) >= pc["mean_pos_lower"]
+             and abs(skew(window) - pc["pattern_skewness"]) < 0.5
+             and abs(kurtosis(window) - pc["pattern_kurtosis"]) < 1.0
+             and pc["pattern_std"] * 0.9 <= np.std(window) <= pc["pattern_std"] * 1.1
+             and corr_coef >= pc["cross_correlation_threshold"]
+             and pc["starting_point_lower"] <= window[0] <= pc["starting_point_upper"]
+             and pc["ending_point_lower"] <= window[-1] <= pc["ending_point_upper"]
+         ):
+             return True
+         return False
+
+     def compute_cosine_sim(self, data1, i, j):
+         """Compute cosine similarity for a given sliding window."""
+         window = data1[i:i + self.window_size - (self.len_iter // 2) + j, :].reshape(-1,)
+
+         if len(window) != len(self.pattern1):  # Ensure dimensions match
+             return i, j, 0
+
+         # Apply constraints
+         if not self.apply_constraints(window):
+             return i, j, 0
+
+         fft_pattern = np.fft.fft(self.pattern1)
+         fft_window = np.fft.fft(window)
+
+         dot_product = np.dot(np.abs(fft_pattern), np.abs(fft_window))
+         norm_pattern = np.linalg.norm(np.abs(fft_pattern))
+         norm_window = np.linalg.norm(np.abs(fft_window))
+         similarity = dot_product / (norm_pattern * norm_window)
+
+         return i, j, similarity
+
+     def calculate_similarity(self):
+         """Calculate sliding window cosine similarity."""
+         data1 = self.preprocess_data()
+
+         results = Parallel(n_jobs=-1)(
+             delayed(self.compute_cosine_sim)(data1, i, j)
+             for i in range(0, len(data1) - self.window_size, 2)
+             for j in range(0, self.len_iter, self.len_iter // 40)
+         )
+
+         for i, j, similarity in results:
+             if similarity > 0:
+                 self.similarity_dict.setdefault(i, {})[j] = similarity
+
+     def get_top_similarities(self):
+         """Extract top similarities from the similarity dictionary."""
+         results = [
+             {'key': key1, 'max_key': max(value, key=value.get), 'max_value': max(value.values())}
+             for key1, value in self.similarity_dict.items()
+         ]
+         return pd.DataFrame(results)
+
+     def find_area_of_interest(self):
+         """Find areas of interest in the data."""
+         self.preprocess_pattern()
+         self.calculate_similarity()
+         df_dist = self.get_top_similarities()
+
+         approx_cycle_length = len(self.pattern1) * 0.95
+         df_dist['app_cycle'] = (df_dist['key'] // approx_cycle_length).astype(int)
+         grouped = df_dist.groupby('app_cycle')
+
+         cyc_concat_df = pd.concat(
+             [
+                 group.loc[[group['max_value'].idxmax()]].assign(cycle=idx_cyc)  # double brackets keep a DataFrame, so .assign works
+                 for idx_cyc, (_, group) in enumerate(grouped)
+                 if not group.empty and group['max_value'].max() != 0
+             ],
+             ignore_index=True
+         )
+
+         cyc_concat_df['start_index'] = cyc_concat_df['key']
+         cyc_concat_df['end_index'] = (
+             cyc_concat_df['start_index'] + self.window_size +
+             cyc_concat_df['max_key'] - (self.len_iter // 2)
+         )
+         cyc_concat_df['shift_start'] = cyc_concat_df['start_index'].shift(1, fill_value=len(self.pattern1))
+         cyc_concat_df['diff'] = cyc_concat_df['shift_start'] - cyc_concat_df['start_index']
+         limit = len(self.pattern1) * 0.7
+         cyc_concat_df = cyc_concat_df[cyc_concat_df['diff'] < -limit].reset_index(drop=True)
+         cyc_concat_df['cycle'] = cyc_concat_df.index
+
+         cyc_concat_df['shift_end'] = cyc_concat_df['end_index'].shift(1, fill_value=cyc_concat_df['diff'].iloc[0])
+         cyc_concat_df['diff_end'] = cyc_concat_df['shift_end'] - cyc_concat_df['start_index']
+         overlap = cyc_concat_df['diff_end'] > 0
+         cyc_concat_df.loc[overlap, 'start_index'] += cyc_concat_df.loc[overlap, 'diff_end'] + 1
+
+         self.df['cycle'] = np.nan
+         for _, row in cyc_concat_df.iterrows():
+             start, stop = int(row['start_index'] * self.bin_parser), int(row['end_index'] * self.bin_parser)
+             self.df.loc[start:stop, 'cycle'] = row['cycle']
+
+         return self.df
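For orientation, a minimal usage sketch (not part of the wheel). It assumes both frames use the default RangeIndex, since preprocess_pattern bins rows by integer division of the index; the sine signal and the column name "value" are illustrative only.

    import numpy as np
    import pandas as pd
    from pattern_detection import PatternDetector

    # Synthetic signal: three noisy repeats of one 400-sample cycle.
    rng = np.random.default_rng(0)
    cycle = np.sin(np.linspace(0, 2 * np.pi, 400))
    signal = np.tile(cycle, 3) + rng.normal(0, 0.02, size=1200)

    detector = PatternDetector(
        pd.DataFrame({"value": signal}),  # data to scan
        pd.DataFrame({"value": cycle}),   # reference pattern
        "value",
    )
    labeled = detector.find_area_of_interest()
    # Rows inside a detected repetition get a cycle id; all others stay NaN.
    print(labeled["cycle"].dropna().unique())

Whether every repeat is labeled depends on the hard-coded constraint thresholds; the sketch only shows the calling convention.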
pattern_detection/utils.py ADDED
@@ -0,0 +1,87 @@
+ import numpy as np
+ from scipy.stats import skew, kurtosis
+
+ def calculate_statistics(data):
+     """
+     Calculate various statistics for a given dataset.
+
+     Parameters:
+     - data (array-like): The input data.
+
+     Returns:
+     - dict: Dictionary containing max, min, mean, std, skewness, and kurtosis.
+     """
+     return {
+         "max": np.max(data),
+         "min": np.min(data),
+         "mean": np.mean(data),
+         "std": np.std(data),
+         "skewness": skew(data),
+         "kurtosis": kurtosis(data),
+     }
+
+ def normalize_data(data):
+     """
+     Normalize the input data to range [0, 1].
+
+     Parameters:
+     - data (array-like): Input data to normalize.
+
+     Returns:
+     - np.ndarray: Normalized data.
+     """
+     min_val = np.min(data)
+     max_val = np.max(data)
+     return (data - min_val) / (max_val - min_val)
+
+ def validate_window(window, constraints):
+     """
+     Validate a sliding window against constraints.
+
+     Parameters:
+     - window (array-like): The input window data.
+     - constraints (dict): Constraint bounds keyed as in PatternDetector.pattern_constraints, plus a 'pattern' entry holding the reference curve used for the correlation check.
+
+     Returns:
+     - bool: True if all constraints are satisfied, False otherwise.
+     """
+     corr_coef = np.corrcoef(window, constraints['pattern'])[0, 1]
+
+     return (
+         np.max(window) <= constraints["max_pos"]
+         and np.min(window) >= constraints["min_pos"]
+         and constraints["mean_pos_upper"] >= np.mean(window) >= constraints["mean_pos_lower"]
+         and abs(skew(window) - constraints["pattern_skewness"]) < 0.5
+         and abs(kurtosis(window) - constraints["pattern_kurtosis"]) < 1.0
+         and constraints["pattern_std"] * 0.9 <= np.std(window) <= constraints["pattern_std"] * 1.1
+         and corr_coef >= constraints["cross_correlation_threshold"]
+         and constraints["starting_point_lower"] <= window[0] <= constraints["starting_point_upper"]
+         and constraints["ending_point_lower"] <= window[-1] <= constraints["ending_point_upper"]
+     )
+
+ def calculate_correlation(data1, data2):
+     """
+     Calculate correlation coefficient between two datasets.
+
+     Parameters:
+     - data1 (array-like): First dataset.
+     - data2 (array-like): Second dataset.
+
+     Returns:
+     - float: Correlation coefficient.
+     """
+     return np.corrcoef(data1, data2)[0, 1]
+
+ def bin_data(data, bin_size):
+     """
+     Bin the data into averages over fixed-size intervals.
+
+     Parameters:
+     - data (array-like): The input data.
+     - bin_size (int): The size of the bins.
+
+     Returns:
+     - np.ndarray: Binned data; trailing samples that do not fill a bin are dropped.
+     """
+     binned = np.asarray(data)[:len(data) // bin_size * bin_size].reshape(-1, bin_size)  # asarray so any array-like works
+     return np.mean(binned, axis=1)
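The helpers above are self-contained; a short sketch of how they compose (the sine input is illustrative):

    import numpy as np
    from pattern_detection.utils import bin_data, calculate_statistics, normalize_data

    raw = np.sin(np.linspace(0, 4 * np.pi, 200))
    binned = bin_data(raw, bin_size=4)    # 50 bin averages
    scaled = normalize_data(binned)       # rescaled into [0, 1]
    stats = calculate_statistics(scaled)  # max/min/mean/std/skewness/kurtosis

Note that validate_window mirrors PatternDetector.apply_constraints but additionally expects the reference curve under constraints['pattern'], a key the detector itself does not set.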
pattern_detector-0.1.0.dist-info/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Yigit Utku Bulut and Ahmet Faruk Minareci
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
pattern_detector-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,17 @@
+ Metadata-Version: 2.1
+ Name: pattern_detector
+ Version: 0.1.0
+ Summary: A library for detecting patterns in time-series data.
+ Author: Yigit Utku Bulut and Ahmet Faruk Minareci
+ Author-email: yigit.utku.bulut@gmail.com, ahmetfaruk.minareci@gmail.com
+ License: MIT
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.7
+ License-File: LICENSE
+ Requires-Dist: numpy
+ Requires-Dist: pandas
+ Requires-Dist: joblib
+ Requires-Dist: scipy
+
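Since the wheel is pure Python (py3-none-any) and declares its dependencies, installing the file locally, e.g. "pip install pattern_detector-0.1.0-py3-none-any.whl", pulls in numpy, pandas, joblib, and scipy automatically (whether the package is published on PyPI is not stated here).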
pattern_detector-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,10 @@
+ pattern_detection/__init__.py,sha256=hzZhj16aof1dhcIOAI9Tfo5EoPJgexGChsnCLgXq-68,68
+ pattern_detection/detector.py,sha256=XaYYRledSolgTPTYLRSWhjZhmnAAvZT2pD3h5g9kP9k,6585
+ pattern_detection/utils.py,sha256=jqkfeTqWEnrnvzKBxr_UdKj9cj7zPF15UhHPr9sBSa0,2646
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ tests/test_detector.py,sha256=biHGbqd6Uc8dFTtyK-BxYeqFYtJOO9lk_6rQMeQCbQs,972
+ pattern_detector-0.1.0.dist-info/LICENSE,sha256=RslT26mCGxF9nQEyEQL8kFMFUgmYvCA0UivIWefxWmU,1098
+ pattern_detector-0.1.0.dist-info/METADATA,sha256=9pgMSSHwImJHjA1AOAOJm-ou2MsYnNFwPa6AGvl-fow,536
+ pattern_detector-0.1.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+ pattern_detector-0.1.0.dist-info/top_level.txt,sha256=W_tzR9Ah3_TgQsiGDs46ELePkuxfcPmFOosXSYw2hl0,24
+ pattern_detector-0.1.0.dist-info/RECORD,,
pattern_detector-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (72.1.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
pattern_detector-0.1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1,2 @@
+ pattern_detection
+ tests
tests/__init__.py ADDED
File without changes
tests/test_detector.py ADDED
@@ -0,0 +1,27 @@
+ import pytest
+ import numpy as np
+ import pandas as pd
+ from pattern_detection.detector import PatternDetector
+
+ def test_preprocess_pattern():
+     pattern = pd.DataFrame({"value": range(10)})
+     detector = PatternDetector(pd.DataFrame(), pattern, "value")  # empty frame: only the pattern is needed here, and None would crash df.copy()
+     detector.preprocess_pattern()
+     assert detector.pattern1 is not None
+     assert len(detector.pattern1) > 0
+
+ def test_calculate_similarity():
+     data = pd.DataFrame({"value": range(100)})
+     pattern = pd.DataFrame({"value": range(10)})
+     detector = PatternDetector(data, pattern, "value")
+     detector.preprocess_pattern()
+     detector.calculate_similarity()
+     assert len(detector.similarity_dict) > 0
+
+ def test_find_area_of_interest():
+     data = pd.DataFrame({"value": range(100)})
+     pattern = pd.DataFrame({"value": range(10)})
+     detector = PatternDetector(data, pattern, "value")
+     result = detector.find_area_of_interest()
+     assert "cycle" in result.columns
+     assert not result["cycle"].isnull().all()
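With pytest installed, the suite runs from the project root via "pytest tests/".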