pattern_detector-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pattern_detection/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .detector import PatternDetector
+
+ __all__ = ["PatternDetector"]
pattern_detection/detector.py ADDED
@@ -0,0 +1,152 @@
+ import numpy as np
+ import pandas as pd
+ from scipy.stats import skew, kurtosis
+ from joblib import Parallel, delayed
+
+ class PatternDetector:
+     def __init__(self, df, pattern, column_pattern):
+         self.df = df.copy()
+         self.pattern = pattern
+         self.column_pattern = column_pattern
+         self.similarity_dict = {}
+         self.pattern1 = None
+         self.window_size = None
+         self.bin_parser = None
+         self.len_iter = None
+         self.pattern_constraints = {}
+
+     def preprocess_pattern(self):
+         """Preprocess the pattern data."""
+         len_iter = 400 if len(self.pattern) >= 400 else 200
+         bin_parser = 3 if len_iter == 400 else 2
+
+         self.pattern['bin'] = self.pattern.index // bin_parser
+         self.pattern1 = self.pattern.groupby('bin')[self.column_pattern].mean().to_numpy()
+         self.len_iter = len_iter
+         self.bin_parser = bin_parser
+         self.window_size = len(self.pattern1)
+
+         # Compute constraints
+         self.pattern_constraints = {
+             "max_pos": np.max(self.pattern1) + 0.1 * np.ptp(self.pattern1),
+             "min_pos": np.min(self.pattern1) - 0.1 * np.ptp(self.pattern1),
+             "mean_pos_upper": np.mean(self.pattern1) + 0.1 * np.ptp(self.pattern1),
+             "mean_pos_lower": np.mean(self.pattern1) - 0.1 * np.ptp(self.pattern1),
+             "pattern_skewness": skew(self.pattern1),
+             "pattern_kurtosis": kurtosis(self.pattern1),
+             "pattern_std": np.std(self.pattern1),
+             "starting_point_lower": self.pattern1[0] - 0.2 * np.ptp(self.pattern1),
+             "starting_point_upper": self.pattern1[0] + 0.2 * np.ptp(self.pattern1),
+             "ending_point_lower": self.pattern1[-1] - 0.2 * np.ptp(self.pattern1),
+             "ending_point_upper": self.pattern1[-1] + 0.2 * np.ptp(self.pattern1),
+             "cross_correlation_threshold": 0.5,
+         }
+
+     def preprocess_data(self):
+         """Preprocess the main data."""
+         self.df['bin'] = self.df.index // self.bin_parser
+         return self.df.groupby('bin')[self.column_pattern].mean().to_numpy().reshape(-1, 1)
+
+     def apply_constraints(self, window):
+         """Apply constraints to filter valid windows."""
+         pc = self.pattern_constraints
+         corr_coef = np.corrcoef(window, self.pattern1)[0][1]
+
+         if (
+             np.max(window) <= pc["max_pos"]
+             and np.min(window) >= pc["min_pos"]
+             and pc["mean_pos_upper"] >= np.mean(window) >= pc["mean_pos_lower"]
+             and abs(skew(window) - pc["pattern_skewness"]) < 0.5
+             and abs(kurtosis(window) - pc["pattern_kurtosis"]) < 1.0
+             and pc["pattern_std"] * 0.9 <= np.std(window) <= pc["pattern_std"] * 1.1
+             and corr_coef >= pc["cross_correlation_threshold"]
+             and pc["starting_point_lower"] <= window[0] <= pc["starting_point_upper"]
+             and pc["ending_point_lower"] <= window[-1] <= pc["ending_point_upper"]
+         ):
+             return True
+         return False
+
+     def compute_cosine_sim(self, data1, i, j):
+         """Compute cosine similarity for a given sliding window."""
+         window = data1[i:i + self.window_size - (self.len_iter // 2) + j, :].reshape(-1,)
+
+         if len(window) != len(self.pattern1):  # Ensure dimensions match
+             return i, j, 0
+
+         # Apply constraints
+         if not self.apply_constraints(window):
+             return i, j, 0
+
+         fft_pattern = np.fft.fft(self.pattern1)
+         fft_window = np.fft.fft(window)
+
+         dot_product = np.dot(np.abs(fft_pattern), np.abs(fft_window))
+         norm_pattern = np.linalg.norm(np.abs(fft_pattern))
+         norm_window = np.linalg.norm(np.abs(fft_window))
+         similarity = dot_product / (norm_pattern * norm_window)
+
+         return i, j, similarity
+
+     def calculate_similarity(self):
+         """Calculate sliding window cosine similarity."""
+         data1 = self.preprocess_data()
+
+         results = Parallel(n_jobs=-1)(
+             delayed(self.compute_cosine_sim)(data1, i, j)
+             for i in range(0, len(data1) - self.window_size, 2)
+             for j in range(0, self.len_iter, self.len_iter // 40)
+         )
+
+         for i, j, similarity in results:
+             if similarity > 0:
+                 self.similarity_dict.setdefault(i, {})[j] = similarity
+
+     def get_top_similarities(self):
+         """Extract top similarities from the similarity dictionary."""
+         results = [
+             {'key': key1, 'max_key': max(value, key=value.get), 'max_value': max(value.values())}
+             for key1, value in self.similarity_dict.items()
+         ]
+         return pd.DataFrame(results)
+
+     def find_area_of_interest(self):
+         """Find areas of interest in the data."""
+         self.preprocess_pattern()
+         self.calculate_similarity()
+         df_dist = self.get_top_similarities()
+
+         approx_cycle_length = len(self.pattern1) * 0.95
+         df_dist['app_cycle'] = (df_dist['key'] // approx_cycle_length).astype(int)
+         grouped = df_dist.groupby('app_cycle')
+
+         cyc_concat_df = pd.concat(
+             [
+                 group.loc[[group['max_value'].idxmax()]].assign(cycle=idx_cyc)
+                 for idx_cyc, (_, group) in enumerate(grouped)
+                 if not group.empty and group['max_value'].max() != 0
+             ],
+             ignore_index=True
+         )
+
+         cyc_concat_df['start_index'] = cyc_concat_df['key']
+         cyc_concat_df['end_index'] = (
+             cyc_concat_df['start_index'] + self.window_size +
+             cyc_concat_df['max_key'] - (self.len_iter // 2)
+         )
+         cyc_concat_df['shift_start'] = cyc_concat_df['start_index'].shift(1, fill_value=len(self.pattern1))
+         cyc_concat_df['diff'] = cyc_concat_df['shift_start'] - cyc_concat_df['start_index']
+         limit = len(self.pattern1) * 0.7
+         cyc_concat_df = cyc_concat_df[cyc_concat_df['diff'] < -limit].reset_index(drop=True)
+         cyc_concat_df['cycle'] = cyc_concat_df.index
+
+         cyc_concat_df['shift_end'] = cyc_concat_df['end_index'].shift(1, fill_value=cyc_concat_df['diff'].iloc[0])
+         cyc_concat_df['diff_end'] = cyc_concat_df['shift_end'] - cyc_concat_df['start_index']
+         overlap = cyc_concat_df['diff_end'] > 0
+         cyc_concat_df.loc[overlap, 'start_index'] += cyc_concat_df.loc[overlap, 'diff_end'] + 1
+
+         self.df['cycle'] = np.nan
+         for _, row in cyc_concat_df.iterrows():
+             start, stop = int(row['start_index'] * self.bin_parser), int(row['end_index'] * self.bin_parser)
+             self.df.loc[start:stop, 'cycle'] = row['cycle']
+
+         return self.df
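
For orientation, the detector is driven by constructing it with the full series, the pattern to search for, and the shared column name, then calling find_area_of_interest(), which returns the input frame with a 'cycle' column marking matched regions (NaN elsewhere). The sketch below is illustrative only and is not shipped in the wheel; the synthetic sine data and the column name "value" are assumptions, and whether a given match survives the cycle post-filtering depends on the data. Note that the import package is pattern_detection (see top_level.txt below) even though the distribution is named pattern_detector.

import numpy as np
import pandas as pd
from pattern_detection import PatternDetector

# Synthetic series: five noisy repetitions of a half-sine bump (assumed data).
rng = np.random.default_rng(0)
bump = np.sin(np.linspace(0, np.pi, 500))
signal = np.concatenate([bump + rng.normal(0, 0.02, 500) for _ in range(5)])

df = pd.DataFrame({"value": signal})
pattern = pd.DataFrame({"value": bump})

detector = PatternDetector(df, pattern, "value")
result = detector.find_area_of_interest()     # df with a 'cycle' column added
print(result["cycle"].dropna().unique())      # cycle ids of matched regions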
pattern_detection/utils.py ADDED
@@ -0,0 +1,87 @@
+ import numpy as np
+ from scipy.stats import skew, kurtosis
+
+ def calculate_statistics(data):
+     """
+     Calculate various statistics for a given dataset.
+
+     Parameters:
+     - data (array-like): The input data.
+
+     Returns:
+     - dict: Dictionary containing max, min, mean, std, skewness, and kurtosis.
+     """
+     return {
+         "max": np.max(data),
+         "min": np.min(data),
+         "mean": np.mean(data),
+         "std": np.std(data),
+         "skewness": skew(data),
+         "kurtosis": kurtosis(data),
+     }
+
+ def normalize_data(data):
+     """
+     Normalize the input data to range [0, 1].
+
+     Parameters:
+     - data (array-like): Input data to normalize.
+
+     Returns:
+     - np.ndarray: Normalized data.
+     """
+     min_val = np.min(data)
+     max_val = np.max(data)
+     return (data - min_val) / (max_val - min_val)
+
+ def validate_window(window, constraints):
+     """
+     Validate a sliding window against constraints.
+
+     Parameters:
+     - window (array-like): The input window data.
+     - constraints (dict): A dictionary of constraints with keys like 'max_pos', 'min_pos', etc.
+
+     Returns:
+     - bool: True if all constraints are satisfied, False otherwise.
+     """
+     corr_coef = np.corrcoef(window, constraints['pattern'])[0, 1]
+
+     return (
+         np.max(window) <= constraints["max_pos"]
+         and np.min(window) >= constraints["min_pos"]
+         and constraints["mean_pos_upper"] >= np.mean(window) >= constraints["mean_pos_lower"]
+         and abs(skew(window) - constraints["pattern_skewness"]) < 0.5
+         and abs(kurtosis(window) - constraints["pattern_kurtosis"]) < 1.0
+         and constraints["pattern_std"] * 0.9 <= np.std(window) <= constraints["pattern_std"] * 1.1
+         and corr_coef >= constraints["cross_correlation_threshold"]
+         and constraints["starting_point_lower"] <= window[0] <= constraints["starting_point_upper"]
+         and constraints["ending_point_lower"] <= window[-1] <= constraints["ending_point_upper"]
+     )
+
+ def calculate_correlation(data1, data2):
+     """
+     Calculate correlation coefficient between two datasets.
+
+     Parameters:
+     - data1 (array-like): First dataset.
+     - data2 (array-like): Second dataset.
+
+     Returns:
+     - float: Correlation coefficient.
+     """
+     return np.corrcoef(data1, data2)[0, 1]
+
+ def bin_data(data, bin_size):
+     """
+     Bin the data into averages over fixed-size intervals.
+
+     Parameters:
+     - data (array-like): The input data.
+     - bin_size (int): The size of the bins.
+
+     Returns:
+     - np.ndarray: Binned data.
+     """
+     binned = data[:len(data) // bin_size * bin_size].reshape(-1, bin_size)
+     return np.mean(binned, axis=1)
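
A quick, hedged illustration of the helpers above (again, not part of the wheel; the sample array is arbitrary). Note that validate_window expects the constraint dict built by PatternDetector.preprocess_pattern plus an extra 'pattern' entry holding the reference window it correlates against, and normalize_data divides by zero on constant input.

import numpy as np
from pattern_detection.utils import bin_data, calculate_statistics, normalize_data

data = np.arange(12, dtype=float)
print(bin_data(data, 3))                       # [ 1.  4.  7. 10.], the mean of each 3-sample bin
print(normalize_data(data))                    # values rescaled into [0, 1]
print(calculate_statistics(data)["skewness"])  # 0.0 for a symmetric ramp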
pattern_detector-0.1.0.dist-info/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Yigit Utku Bulut and Ahmet Faruk Minareci
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
pattern_detector-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,17 @@
+ Metadata-Version: 2.1
+ Name: pattern_detector
+ Version: 0.1.0
+ Summary: A library for detecting patterns in time-series data.
+ Author: Yigit Utku Bulut and Ahmet Faruk Minareci
+ Author-email: yigit.utku.bulut@gmail.com, ahmetfaruk.minareci@gmail.com
+ License: MIT
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.7
+ License-File: LICENSE
+ Requires-Dist: numpy
+ Requires-Dist: pandas
+ Requires-Dist: joblib
+ Requires-Dist: scipy
+
pattern_detector-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,10 @@
+ pattern_detection/__init__.py,sha256=hzZhj16aof1dhcIOAI9Tfo5EoPJgexGChsnCLgXq-68,68
+ pattern_detection/detector.py,sha256=XaYYRledSolgTPTYLRSWhjZhmnAAvZT2pD3h5g9kP9k,6585
+ pattern_detection/utils.py,sha256=jqkfeTqWEnrnvzKBxr_UdKj9cj7zPF15UhHPr9sBSa0,2646
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ tests/test_detector.py,sha256=biHGbqd6Uc8dFTtyK-BxYeqFYtJOO9lk_6rQMeQCbQs,972
+ pattern_detector-0.1.0.dist-info/LICENSE,sha256=RslT26mCGxF9nQEyEQL8kFMFUgmYvCA0UivIWefxWmU,1098
+ pattern_detector-0.1.0.dist-info/METADATA,sha256=9pgMSSHwImJHjA1AOAOJm-ou2MsYnNFwPa6AGvl-fow,536
+ pattern_detector-0.1.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+ pattern_detector-0.1.0.dist-info/top_level.txt,sha256=W_tzR9Ah3_TgQsiGDs46ELePkuxfcPmFOosXSYw2hl0,24
+ pattern_detector-0.1.0.dist-info/RECORD,,
pattern_detector-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (72.1.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
pattern_detector-0.1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1,2 @@
+ pattern_detection
+ tests
tests/__init__.py ADDED
File without changes
tests/test_detector.py ADDED
@@ -0,0 +1,27 @@
+ import pytest
+ import numpy as np
+ import pandas as pd
+ from pattern_detection.detector import PatternDetector
+
+ def test_preprocess_pattern():
+     pattern = pd.DataFrame({"value": range(10)})
+     detector = PatternDetector(pd.DataFrame(), pattern, "value")  # df is unused by preprocess_pattern
+     detector.preprocess_pattern()
+     assert detector.pattern1 is not None
+     assert len(detector.pattern1) > 0
+
+ def test_calculate_similarity():
+     data = pd.DataFrame({"value": range(100)})
+     pattern = pd.DataFrame({"value": range(10)})
+     detector = PatternDetector(data, pattern, "value")
+     detector.preprocess_pattern()
+     detector.calculate_similarity()
+     assert len(detector.similarity_dict) > 0
+
+ def test_find_area_of_interest():
+     data = pd.DataFrame({"value": range(100)})
+     pattern = pd.DataFrame({"value": range(10)})
+     detector = PatternDetector(data, pattern, "value")
+     result = detector.find_area_of_interest()
+     assert "cycle" in result.columns
+     assert not result["cycle"].isnull().all()