py2ls 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/COMMIT_EDITMSG +1 -0
- py2ls/.git/FETCH_HEAD +1 -0
- py2ls/.git/HEAD +1 -0
- py2ls/.git/config +15 -0
- py2ls/.git/description +1 -0
- py2ls/.git/hooks/applypatch-msg.sample +15 -0
- py2ls/.git/hooks/commit-msg.sample +24 -0
- py2ls/.git/hooks/fsmonitor-watchman.sample +174 -0
- py2ls/.git/hooks/post-update.sample +8 -0
- py2ls/.git/hooks/pre-applypatch.sample +14 -0
- py2ls/.git/hooks/pre-commit.sample +49 -0
- py2ls/.git/hooks/pre-merge-commit.sample +13 -0
- py2ls/.git/hooks/pre-push.sample +53 -0
- py2ls/.git/hooks/pre-rebase.sample +169 -0
- py2ls/.git/hooks/pre-receive.sample +24 -0
- py2ls/.git/hooks/prepare-commit-msg.sample +42 -0
- py2ls/.git/hooks/push-to-checkout.sample +78 -0
- py2ls/.git/hooks/update.sample +128 -0
- py2ls/.git/index +0 -0
- py2ls/.git/info/exclude +6 -0
- py2ls/.git/logs/HEAD +1 -0
- py2ls/.git/logs/refs/heads/main +1 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
- py2ls/.git/logs/refs/remotes/origin/main +1 -0
- py2ls/.git/objects/25/b796accd261b9135fd32a2c00785f68edf6c46 +0 -0
- py2ls/.git/objects/36/b4a1b7403abc6c360f8fe2cb656ab945254971 +0 -0
- py2ls/.git/objects/3f/d6561300938afbb3d11976cf9c8f29549280d9 +0 -0
- py2ls/.git/objects/58/20a729045d4dc7e37ccaf8aa8eec126850afe2 +0 -0
- py2ls/.git/objects/60/f273eb1c412d916fa3f11318a7da7a9911b52a +0 -0
- py2ls/.git/objects/61/570cec8c061abe74121f27f5face6c69b98f99 +0 -0
- py2ls/.git/objects/69/13c452ca319f7cbf6a0836dc10a5bb033c84e4 +0 -0
- py2ls/.git/objects/78/3d4167bc95c9d2175e0df03ef1c1c880ba75ab +0 -0
- py2ls/.git/objects/79/7ae089b2212a937840e215276005ce76881307 +0 -0
- py2ls/.git/objects/7e/5956c806b5edc344d46dab599dec337891ba1f +1 -0
- py2ls/.git/objects/8e/55a7d2b96184030211f20c9b9af201eefcac82 +0 -0
- py2ls/.git/objects/91/c69ad88fe0ba94aa7859fb5f7edac5e6f1a3f7 +0 -0
- py2ls/.git/objects/b0/56be4be89ba6b76949dd641df45bb7036050c8 +0 -0
- py2ls/.git/objects/b0/9cd7856d58590578ee1a4f3ad45d1310a97f87 +0 -0
- py2ls/.git/objects/d9/005f2cc7fc4e65f14ed5518276007c08cf2fd0 +0 -0
- py2ls/.git/objects/df/e0770424b2a19faf507a501ebfc23be8f54e7b +0 -0
- py2ls/.git/objects/e9/391ffe371f1cc43b42ef09b705d9c767c2e14f +0 -0
- py2ls/.git/objects/fc/292e793ecfd42240ac43be407023bd731fa9e7 +0 -0
- py2ls/.git/refs/heads/main +1 -0
- py2ls/.git/refs/remotes/origin/HEAD +1 -0
- py2ls/.git/refs/remotes/origin/main +1 -0
- py2ls/.gitattributes +2 -0
- py2ls/.gitignore +152 -0
- py2ls/LICENSE +201 -0
- py2ls/README.md +409 -0
- py2ls/__init__.py +17 -0
- py2ls/brain_atlas.py +145 -0
- py2ls/correlators.py +475 -0
- py2ls/dbhandler.py +97 -0
- py2ls/freqanalysis.py +800 -0
- py2ls/internet_finder.py +405 -0
- py2ls/ips.py +2844 -0
- py2ls/netfinder.py +780 -0
- py2ls/sleep_events_detectors.py +1350 -0
- py2ls/translator.py +686 -0
- py2ls/version.py +1 -0
- py2ls/wb_detector.py +169 -0
- py2ls-0.1.0.dist-info/METADATA +12 -0
- py2ls-0.1.0.dist-info/RECORD +64 -0
- py2ls-0.1.0.dist-info/WHEEL +4 -0
py2ls/correlators.py
ADDED
@@ -0,0 +1,475 @@
# correlations
import numpy as np
import pandas as pd

from scipy.interpolate import interp1d
import statsmodels.api as sm
from scipy.stats import pearsonr, spearmanr


# (1) pd.Series(data1).rolling(window=winsize).corr(pd.Series(data2))
def corr_pd_roll(x, y, window=3):
    """
    Rolling-window correlation via pandas.

    Example:
        winsize = 10
        series_1 = pd.Series(res_spin.spin["freq"])
        series_2 = pd.Series(res_spin.spin["pk2pk"])
        # Compute the rolling correlation coefficient
        rolling_correlation = corr_pd_roll(series_1, series_2, window=winsize)
    Returns:
        rolling_correlation
    """
    # Wrap data1 and data2 as pd.Series
    series_1 = pd.Series(x)
    series_2 = pd.Series(y)
    # Compute the rolling correlation coefficient
    rolling_correlation = series_1.rolling(window=window).corr(series_2)
    print(f"corr_pd_roll: rolling correlation to check similarity;\nwindow is {window} and cannot be 1")
    return rolling_correlation


# Sliding window: a sliding window with incremental updates. This method is
# computationally efficient compared to recalculating the correlation
# coefficient from scratch for each window.
def corr_sliding_window(x, y, window=3):
    """
    corr_sliding_window: a sliding window with incremental updates.
    This method is computationally efficient compared to recalculating
    the correlation coefficient from scratch for each window.

    Example:
        series_1 = pd.Series(res_spin.spin["freq"])
        series_2 = pd.Series(res_spin.spin["pk2pk"])
        sliding_corr = corr_sliding_window(series_1, series_2, window=3)
    Returns:
        sliding_corr: r values
    """
    x = np.asarray(x)
    y = np.asarray(y)
    n = len(x)
    # Initialize the running sums over the first window (window must be > 1)
    sum_x = np.sum(x[:window])
    sum_y = np.sum(y[:window])
    sum_xy = np.sum(x[:window] * y[:window])
    sum_x_sq = np.sum(x[:window] ** 2)
    sum_y_sq = np.sum(y[:window] ** 2)

    # Compute the initial correlation coefficient
    corr = [
        (window * sum_xy - sum_x * sum_y)
        / np.sqrt((window * sum_x_sq - sum_x**2) * (window * sum_y_sq - sum_y**2))
    ]

    # Slide the window one sample at a time: drop the element leaving the
    # window and add the element entering it -- O(1) per step.
    for i in range(1, n - window + 1):
        sum_x += x[i + window - 1] - x[i - 1]
        sum_y += y[i + window - 1] - y[i - 1]
        sum_xy += x[i + window - 1] * y[i + window - 1] - x[i - 1] * y[i - 1]
        sum_x_sq += x[i + window - 1] ** 2 - x[i - 1] ** 2
        sum_y_sq += y[i + window - 1] ** 2 - y[i - 1] ** 2

        # Compute the correlation coefficient for the current window
        corr.append(
            (window * sum_xy - sum_x * sum_y)
            / np.sqrt(
                (window * sum_x_sq - sum_x**2) * (window * sum_y_sq - sum_y**2)
            )
        )

    return np.array(corr)
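

# A minimal sanity check (a sketch, not part of the original API; the demo
# arrays are illustrative): the incremental result should match pandas'
# rolling correlation on the same window.
def _demo_corr_sliding_window(n=200, window=10):
    x_demo = np.random.randn(n)
    y_demo = np.random.randn(n)
    incremental = corr_sliding_window(x_demo, y_demo, window=window)
    # pandas computes the same windowed Pearson r; drop the leading NaNs
    reference = (
        pd.Series(x_demo).rolling(window).corr(pd.Series(y_demo)).dropna().to_numpy()
    )
    assert np.allclose(incremental, reference)
    return incremental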


# Fourier Transform for correlation analysis:
# 1. Compute the cross-power spectral density (CPSD) between the two time series.
# 2. Compute the power spectral density (PSD) of each time series separately.
# 3. Divide the CPSD by the square root of the product of the individual PSDs.
# 4. Apply the inverse Fourier Transform to obtain the correlation as a function of lag.
def corr_fft(x, y):
    """
    corr_fft: FFT-based cross-correlation of two equal-length time series.

    Args:
        x: first time series (1-D array-like)
        y: second time series (1-D array-like)
    Example:
        series_1 = pd.Series(res_spin.spin["freq"])
        series_2 = pd.Series(res_spin.spin["pk2pk"])
        r = corr_fft(series_1, series_2)
    Returns:
        r: r values
    """
    # Compute FFT of each time series
    fft_x = np.fft.fft(x)
    fft_y = np.fft.fft(y)

    # Compute cross-power spectral density
    cpsd = fft_x * np.conj(fft_y)

    # Compute power spectral density of each time series
    psd_x = np.abs(fft_x) ** 2
    psd_y = np.abs(fft_y) ** 2

    # Compute cross-correlation function
    cross_corr = np.fft.ifft(cpsd / np.sqrt(psd_x * psd_y))
    return cross_corr.real
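

# Note (a sketch, not part of the original module): FFT-based correlation as
# implemented above is circular -- the end of each signal wraps around to the
# beginning. Zero-padding to at least 2*n - 1 samples yields the linear
# cross-correlation instead; `eps` guards against division by near-zero bins.
def corr_fft_padded(x, y, eps=1e-12):
    n = len(x)
    nfft = 2 * n - 1  # enough room to avoid circular wrap-around
    fft_x = np.fft.fft(x, n=nfft)
    fft_y = np.fft.fft(y, n=nfft)
    cpsd = fft_x * np.conj(fft_y)
    norm = np.abs(fft_x) * np.abs(fft_y) + eps
    return np.fft.ifft(cpsd / norm).real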


# Exponentially Weighted Moving Average (EWMA)
# EWMA can be used to track the correlation coefficient continuously over time.
# It assigns exponentially decreasing weights to past observations, giving more
# weight to recent ones.
def corr_ewma(x, y, smth=0.1):  # smth is the smoothing factor (alpha)
    """
    EWMA-smoothed running correlation.

    Example:
        smth = 0.1  # default
        series_1 = pd.Series(res_spin.spin["freq"])
        series_2 = pd.Series(res_spin.spin["pk2pk"])
        # Compute the EWMA correlation coefficient
        ewma_correlation = corr_ewma(series_1, series_2, smth=smth)

    Args:
        x: data1
        y: data2
        smth (float, optional): the smoothing factor (alpha). Defaults to 0.1.

    Returns:
        ewma_correlation: r values
    """
    corr = []
    corr.append(np.corrcoef(x, y)[0, 1])
    for i in range(1, len(x)):
        corr.append(
            smth * np.corrcoef(x[: i + 1], y[: i + 1])[0, 1] + (1 - smth) * corr[i - 1]
        )
    return np.array(corr)
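

# An alternative (a sketch, not part of the original module): instead of
# smoothing full-prefix correlations -- which costs O(n) per step -- update
# exponentially weighted moments directly, O(1) per sample. `alpha` plays the
# same role as `smth` above; the name is illustrative.
def corr_ewma_incremental(x, y, alpha=0.1):
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    mean_x, mean_y = x[0], y[0]
    var_x = var_y = cov_xy = 0.0
    corr = [0.0]  # correlation is undefined for a single point; report 0
    for xi, yi in zip(x[1:], y[1:]):
        dx, dy = xi - mean_x, yi - mean_y
        mean_x += alpha * dx
        mean_y += alpha * dy
        # standard incremental EWMA moment updates
        var_x = (1 - alpha) * (var_x + alpha * dx * dx)
        var_y = (1 - alpha) * (var_y + alpha * dy * dy)
        cov_xy = (1 - alpha) * (cov_xy + alpha * dx * dy)
        denom = np.sqrt(var_x * var_y)
        corr.append(cov_xy / denom if denom > 0 else 0.0)
    return np.array(corr)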


# Recursive formulas: each new value is computed from the previous one. This is
# similar to a rolling-window function but updates each new value without
# re-computing the entire window.
def corr_recursive(x, y):
    # Running blend of the previous estimate with the correlation of the
    # growing prefix x[:i+1], y[:i+1]
    corr = []
    corr.append(np.corrcoef(x[:2], y[:2])[0, 1])
    for i in range(2, len(x)):
        corr.append(
            (i - 1) / i * corr[-1] + 1 / i * np.corrcoef(x[: i + 1], y[: i + 1])[0, 1]
        )
    return np.array(corr)


# Adaptive/online algorithm: the Online Pearson Correlation Coefficient
# algorithm updates the correlation coefficient as new data points become
# available, without storing or reprocessing past data.
class ContinuousOnlinePearsonCorrelation:
    """
    x = pd.Series(res_spin.spin["freq"])
    y = pd.Series(res_spin.spin["pk2pk"])

    # Initialize ContinuousOnlinePearsonCorrelation
    continuous_online_corr = ContinuousOnlinePearsonCorrelation()
    for i, j in zip(x, y):
        continuous_online_corr.update(i, j)

    print("Continuous correlation coefficients:")
    print(continuous_online_corr.correlation_values[:10])
    """

    def __init__(self):
        self.n = 0
        self.mean_x = 0
        self.mean_y = 0
        self.m2_x = 0
        self.m2_y = 0
        self.cov_xy = 0
        self.correlation_values = []

    def update(self, x, y):
        # Welford-style running moments
        self.n += 1
        delta_x = x - self.mean_x
        delta_y = y - self.mean_y
        self.mean_x += delta_x / self.n
        self.mean_y += delta_y / self.n
        delta2_x = x - self.mean_x
        delta2_y = y - self.mean_y
        self.m2_x += delta_x * delta2_x
        self.m2_y += delta_y * delta2_y
        self.cov_xy += delta_x * delta_y * (self.n - 1) / self.n
        if self.m2_x > 0 and self.m2_y > 0:
            correlation = self.cov_xy / (self.m2_x**0.5 * self.m2_y**0.5)
            self.correlation_values.append(correlation)
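

# After consuming the whole stream, the last online value should agree with the
# batch estimate. A minimal check (a sketch; the demo data are illustrative):
def _demo_online_pearson(n=500):
    x_demo = np.random.randn(n)
    y_demo = 0.5 * x_demo + np.random.randn(n)
    online = ContinuousOnlinePearsonCorrelation()
    for xi, yi in zip(x_demo, y_demo):
        online.update(xi, yi)
    batch_r = np.corrcoef(x_demo, y_demo)[0, 1]
    assert np.isclose(online.correlation_values[-1], batch_r)
    return online.correlation_values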


"""
# What if the two data series have different sample rates? How to compute the correlation?

If the two data series have different sample rates, you can still compute the
correlation between them. However, you need to ensure that they are synchronized
or resampled to a common time grid before calculating the correlation.

General approach for data series with different sample rates:

(1) Resample both data series to a common time grid using interpolation or
    other resampling techniques.

(2) Compute the correlation between the resampled data series.

Example (see the `resample_data` sketch below):
    series_1 = pd.Series(res_spin.spin["freq"])
    series_2 = pd.Series(res_spin.spin["pk2pk"])
    series_3 = resample_data(res_spin.spin["freq"], 1000, 12)
    series_3 = pd.Series(series_3)
    series_4 = resample_data(res_spin.spin["pk2pk"], 1000, 12)
    series_4 = pd.Series(series_4)
    window_size = 10
    resample_sliding_corr = sliding_window_corr(series_3, series_4, window_size)
"""


def corr_interp_sliding(x, y, x_timestamps, y_timestamps, window_size):
    """
    Use interpolation to align timestamps, followed by sliding-window
    computation of the correlation coefficient.

    Args:
        x (np.array): first signal
        y (np.array): second signal, sampled on its own time grid
        x_timestamps: timestamps of x
        y_timestamps: timestamps of y
        window_size (int): sliding window length (in samples of x)

    Example:
        x = np.random.randn(10000)  # sampled at 1000 Hz
        y = np.random.randn(120)    # sampled at 12 Hz

        x_timestamps = np.linspace(0, 10, 10000)  # EEG timestamps
        y_timestamps = np.linspace(0, 10, 120)    # Glucose timestamps

        # Set the window size for sliding window correlation computation
        window_size = 100  # Adjust according to your needs

        # Compute continuous correlation coefficients using interpolation and sliding window
        continuous_correlation = corr_interp_sliding(
            x, y, x_timestamps, y_timestamps, window_size
        )

        print("Continuous correlation coefficients:")
        print(continuous_correlation)
    Returns:
        continuous_correlation: r values
    """
    # Interpolate y data onto x timestamps
    interp_func = interp1d(y_timestamps, y, kind="linear", fill_value="extrapolate")
    y_interp = interp_func(x_timestamps)

    # Compute correlation coefficient using sliding window
    n = len(x)
    corr_values = []

    for i in range(n - window_size + 1):
        x_window = x[i : i + window_size]
        y_window = y_interp[i : i + window_size]

        # Calculate correlation coefficient for the current window
        correlation = np.corrcoef(x_window, y_window)[0, 1]
        corr_values.append(correlation)

    return np.array(corr_values)


"""
Autocorrelation is used in various fields and applications, including:

Time Series Analysis: Autocorrelation is fundamental in time series analysis
for understanding the structure and patterns in sequential data. It helps
identify seasonality, trends, and other repeating patterns within the data.

Modeling and Forecasting: Autocorrelation informs the selection of appropriate
models for forecasting future values of a time series. Models such as
autoregressive integrated moving average (ARIMA) and seasonal autoregressive
integrated moving average (SARIMA) rely on autocorrelation patterns to capture
dependencies between observations.

Quality Control: In manufacturing and process control, autocorrelation analysis
is used to detect correlations between successive measurements. Deviations from
expected autocorrelation patterns can indicate process instability or
abnormalities.

Signal Processing: Autocorrelation is used in signal processing for tasks such
as speech recognition, audio processing, and seismic analysis to analyze
time-domain signals and extract useful information about signal characteristics.

Overall, autocorrelation provides valuable insights into the temporal
dependencies and behavior of time series data, enabling better understanding,
modeling, and prediction of sequential phenomena.
"""


def autocorr_np(x, lag=1):
    """
    autocorr_np: autocorrelation via np.correlate(x, x).

    Args:
        x: 1-D data array
        lag (int): largest lag to return
    Example:
        data = np.random.randn(100)

        # Compute autocorrelation at lag 1
        lag_1_autocorr = autocorr_np(data, 1)
        print("Autocorrelation at lag 1:", lag_1_autocorr)
    Returns:
        lag_corr: r values for lags 0..lag
    """
    n = len(x)
    mean = np.mean(x)
    var = np.var(x)
    x = x - mean
    lag_corr = np.correlate(x, x, mode="full") / (var * n)
    return lag_corr[n - 1 : n + lag]


def autocorr_pd(data, max_lag=10):
    """
    Compute autocorrelation values of a 1D numpy array for lags 0..max_lag.

    Parameters:
        data (numpy.ndarray): 1D array containing the data.
        max_lag (int): largest lag to compute (default: 10).

    Example:
        data_series = np.random.randn(100)
        autocorr_series = autocorr_pd(data_series)
        print("Autocorrelation:", autocorr_series)
    Returns:
        numpy.ndarray: autocorrelation values, one per lag.
    """
    # Compute mean and centered data
    mean = np.mean(data)
    centered_data = data - mean

    # Compute autocovariance at lag 0
    auto_covariance_0 = np.mean(centered_data ** 2)

    # Compute autocorrelation values for a range of lags
    autocorr_values = np.zeros(max_lag + 1)
    for lag in range(max_lag + 1):
        if lag == 0:
            autocorr_values[lag] = 1.0
        else:
            auto_covariance_lag = np.mean(centered_data[:-lag] * centered_data[lag:])
            autocorr_values[lag] = auto_covariance_lag / auto_covariance_0

    return autocorr_values


def autocorr_statsmodels(data, nlags=1):
    """
    Compute autocorrelation of a 1D numpy array using StatsModels.

    Parameters:
        data (numpy.ndarray): 1D array containing the data.
        nlags (int): Number of lags for which to compute autocorrelation (default: 1).
    Example:
        data_array = np.random.randn(100)
        autocorr_array = autocorr_statsmodels(data_array, nlags=1)
        print("Autocorrelation at lag 1:", autocorr_array)
    Returns:
        autocorr_array: autocorrelation values up to the specified lag.
    """
    # Compute autocorrelation using StatsModels
    autocorr_result = sm.tsa.acf(data, nlags=nlags)

    return autocorr_result


"""
Cross-correlation

Cross-correlation is a statistical method used to measure the similarity between
two time series by comparing them at different time lags. Unlike autocorrelation,
which measures the similarity of a time series with itself at different lags,
cross-correlation measures the similarity between two different time series.

Cross-correlation has several applications, including:

Signal Processing: In signal processing, cross-correlation is used to detect
similarities between different signals or to find the time delay between them.
It is widely used in fields such as audio processing, radar signal processing,
and image processing.

Time Series Analysis: Cross-correlation helps identify relationships and
dependencies between different time series data. It is used in fields such as
economics, finance, and environmental science to analyze the interactions
between various variables over time.

Pattern Recognition: Cross-correlation is used in pattern recognition tasks to
match and compare patterns in different datasets. It is employed in fields such
as speech recognition, pattern matching, and machine vision.
"""


def cross_corr_np(x, y, mode="same"):
    """
    cross_corr_np: normalized cross-correlation via np.correlate.

    Args:
        x: first 1-D data array
        y: second 1-D data array
        mode: defaults to 'same', which returns output of the same length as x.
            With 'full', NumPy returns the cross-correlation at each position of
            overlap, so the result is twice the length of the original sequences
            minus 1.
    Example:
        x = np.random.randn(100)
        y = np.random.randn(100)
        cross_corr_values = cross_corr_np(x, y)
        print("Cross-correlation values:", cross_corr_values[:4])
    Returns:
        cross_corr_values: normalized cross-correlation values
    """
    n = len(x)
    mean_x = np.mean(x)
    mean_y = np.mean(y)
    x = x - mean_x
    y = y - mean_y
    cross_corr_values = np.correlate(x, y, mode=mode) / (np.std(x) * np.std(y) * n)
    return cross_corr_values


def cross_corr_pd(x, y):
    """
    Compute the (zero-lag) correlation coefficient between two series using pandas.

    Example:
        x = np.random.randn(100)
        y = np.random.randn(100)
        cross_corr_value = cross_corr_pd(x, y)
        print("Cross-correlation:", cross_corr_value)
    Returns:
        cross_corr_value (float): correlation coefficient between the two Series.
    """
    series_1 = pd.Series(x)
    series_2 = pd.Series(y)
    # Compute the correlation using pandas (Pearson r at lag 0)
    cross_corr_value = series_1.corr(series_2)

    return cross_corr_value


def cross_corr_scipy(x, y):
    # Unnormalized cross-correlation via scipy.signal
    from scipy.signal import correlate

    cross_corr_values = correlate(x, y)
    print("Cross-correlation values:", cross_corr_values[:4])
    return cross_corr_values


# autocorr: same normalized autocorrelation as autocorr_np above
def autocorr(x, lag):
    n = len(x)
    mean = np.mean(x)
    var = np.var(x)
    x = x - mean
    corr = np.correlate(x, x, mode="full") / (var * n)
    return corr[n - 1 : n + lag]


"""
General correlation,
e.g., Pearson correlation or Spearman correlation.
"""
def corr(x, y, method="pearson"):
    # Fuzzy-match the method name (common misspellings are accepted)
    if method.lower() in ["pe", "pear", "pearson", "peson", "pearon"]:
        r, p = pearsonr(x, y)
        print("Pearson correlation coefficient:", r)
        print("Pearson p-value:", p)
        return r, p
    elif method.lower() in ["spear", "sp", "spea", "spearman", "speaman"]:
        r, p = spearmanr(x, y)
        print("Spearman correlation coefficient:", r)
        print("Spearman p-value:", p)
        return r, p
    else:
        print(f"{method} is not supported; did you mean 'pearson' or 'spearman'?")
        return None, None
py2ls/dbhandler.py
ADDED
@@ -0,0 +1,97 @@
import sqlite3
import time


class dbhandler:
    def __init__(self, db_path):
        self.db_path = db_path

    def execute_query(self, query, params=None, retries=2, delay=1):
        for attempt in range(retries):
            conn = None
            try:
                conn = sqlite3.connect(self.db_path)
                c = conn.cursor()
                if params:
                    c.execute(query, params)
                else:
                    c.execute(query)
                conn.commit()
                return
            except sqlite3.OperationalError as e:
                if "database is locked" in str(e) and attempt < retries - 1:
                    print(f"Database is locked, retrying in {delay} seconds...")
                    time.sleep(delay)
                else:
                    print(f"Error executing query: {e}")
                    break
            finally:
                # Always release the connection, even when the query fails
                if conn is not None:
                    conn.close()
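
    # Optional helper (a sketch, not part of the original class): enabling
    # SQLite's write-ahead logging once per database reduces "database is
    # locked" errors under concurrent access, so the retry loop above only
    # has to cover residual contention.
    def enable_wal(self):
        conn = sqlite3.connect(self.db_path)
        conn.execute("PRAGMA journal_mode=WAL")
        conn.close()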

    def create_table(self, table_definition):
        self.execute_query(table_definition)
        print(f"Table created with definition: {table_definition}")

    def insert_data(self, table, columns, data):
        placeholders = ", ".join(["?" for _ in data])
        query = f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({placeholders})"
        self.execute_query(query, data)
        print(f"Data inserted into {table}: {data}")

    def select_data(self, table, columns="*", where_clause=None, where_params=None):
        # Accept "*", a single column name, or a list of column names
        cols = columns if isinstance(columns, str) else ", ".join(columns)
        query = f"SELECT {cols} FROM {table}"
        if where_clause:
            query += f" WHERE {where_clause}"
        try:
            conn = sqlite3.connect(self.db_path)
            c = conn.cursor()
            if where_params:
                c.execute(query, where_params)
            else:
                c.execute(query)
            rows = c.fetchall()
            conn.close()
            return rows
        except sqlite3.OperationalError as e:
            print(f"Error selecting data: {e}")
            return None

    def update_data(self, table, updates, where_clause, where_params):
        update_clause = ", ".join([f"{col} = ?" for col in updates.keys()])
        params = list(updates.values()) + list(where_params)
        query = f"UPDATE {table} SET {update_clause} WHERE {where_clause}"
        self.execute_query(query, params)
        print(f"Data updated in {table} where {where_clause}: {updates}")

    def delete_data(self, table, where_clause, where_params):
        query = f"DELETE FROM {table} WHERE {where_clause}"
        self.execute_query(query, where_params)
        print(f"Data deleted from {table} where {where_clause}")


# Example usage
if __name__ == "__main__":
    db = dbhandler("/Users/macjianfeng/Dropbox/github/python/xample_dbhandler/data/example.db")

    # Create table
    create_table_sql = """CREATE TABLE IF NOT EXISTS tab (
                            row TEXT PRIMARY KEY,
                            content TEXT)"""
    db.create_table(create_table_sql)

    # Insert data
    db.insert_data("tab", ["row", "content"], ["row1", "This is a row"])

    # Select data
    rows = db.select_data("tab")
    print("Selected rows:", rows)

    # Update data
    db.update_data("tab", {"content": "Updated content"}, "row = ?", ["row1"])

    # Select data again to see the update
    rows = db.select_data("tab")
    print("Selected rows after update:", rows)

    # Delete data
    db.delete_data("tab", "row = ?", ["row1"])

    # Select data again to see the deletion
    rows = db.select_data("tab")
    print("Selected rows after deletion:", rows)