py2ls 0.2.4.25__py3-none-any.whl → 0.2.4.26__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- py2ls/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/corr.py +475 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/styles/example/.DS_Store +0 -0
- py2ls/data/usages_sns.json +6 -1
- py2ls/ips.py +399 -91
- py2ls/ml2ls.py +758 -186
- py2ls/netfinder.py +16 -20
- py2ls/plot.py +916 -141
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.26.dist-info}/METADATA +5 -1
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.26.dist-info}/RECORD +15 -13
- py2ls/data/usages_pd copy.json +0 -1105
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.26.dist-info}/WHEEL +0 -0
py2ls/.DS_Store
CHANGED
Binary file
|
py2ls/.git/index
CHANGED
Binary file
|
py2ls/corr.py
ADDED
@@ -0,0 +1,475 @@
|
|
1
|
+
# correlations
|
2
|
+
import numpy as np
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from scipy.interpolate import interp1d
|
6
|
+
import statsmodels.api as sm
|
7
|
+
from scipy.stats import pearsonr, spearmanr
|
8
|
+
# (1) pd.Series(data1).rolling(window=winsize).corr(pd.Series(data2))
|
9
|
+
def corr_pd_roll(x, y, window=3):
    """
    Rolling correlation between two sequences, computed with pandas.

    Both inputs are wrapped in ``pd.Series`` and correlated over a rolling
    window of ``window`` samples. An informational message is printed on
    every call, after the correlation has been computed.

    Example:
        winsize = 10
        series_1 = pd.Series(res_spin.spin["freq"])
        series_2 = pd.Series(res_spin.spin["pk2pk"])
        rolling_correlation = corr_pd_roll(series_1, series_2, window=winsize)

    Args:
        x: first data sequence (anything ``pd.Series`` accepts).
        y: second data sequence (anything ``pd.Series`` accepts).
        window (int, optional): rolling window length. Defaults to 3.

    Returns:
        pd.Series: the rolling correlation of ``x`` with ``y``.
    """
    rolling_r = pd.Series(x).rolling(window=window).corr(pd.Series(y))
    print(f" corr_pd_roll correlation to check similarity, \nwindow is {window}, cannot be 1")
    return rolling_r
|
26
|
+
# Sliding window: a sliding window with incremental updates. This method is computationally
|
27
|
+
# efficient compared to recalculating the correlation coefficient for each window.
|
28
|
+
def corr_sliding_window(x, y, window=1):
    """
    Pearson correlation over a sliding window, using incremental sums.

    The running sums (sum x, sum y, sum xy, sum x^2, sum y^2) are computed
    once for the first window and then updated in O(1) per step by adding
    the sample that enters the window and subtracting the one that leaves.

    Example:
        series_1 = pd.Series(res_spin.spin["freq"])
        series_2 = pd.Series(res_spin.spin["pk2pk"])
        sliding_corr = corr_sliding_window(series_1, series_2, window=3)

    Args:
        x: first data sequence (list, numpy array or pandas Series).
        y: second data sequence, at least as long as ``x``.
        window (int, optional): number of samples per window. Defaults to 1.
            A window of a single sample has zero variance, so every
            coefficient is NaN in that case. A window longer than ``x`` is
            clamped to ``len(x)``, yielding one coefficient for the whole
            series.

    Returns:
        np.ndarray: one r value per window position
        (``len(x) - window + 1`` entries). Windows in which either series
        has no variance yield NaN. Empty input yields an empty array.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    # Work on plain float arrays: plain lists cannot be multiplied or squared
    # element-wise, and a pandas Series would be indexed by label, not position.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)
    if n == 0:
        return np.array([])
    window = min(window, n)

    def _pearson(s_x, s_y, s_xy, s_xx, s_yy):
        # Pearson r from raw sums over exactly `window` samples.
        spread = (window * s_xx - s_x**2) * (window * s_yy - s_y**2)
        if spread <= 0:
            # Zero (or rounding-negative) variance: r is undefined.
            return np.nan
        return (window * s_xy - s_x * s_y) / np.sqrt(spread)

    # Sums for the first window.
    head_x, head_y = x[:window], y[:window]
    sum_x = head_x.sum()
    sum_y = head_y.sum()
    sum_xy = (head_x * head_y).sum()
    sum_x_sq = (head_x**2).sum()
    sum_y_sq = (head_y**2).sum()

    # The sample count in the formula must be the window length, not len(x).
    corr = [_pearson(sum_x, sum_y, sum_xy, sum_x_sq, sum_y_sq)]

    # Slide the window one sample at a time.
    for i in range(1, n - window + 1):
        old_x, old_y = x[i - 1], y[i - 1]
        new_x, new_y = x[i + window - 1], y[i + window - 1]
        sum_x += new_x - old_x
        sum_y += new_y - old_y
        sum_xy += new_x * new_y - old_x * old_y
        sum_x_sq += new_x**2 - old_x**2
        sum_y_sq += new_y**2 - old_y**2
        corr.append(_pearson(sum_x, sum_y, sum_xy, sum_x_sq, sum_y_sq))

    return np.array(corr)
|
78
|
+
|
79
|
+
|
80
|
+
# Fourier Transform for correlation analysis
|
81
|
+
# Compute the cross-power spectral density (CPSD) between the two time series.
|
82
|
+
# Compute the power spectral density (PSD) of each time series separately.
|
83
|
+
# Divide the CPSD by the square root of the product of the individual PSDs to obtain the cross-correlation function.
|
84
|
+
# Apply the inverse Fourier Transform to obtain the correlation coefficient as a function of time.
|
85
|
+
def corr_fft(x, y):
    """
    Frequency-domain correlation of two equally long series.

    The cross-power spectrum ``FFT(x) * conj(FFT(y))`` is divided, bin by
    bin, by ``sqrt(PSD_x * PSD_y)`` and transformed back with the inverse
    FFT. Bins in which either series has zero power are set to zero rather
    than divided (0/0): a single such NaN bin would otherwise turn the whole
    inverse transform into NaN, for example when an input has zero mean and
    therefore an empty DC bin.

    Example:
        series_1 = pd.Series(res_spin.spin["freq"])
        series_2 = pd.Series(res_spin.spin["pk2pk"])
        r = corr_fft(series_1, series_2)

    Args:
        x: first data sequence.
        y: second data sequence, same length as ``x``.

    Returns:
        np.ndarray: real part of the inverse FFT of the normalized
        cross-power spectrum, one value per sample of the input.
    """
    # Compute FFT of each time series
    fft_x = np.fft.fft(x)
    fft_y = np.fft.fft(y)

    # Cross-power spectral density and the individual power spectra
    cpsd = fft_x * np.conj(fft_y)
    psd_x = np.abs(fft_x) ** 2
    psd_y = np.abs(fft_y) ** 2
    scale = np.sqrt(psd_x * psd_y)

    # Normalize only where the denominator is non-zero; empty bins stay 0.
    normalized = np.zeros_like(cpsd)
    np.divide(cpsd, scale, out=normalized, where=scale > 0)

    cross_corr = np.fft.ifft(normalized)
    return cross_corr.real
|
112
|
+
|
113
|
+
# Exponentially Weighted Moving Average (EWMA)
|
114
|
+
# You can use exponentially weighted moving average to compute the correlation coefficient continuously over time. This method assigns exponentially decreasing weights
|
115
|
+
# to the past observations, giving more weight to recent observations. Here's an example of how you can implement it:
|
116
|
+
def corr_ewma(x, y, smth=0.1):  # alpha is the smth factor
    """
    Exponentially smoothed expanding-window correlation.

    The first output value is the correlation of the complete series. Each
    following value blends the correlation of the first ``i + 1`` samples
    (weight ``smth``) with the previous output value (weight ``1 - smth``).

    Example:
        smth = 0.1  # default
        series_1 = pd.Series(res_spin.spin["freq"])
        series_2 = pd.Series(res_spin.spin["pk2pk"])
        ewma_correlation = corr_ewma(series_1, series_2, smth=smth)

    Args:
        x: data1
        y: data2
        smth (float, optional): smoothing factor (alpha). Defaults to 0.1.

    Returns:
        np.ndarray: smoothed r values, one per sample of ``x``.
    """
    # Seed the recursion with the correlation of the full series.
    smoothed = [np.corrcoef(x, y)[0, 1]]
    for end in range(2, len(x) + 1):
        expanding_r = np.corrcoef(x[:end], y[:end])[0, 1]
        smoothed.append(smth * expanding_r + (1 - smth) * smoothed[-1])
    return np.array(smoothed)
|
139
|
+
|
140
|
+
# Recursive Formulas
|
141
|
+
# where each new value is computed based on the previous one. This method is similar to
|
142
|
+
# rolling window functions but calculates each new value efficiently without re-computing
|
143
|
+
# the entire window.
|
144
|
+
def corr_recursive(x, y):
    """
    Recursively averaged expanding-window correlation.

    The first value is the correlation of the first two samples. For every
    further index ``i`` (starting at 2) the new value is
    ``(i - 1) / i`` times the previous value plus ``1 / i`` times the
    correlation of the first ``i + 1`` samples.

    Args:
        x: first data sequence (needs at least two samples).
        y: second data sequence, same length as ``x``.

    Returns:
        np.ndarray: ``len(x) - 1`` averaged r values.
    """
    running = [np.corrcoef(x[:2], y[:2])[0, 1]]
    for i in range(2, len(x)):
        latest = np.corrcoef(x[: i + 1], y[: i + 1])[0, 1]
        running.append((i - 1) / i * running[-1] + 1 / i * latest)
    return np.array(running)
|
152
|
+
|
153
|
+
# adaptive or online algorithm
|
154
|
+
# One such algorithm is the Online Pearson Correlation Coefficient algorithm, which updates the correlation coefficient as new data points become available without the need for storing or reprocessing past data.
|
155
|
+
class ContinuousOnlinePearsonCorrelation:
    """
    Streaming Pearson correlation that is updated one sample pair at a time.

    Running means, sums of squared deviations and the co-moment are kept, so
    past samples never have to be stored or re-processed. After each update
    in which both series have non-zero spread, the current coefficient is
    appended to ``correlation_values``; updates where the coefficient is
    undefined append nothing.

    Example:
        x = pd.Series(res_spin.spin["freq"])
        y = pd.Series(res_spin.spin["pk2pk"])

        continuous_online_corr = ContinuousOnlinePearsonCorrelation()
        for i, j in zip(x, y):
            continuous_online_corr.update(i, j)

        print("Continuous correlation coefficients:")
        print(continuous_online_corr.correlation_values[:10])
    """

    def __init__(self):
        # Number of sample pairs seen so far.
        self.n = 0
        # Running means of both series.
        self.mean_x = 0
        self.mean_y = 0
        # Running sums of squared deviations from the mean.
        self.m2_x = 0
        self.m2_y = 0
        # Running sum of cross-deviations (co-moment).
        self.cov_xy = 0
        # One coefficient per update in which it was defined.
        self.correlation_values = []

    def update(self, x, y):
        """Fold one new (x, y) pair into the running statistics."""
        self.n += 1
        count = self.n

        # Deviations from the means *before* this sample is included.
        dx_before = x - self.mean_x
        dy_before = y - self.mean_y

        self.mean_x += dx_before / count
        self.mean_y += dy_before / count

        # The second factor uses the means *after* the update.
        self.m2_x += dx_before * (x - self.mean_x)
        self.m2_y += dy_before * (y - self.mean_y)
        self.cov_xy += dx_before * dy_before * (count - 1) / count

        if not (self.m2_x > 0 and self.m2_y > 0):
            # No spread in at least one series: coefficient undefined.
            return
        self.correlation_values.append(
            self.cov_xy / (self.m2_x**0.5 * self.m2_y**0.5)
        )
|
191
|
+
|
192
|
+
|
193
|
+
"""
|
194
|
+
# what if the two data series with different sample rate. how to do the correlation?
|
195
|
+
|
196
|
+
If the two data series have different sample rates, you can still compute the correlation between them. However, you need to ensure that they are synchronized or resampled to a common time grid before calculating the correlation.
|
197
|
+
|
198
|
+
general approach to handle data series with different sample rates:
|
199
|
+
|
200
|
+
(1) Resample both data series to a common time grid using interpolation or other resampling techniques.
|
201
|
+
|
202
|
+
(2) Compute the correlation between the resampled data series.
|
203
|
+
|
204
|
+
example:
|
205
|
+
series_1 = pd.Series(res_spin.spin["freq"])
|
206
|
+
series_2 = pd.Series(res_spin.spin["pk2pk"])
|
207
|
+
series_3 = resample_data(res_spin.spin["freq"], 1000, 12)
|
208
|
+
series_3 = pd.Series(series_3)
|
209
|
+
series_4 = resample_data(res_spin.spin["pk2pk"], 1000, 12)
|
210
|
+
series_4 = pd.Series(series_4)
|
211
|
+
window_size = 10
|
212
|
+
resample_sliding_corr = corr_sliding_window(series_3, series_4, window_size)
|
213
|
+
"""
|
214
|
+
|
215
|
+
def corr_interp_sliding(x, y, x_timestamps, y_timestamps, window_size):
    """
    Sliding-window correlation of two series sampled on different time grids.

    ``y`` is first linearly interpolated (and, outside its own time range,
    extrapolated) onto ``x_timestamps``. The correlation coefficient of ``x``
    and the interpolated ``y`` is then computed for every window of
    ``window_size`` consecutive samples.

    Example:
        x = np.random.randn(10000)  # sampled at 1000 Hz
        y = np.random.randn(120)  # sampled at 12 Hz
        x_timestamps = np.linspace(0, 10, 10000)
        y_timestamps = np.linspace(0, 10, 120)
        window_size = 100  # adjust according to your needs
        continuous_correlation = corr_interp_sliding(
            x, y, x_timestamps, y_timestamps, window_size
        )
        print("Continuous correlation coefficients:")
        print(continuous_correlation)

    Args:
        x: samples of the first series.
        y: samples of the second series.
        x_timestamps: sample times of ``x`` (one per sample of ``x``).
        y_timestamps: sample times of ``y`` (one per sample of ``y``).
        window_size (int): number of samples per sliding window.

    Returns:
        np.ndarray: one r value per window position
        (``len(x) - window_size + 1`` entries, empty if the window is longer
        than ``x``).
    """
    # Bring y onto the time grid of x.
    to_x_grid = interp1d(y_timestamps, y, kind="linear", fill_value="extrapolate")
    y_on_x = to_x_grid(x_timestamps)

    n_windows = len(x) - window_size + 1
    return np.array(
        [
            np.corrcoef(
                x[start : start + window_size],
                y_on_x[start : start + window_size],
            )[0, 1]
            for start in range(n_windows)
        ]
    )
|
264
|
+
|
265
|
+
|
266
|
+
"""
|
267
|
+
Autocorrelation is used in various fields and applications, including:
|
268
|
+
|
269
|
+
Time Series Analysis: Autocorrelation is fundamental in time series analysis
|
270
|
+
for understanding the structure and patterns in sequential data. It helps identify
|
271
|
+
seasonality, trends, and other repeating patterns within the data.
|
272
|
+
|
273
|
+
Modeling and Forecasting: Autocorrelation informs the selection of appropriate models
|
274
|
+
for forecasting future values of a time series. Models such as autoregressive
|
275
|
+
integrated moving average (ARIMA) and seasonal autoregressive integrated moving
|
276
|
+
average (SARIMA) rely on autocorrelation patterns to capture dependencies between
|
277
|
+
observations.
|
278
|
+
|
279
|
+
Quality Control: In manufacturing and process control, autocorrelation analysis
|
280
|
+
is used to detect correlations between successive measurements. Deviations from
|
281
|
+
expected autocorrelation patterns can indicate process instability or abnormalities.
|
282
|
+
|
283
|
+
Signal Processing: Autocorrelation is used in signal processing for tasks such as
|
284
|
+
speech recognition, audio processing, and seismic analysis to analyze time-domain
|
285
|
+
signals and extract useful information about signal characteristics.
|
286
|
+
|
287
|
+
Overall, autocorrelation provides valuable insights into the temporal dependencies
|
288
|
+
and behavior of time series data, enabling better understanding, modeling, and prediction
|
289
|
+
of sequential phenomena.
|
290
|
+
"""
|
291
|
+
def autocorr_np(x, lag=1):
    """
    Autocorrelation for lags ``0 .. lag`` using ``np.correlate``.

    The series is mean-centred, correlated with itself in ``"full"`` mode
    and scaled by ``var(x) * len(x)``, so that the lag-0 value is 1.

    Example:
        data = np.random.randn(100)
        lag_1_autocorr = autocorr_np(data, 1)
        print("Autocorrelation at lag 1:", lag_1_autocorr)

    Args:
        x: 1D data sequence.
        lag (int, optional): largest lag to return. Defaults to 1.

    Returns:
        np.ndarray: the ``lag + 1`` autocorrelation values for lags 0 to
        ``lag`` (lag 0 first).
    """
    size = len(x)
    variance = np.var(x)
    centered = x - np.mean(x)
    acf_full = np.correlate(centered, centered, mode="full") / (variance * size)
    # Index size - 1 of the "full" output is lag 0.
    return acf_full[size - 1 : size + lag]
|
313
|
+
|
314
|
+
def autocorr_pd(data, max_lag=10):
    """
    Compute the autocorrelation of a 1D series for lags ``0 .. max_lag``.

    The input is converted to a plain float array first. Without that, a
    pandas Series would be multiplied with its shifted slice by *index
    label* rather than by position, which pairs every sample with itself
    and gives wrong autocorrelation values.

    The lag-k autocovariance is the mean of the ``len(data) - k``
    overlapping products of the centred data; it is divided by the lag-0
    autocovariance.

    Example:
        data_series = np.random.randn(100)
        autocorr_series = autocorr_pd(data_series)
        print("Autocorrelation:", autocorr_series)

    Parameters:
        data: 1D data (list, numpy array or pandas Series).
        max_lag (int, optional): largest lag to evaluate. Defaults to 10.
            Lags that are not smaller than ``len(data)`` have no overlapping
            samples and come out as NaN.

    Returns:
        np.ndarray: ``max_lag + 1`` autocorrelation values; index ``k``
        holds lag ``k`` and index 0 is always 1.0.
    """
    # Positional (not label-based) arithmetic from here on.
    values = np.asarray(data, dtype=float)
    centered_data = values - np.mean(values)

    # Autocovariance at lag 0 (the variance) is the normalizer.
    auto_covariance_0 = np.mean(centered_data**2)

    autocorr_values = np.zeros(max_lag + 1)
    autocorr_values[0] = 1.0
    for lag in range(1, max_lag + 1):
        auto_covariance_lag = np.mean(centered_data[:-lag] * centered_data[lag:])
        autocorr_values[lag] = auto_covariance_lag / auto_covariance_0

    return autocorr_values
|
345
|
+
|
346
|
+
|
347
|
+
def autocorr_statsmodels(data, nlags=1):
    """
    Compute the autocorrelation of a 1D series with StatsModels.

    Thin wrapper around ``sm.tsa.acf``.

    Example:
        data_array = np.random.randn(100)
        autocorr_array = autocorr_statsmodels(data_array, nlags=1)
        print("Autocorrelation up to lag 1:", autocorr_array)

    Parameters:
        data (numpy.ndarray): 1D array containing the data.
        nlags (int): number of lags passed on to ``sm.tsa.acf`` (default: 1).

    Returns:
        The result of ``sm.tsa.acf(data, nlags=nlags)``, returned unchanged
        (presumably an array covering lags 0 to ``nlags``; confirm against
        the installed statsmodels version).
    """
    return sm.tsa.acf(data, nlags=nlags)
|
365
|
+
|
366
|
+
|
367
|
+
"""
|
368
|
+
cross-correlation
|
369
|
+
|
370
|
+
Cross-correlation is a statistical method used to measure the similarity between two
|
371
|
+
time series by comparing them at different time lags. Unlike autocorrelation, which
|
372
|
+
measures the similarity of a time series with itself at different lags, cross-correlation
|
373
|
+
measures the similarity between two different time series.
|
374
|
+
|
375
|
+
Cross-correlation has several applications, including:
|
376
|
+
Signal Processing: In signal processing, cross-correlation is used to detect similarities
|
377
|
+
between different signals or to find the time delay between them. It is widely used in
|
378
|
+
fields such as audio processing, radar signal processing, and image processing.
|
379
|
+
|
380
|
+
Time Series Analysis: Cross-correlation helps identify relationships and dependencies between
|
381
|
+
different time series data. It is used in fields such as economics, finance, and environmental
|
382
|
+
science to analyze the interactions between various variables over time.
|
383
|
+
|
384
|
+
Pattern Recognition: Cross-correlation is used in pattern recognition tasks to match and
|
385
|
+
compare patterns in different datasets. It is employed in fields such as speech recognition,
|
386
|
+
pattern matching, and machine vision.
|
387
|
+
"""
|
388
|
+
|
389
|
+
def cross_corr_np(x, y, mode='same'):
    """
    Normalized cross-correlation of two series using ``np.correlate``.

    Both series are mean-centred, correlated, and the result is divided by
    ``std(x) * std(y) * len(x)``.

    Example:
        x = np.random.randn(100)
        y = np.random.randn(100)
        cross_corr_values = cross_corr_np(x, y)
        print("Cross-correlation values:", cross_corr_values[:4])

    Args:
        x: first data sequence.
        y: second data sequence.
        mode (str, optional): passed to ``np.correlate``. ``'same'`` (the
            default) returns an output as long as the inputs; ``'full'``
            returns the correlation at every overlap position, i.e. twice
            the input length minus 1 values.

    Returns:
        np.ndarray: normalized cross-correlation values.
    """
    size = len(x)
    x_centered = x - np.mean(x)
    y_centered = y - np.mean(y)
    scale = np.std(x_centered) * np.std(y_centered) * size
    return np.correlate(x_centered, y_centered, mode=mode) / scale
|
414
|
+
|
415
|
+
def cross_corr_pd(x, y):
    """
    Correlation coefficient between two sequences via ``pd.Series.corr``.

    Example:
        x = np.random.randn(100)
        y = np.random.randn(100)
        cross_corr_values = cross_corr_pd(x, y)
        print("Cross-correlation:", cross_corr_values)

    Args:
        x: first data sequence (anything ``pd.Series`` accepts).
        y: second data sequence (anything ``pd.Series`` accepts).

    Returns:
        float: a single correlation coefficient between the two series.
    """
    return pd.Series(x).corr(pd.Series(y))
|
432
|
+
|
433
|
+
def cross_corr_scipy(x, y):
    """
    Cross-correlate two sequences with ``scipy.signal.correlate``.

    ``correlate`` is called with its default arguments; no centring or
    normalization is applied. The first four values are printed.

    Args:
        x: first data sequence.
        y: second data sequence.

    Returns:
        np.ndarray: the values returned by ``scipy.signal.correlate(x, y)``.
    """
    from scipy import signal

    result = signal.correlate(x, y)
    print("Cross-correlation values:", result[:4])
    return result
|
438
|
+
|
439
|
+
"""Autocorrelation is used in various fields and applications, including:
|
440
|
+
|
441
|
+
Time Series Analysis: Autocorrelation is fundamental in time series analysis for understanding the structure and patterns in sequential data. It helps identify seasonality, trends, and other repeating patterns within the data.
|
442
|
+
|
443
|
+
Modeling and Forecasting: Autocorrelation informs the selection of appropriate models for forecasting future values of a time series. Models such as autoregressive integrated moving average (ARIMA) and seasonal autoregressive integrated moving average (SARIMA) rely on autocorrelation patterns to capture dependencies between observations.
|
444
|
+
|
445
|
+
Quality Control: In manufacturing and process control, autocorrelation analysis is used to detect correlations between successive measurements. Deviations from expected autocorrelation patterns can indicate process instability or abnormalities.
|
446
|
+
|
447
|
+
Signal Processing: Autocorrelation is used in signal processing for tasks such as speech recognition, audio processing, and seismic analysis to analyze time-domain signals and extract useful information about signal characteristics.
|
448
|
+
|
449
|
+
Overall, autocorrelation provides valuable insights into the temporal dependencies and behavior of time series data, enabling better understanding, modeling, and prediction of sequential phenomena."""
|
450
|
+
def autocorr(x, lag):
    """
    Autocorrelation of a 1D series for lags ``0 .. lag``.

    The series is mean-centred, correlated with itself via ``np.correlate``
    in ``"full"`` mode and scaled by ``var(x) * len(x)``.

    Args:
        x: 1D data sequence.
        lag (int): largest lag to return.

    Returns:
        np.ndarray: ``lag + 1`` autocorrelation values, lag 0 first.
    """
    size = len(x)
    variance = np.var(x)
    centered = x - np.mean(x)
    acf_full = np.correlate(centered, centered, mode="full") / (variance * size)
    # Index size - 1 of the "full" output is lag 0.
    return acf_full[size - 1 : size + lag]
|
457
|
+
|
458
|
+
"""
|
459
|
+
General correlation
|
460
|
+
e.g., Pearson correlation or Spearman correlation
|
461
|
+
"""
|
462
|
+
def corr(x, y, method='pearson'):
    """
    Pearson or Spearman correlation between two sequences.

    The method name is matched case-insensitively against a few accepted
    spellings and abbreviations. The coefficient and the p-value are printed
    and returned.

    Args:
        x: first data sequence.
        y: second data sequence.
        method (str, optional): 'pearson' (default) or 'spearman', or one of
            the accepted short forms of either.

    Returns:
        tuple: ``(r, p)`` with the correlation coefficient and its p-value,
        or ``(None, None)`` after printing a hint if ``method`` is not
        recognized.
    """
    key = method.lower()

    if key in ['pe', 'pear', 'pearson', 'peson', 'pearon']:
        coefficient, p_value = pearsonr(x, y)
        print("Pearson correlation coefficient:", coefficient)
        print("Pearson p-value:", p_value)
        return coefficient, p_value

    if key in ['spear', 'sp', 'spea', 'spearman', 'speaman']:
        coefficient, p_value = spearmanr(x, y)
        print("Spearman correlation coefficient:", coefficient)
        print("Spearman p-value:", p_value)
        return coefficient, p_value

    print(f"{method} is not supported, do you mean 'pearson' or 'spearman'")
    return None, None
|
py2ls/data/.DS_Store
CHANGED
Binary file
|