py-neuromodulation 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. py_neuromodulation/ConnectivityDecoding/_get_grid_hull.m +34 -34
  2. py_neuromodulation/ConnectivityDecoding/_get_grid_whole_brain.py +95 -106
  3. py_neuromodulation/ConnectivityDecoding/_helper_write_connectome.py +107 -119
  4. py_neuromodulation/FieldTrip.py +589 -589
  5. py_neuromodulation/__init__.py +74 -13
  6. py_neuromodulation/_write_example_dataset_helper.py +83 -65
  7. py_neuromodulation/data/README +6 -6
  8. py_neuromodulation/data/dataset_description.json +8 -8
  9. py_neuromodulation/data/participants.json +32 -32
  10. py_neuromodulation/data/participants.tsv +2 -2
  11. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_space-mni_coordsystem.json +5 -5
  12. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_space-mni_electrodes.tsv +11 -11
  13. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_task-gripforce_run-0_channels.tsv +11 -11
  14. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_task-gripforce_run-0_ieeg.json +18 -18
  15. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_task-gripforce_run-0_ieeg.vhdr +35 -35
  16. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/ieeg/sub-testsub_ses-EphysMedOff_task-gripforce_run-0_ieeg.vmrk +13 -13
  17. py_neuromodulation/data/sub-testsub/ses-EphysMedOff/sub-testsub_ses-EphysMedOff_scans.tsv +2 -2
  18. py_neuromodulation/grid_cortex.tsv +40 -40
  19. py_neuromodulation/liblsl/libpugixml.so.1.12 +0 -0
  20. py_neuromodulation/liblsl/linux/bionic_amd64/liblsl.1.16.2.so +0 -0
  21. py_neuromodulation/liblsl/linux/bookworm_amd64/liblsl.1.16.2.so +0 -0
  22. py_neuromodulation/liblsl/linux/focal_amd46/liblsl.1.16.2.so +0 -0
  23. py_neuromodulation/liblsl/linux/jammy_amd64/liblsl.1.16.2.so +0 -0
  24. py_neuromodulation/liblsl/linux/jammy_x86/liblsl.1.16.2.so +0 -0
  25. py_neuromodulation/liblsl/linux/noble_amd64/liblsl.1.16.2.so +0 -0
  26. py_neuromodulation/liblsl/macos/amd64/liblsl.1.16.2.dylib +0 -0
  27. py_neuromodulation/liblsl/macos/arm64/liblsl.1.16.0.dylib +0 -0
  28. py_neuromodulation/liblsl/windows/amd64/liblsl.1.16.2.dll +0 -0
  29. py_neuromodulation/liblsl/windows/x86/liblsl.1.16.2.dll +0 -0
  30. py_neuromodulation/nm_IO.py +413 -417
  31. py_neuromodulation/nm_RMAP.py +496 -531
  32. py_neuromodulation/nm_analysis.py +993 -1074
  33. py_neuromodulation/nm_artifacts.py +30 -25
  34. py_neuromodulation/nm_bispectra.py +154 -168
  35. py_neuromodulation/nm_bursts.py +292 -198
  36. py_neuromodulation/nm_coherence.py +251 -205
  37. py_neuromodulation/nm_database.py +149 -0
  38. py_neuromodulation/nm_decode.py +918 -992
  39. py_neuromodulation/nm_define_nmchannels.py +300 -302
  40. py_neuromodulation/nm_features.py +144 -116
  41. py_neuromodulation/nm_filter.py +219 -219
  42. py_neuromodulation/nm_filter_preprocessing.py +79 -91
  43. py_neuromodulation/nm_fooof.py +139 -159
  44. py_neuromodulation/nm_generator.py +45 -37
  45. py_neuromodulation/nm_hjorth_raw.py +52 -73
  46. py_neuromodulation/nm_kalmanfilter.py +71 -58
  47. py_neuromodulation/nm_linelength.py +21 -33
  48. py_neuromodulation/nm_logger.py +66 -0
  49. py_neuromodulation/nm_mne_connectivity.py +149 -112
  50. py_neuromodulation/nm_mnelsl_generator.py +90 -0
  51. py_neuromodulation/nm_mnelsl_stream.py +116 -0
  52. py_neuromodulation/nm_nolds.py +96 -93
  53. py_neuromodulation/nm_normalization.py +173 -214
  54. py_neuromodulation/nm_oscillatory.py +423 -448
  55. py_neuromodulation/nm_plots.py +585 -612
  56. py_neuromodulation/nm_preprocessing.py +83 -0
  57. py_neuromodulation/nm_projection.py +370 -394
  58. py_neuromodulation/nm_rereference.py +97 -95
  59. py_neuromodulation/nm_resample.py +59 -50
  60. py_neuromodulation/nm_run_analysis.py +325 -435
  61. py_neuromodulation/nm_settings.py +289 -68
  62. py_neuromodulation/nm_settings.yaml +244 -0
  63. py_neuromodulation/nm_sharpwaves.py +423 -401
  64. py_neuromodulation/nm_stats.py +464 -480
  65. py_neuromodulation/nm_stream.py +398 -0
  66. py_neuromodulation/nm_stream_abc.py +166 -218
  67. py_neuromodulation/nm_types.py +193 -0
  68. {py_neuromodulation-0.0.4.dist-info → py_neuromodulation-0.0.5.dist-info}/METADATA +29 -26
  69. py_neuromodulation-0.0.5.dist-info/RECORD +83 -0
  70. {py_neuromodulation-0.0.4.dist-info → py_neuromodulation-0.0.5.dist-info}/WHEEL +1 -1
  71. {py_neuromodulation-0.0.4.dist-info → py_neuromodulation-0.0.5.dist-info}/licenses/LICENSE +21 -21
  72. py_neuromodulation/nm_EpochStream.py +0 -92
  73. py_neuromodulation/nm_across_patient_decoding.py +0 -927
  74. py_neuromodulation/nm_cohortwrapper.py +0 -435
  75. py_neuromodulation/nm_eval_timing.py +0 -239
  76. py_neuromodulation/nm_features_abc.py +0 -39
  77. py_neuromodulation/nm_settings.json +0 -338
  78. py_neuromodulation/nm_stream_offline.py +0 -359
  79. py_neuromodulation/utils/_logging.py +0 -24
  80. py_neuromodulation-0.0.4.dist-info/RECORD +0 -72
@@ -1,480 +1,464 @@
1
- import random
2
- import copy
3
-
4
- import matplotlib.pyplot as plt
5
-
6
- # from numba import njit
7
- import numpy as np
8
- import pandas as pd
9
- import scipy.stats as stats
10
-
11
-
12
- from skimage import measure
13
- from sklearn.linear_model import LinearRegression
14
- from sklearn.model_selection import KFold
15
- import statsmodels.api as sm
16
-
17
-
18
- def fitlm(x, y):
19
- return sm.OLS(y, sm.add_constant(x)).fit()
20
-
21
-
22
- def fitlm_kfold(x, y, kfold_splits=5):
23
- model = LinearRegression()
24
- if isinstance(x, type(np.array([]))) or isinstance(x, type([])):
25
- x = pd.DataFrame(x)
26
- if isinstance(y, type(np.array([]))) or isinstance(y, type([])):
27
- y = pd.DataFrame(y)
28
- scores, coeffs = [], np.zeros(x.shape[1])
29
- kfold = KFold(n_splits=kfold_splits, shuffle=True, random_state=42)
30
- for i, (train, test) in enumerate(kfold.split(x, y)):
31
- model.fit(x.iloc[train, :], y.iloc[train, :])
32
- score = model.score(x.iloc[test, :], y.iloc[test, :])
33
- # mdl = fitlm(np.squeeze(y.iloc[test,:].transpose()), np.squeeze(model.predict(x.iloc[test, :])))
34
- scores.append(score)
35
- coeffs = np.vstack((coeffs, model.coef_))
36
- coeffs = list(np.delete(coeffs, 0))
37
- return scores, coeffs, model, ["scores", "coeffs", "model"]
38
-
39
-
40
- def zscore(data):
41
- return (data - data.mean()) / data.std()
42
-
43
-
44
- def permutationTestSpearmansRho(x, y, plot_distr=True, x_unit=None, p=5000):
45
- """
46
- Calculate permutation test for multiple repetitions of Spearmans Rho
47
- https://towardsdatascience.com/how-to-assess-statistical-significance-in-your-data-with-permutation-tests-8bb925b2113d
48
-
49
- x (np array) : first distibution e.g. R^2
50
- y (np array) : second distribution e.g. UPDRS
51
- plot_distr (boolean) : if True: permutation histplot and ground truth will be
52
- plotted
53
- x_unit (str) : histplot xlabel
54
- p (int): number of permutations
55
-
56
- returns:
57
- gT (float) : estimated ground truth, here spearman's rho
58
- p (float) : p value of permutation test
59
- """
60
-
61
- # compute ground truth difference
62
- gT = stats.spearmanr(x, y)[0]
63
- #
64
- pV = np.array((x, y))
65
- # Initialize permutation:
66
- pD = []
67
- # Permutation loop:
68
- args_order = np.arange(0, pV.shape[1], 1)
69
- args_order_2 = np.arange(0, pV.shape[1], 1)
70
- for i in range(0, p):
71
- # Shuffle the data:
72
- random.shuffle(args_order)
73
- random.shuffle(args_order_2)
74
- # Compute permuted absolute difference of your two sampled
75
- # distributions and store it in pD:
76
- pD.append(stats.spearmanr(pV[0, args_order], pV[1, args_order_2])[0])
77
-
78
- # calculate p value
79
- if gT < 0:
80
- p_val = len(np.where(pD <= gT)[0]) / p
81
- else:
82
- p_val = len(np.where(pD >= gT)[0]) / p
83
-
84
- if plot_distr is True:
85
- plt.hist(pD, bins=30, label="permutation results")
86
- plt.axvline(gT, color="orange", label="ground truth")
87
- plt.title("ground truth " + x_unit + "=" + str(gT) + " p=" + str(p_val))
88
- plt.xlabel(x_unit)
89
- plt.legend()
90
- plt.show()
91
- return gT, p_val
92
-
93
-
94
- def permutationTest(x, y, plot_distr=True, x_unit=None, p=5000):
95
- """
96
- Calculate permutation test
97
- https://towardsdatascience.com/how-to-assess-statistical-significance-in-your-data-with-permutation-tests-8bb925b2113d
98
-
99
- x (np array) : first distr.
100
- y (np array) : first distr.
101
- plot_distr (boolean) : if True: plot permutation histplot and ground truth
102
- x_unit (str) : histplot xlabel
103
- p (int): number of permutations
104
-
105
- returns:
106
- gT (float) : estimated ground truth, here absolute difference of
107
- distribution means
108
- p (float) : p value of permutation test
109
-
110
- """
111
- # Compute ground truth difference
112
- gT = np.abs(np.average(x) - np.average(y))
113
-
114
- pV = np.concatenate((x, y), axis=0)
115
- pS = copy.copy(pV)
116
- # Initialize permutation:
117
- pD = []
118
- # Permutation loop:
119
- for i in range(0, p):
120
- # Shuffle the data:
121
- random.shuffle(pS)
122
- # Compute permuted absolute difference of your two sampled
123
- # distributions and store it in pD:
124
- pD.append(
125
- np.abs(
126
- np.average(pS[0 : int(len(pS) / 2)])
127
- - np.average(pS[int(len(pS) / 2) :])
128
- )
129
- )
130
-
131
- # Calculate p-value
132
- if gT < 0:
133
- p_val = len(np.where(pD <= gT)[0]) / p
134
- else:
135
- p_val = len(np.where(pD >= gT)[0]) / p
136
-
137
- if plot_distr is True:
138
- plt.hist(pD, bins=30, label="permutation results")
139
- plt.axvline(gT, color="orange", label="ground truth")
140
- plt.title("ground truth " + x_unit + "=" + str(gT) + " p=" + str(p_val))
141
- plt.xlabel(x_unit)
142
- plt.legend()
143
- plt.show()
144
- return gT, p_val
145
-
146
-
147
- def permutationTest_relative(x, y, plot_distr=True, x_unit=None, p=5000):
148
- """
149
- Calculate permutation test
150
- https://towardsdatascience.com/how-to-assess-statistical-significance-in-your-data-with-permutation-tests-8bb925b2113d
151
-
152
- x (np array) : first distr.
153
- y (np array) : first distr.
154
- plot_distr (boolean) : if True: plot permutation histplot and ground truth
155
- x_unit (str) : histplot xlabel
156
- p (int): number of permutations
157
-
158
- returns:
159
- gT (float) : estimated ground truth, here absolute difference of
160
- distribution means
161
- p (float) : p value of permutation test
162
-
163
- """
164
- gT = np.abs(np.average(x) - np.average(y))
165
- pD = []
166
- for i in range(0, p):
167
- l_ = []
168
- for i in range(x.shape[0]):
169
- if random.randint(0, 1) == 1:
170
- l_.append((x[i], y[i]))
171
- else:
172
- l_.append((y[i], x[i]))
173
- pD.append(
174
- np.abs(
175
- np.average(np.array(l_)[:, 0]) - np.average(np.array(l_)[:, 1])
176
- )
177
- )
178
- if gT < 0:
179
- p_val = len(np.where(pD <= gT)[0]) / p
180
- else:
181
- p_val = len(np.where(pD >= gT)[0]) / p
182
-
183
- if plot_distr is True:
184
- plt.hist(pD, bins=30, label="permutation results")
185
- plt.axvline(gT, color="orange", label="ground truth")
186
- plt.title("ground truth " + x_unit + "=" + str(gT) + " p=" + str(p_val))
187
- plt.xlabel(x_unit)
188
- plt.legend()
189
- plt.show()
190
-
191
- return gT, p_val
192
-
193
-
194
- # @njit
195
- def permutation_numba_onesample(x, y, n_perm, two_tailed=True):
196
- """Perform permutation test with one-sample distribution.
197
-
198
- Parameters
199
- ----------
200
- x : array_like
201
- First distribution
202
- y : int or float
203
- Baseline against which to check for statistical significane
204
- n_perm : int
205
- Number of permutations
206
- two_tailed : bool, default: True
207
- Set to False if you would like to perform a one-sampled permutation
208
- test, else True
209
- two_tailed : bool, default: True
210
- Set to False if you would like to perform a one-tailed permutation
211
- test, else True
212
-
213
- Returns
214
- -------
215
- float
216
- Estimated difference of distribution from baseline
217
- float
218
- P-value of permutation test
219
- """
220
- if two_tailed is True:
221
- zeroed = x - y
222
- z = np.abs(np.mean(zeroed))
223
- p = np.empty(n_perm)
224
- # Run the simulation n_perm times
225
- for i in np.arange(n_perm):
226
- sign = np.random.choice(
227
- a=np.array([-1.0, 1.0]), size=len(x), replace=True
228
- )
229
- p[i] = np.abs(np.mean(zeroed * sign))
230
- else:
231
- zeroed = x - y
232
- z = np.mean(zeroed)
233
- p = np.empty(n_perm)
234
- # Run the simulation n_perm times
235
- for i in np.arange(n_perm):
236
- sign = np.random.choice(
237
- a=np.array([-1.0, 1.0]), size=len(x), replace=True
238
- )
239
- p[i] = np.mean(zeroed * sign)
240
- # Return p-value
241
- return z, (np.sum(p >= z)) / n_perm
242
-
243
-
244
- # @njit
245
- def permutation_numba_twosample(x, y, n_perm, two_tailed=True):
246
- """Perform permutation test.
247
-
248
- Parameters
249
- ----------
250
- x : array_like
251
- First distribution
252
- y : array_like
253
- Second distribution
254
- n_perm : int
255
- Number of permutations
256
- two_tailed : bool, default: True
257
- Set to False if you would like to perform a one-sampled permutation
258
- test, else True
259
- two_tailed : bool, default: True
260
- Set to False if you would like to perform a one-tailed permutation
261
- test, else True
262
-
263
- Returns
264
- -------
265
- float
266
- Estimated difference of distribution means
267
- float
268
- P-value of permutation test
269
- """
270
- if two_tailed is True:
271
- z = np.abs(np.mean(x) - np.mean(y))
272
- pS = np.concatenate((x, y), axis=0)
273
- half = int(len(pS) / 2)
274
- p = np.empty(n_perm)
275
- # Run the simulation n_perm times
276
- for i in np.arange(0, n_perm):
277
- # Shuffle the data
278
- np.random.shuffle(pS)
279
- # Compute permuted absolute difference of the two sampled
280
- # distributions
281
- p[i] = np.abs(np.mean(pS[:half]) - np.mean(pS[half:]))
282
- else:
283
- z = np.mean(x) - np.mean(y)
284
- pS = np.concatenate((x, y), axis=0)
285
- half = int(len(pS) / 2)
286
- p = np.empty(n_perm)
287
- # Run the simulation n_perm times
288
- for i in np.arange(0, n_perm):
289
- # Shuffle the data
290
- np.random.shuffle(pS)
291
- # Compute permuted absolute difference of the two sampled
292
- # distributions
293
- p[i] = np.mean(pS[:half]) - np.mean(pS[half:])
294
- return z, (np.sum(p >= z)) / n_perm
295
-
296
-
297
- def cluster_wise_p_val_correction(p_arr, p_sig=0.05, num_permutations=10000):
298
- """Obtain cluster-wise corrected p values.
299
-
300
- Based on: https://github.com/neuromodulation/wjn_toolbox/blob/4745557040ad26f3b8498ca5d0c5d5dece2d3ba1/mypcluster.m
301
- https://garstats.wordpress.com/2018/09/06/cluster/
302
-
303
- Arguments
304
- ---------
305
- p_arr (np.array) : ndim, can be time series or image
306
- p_sig (float) : significance level
307
- num_permutations (int) : no. of random permutations of cluster comparisons
308
-
309
- Returns
310
- -------
311
- p (float) : significance level of highest cluster
312
- p_min_index : indices of significant samples
313
- """
314
- labels, num_clusters = measure.label(p_arr <= p_sig, return_num=True)
315
-
316
- # loop through clusters of p_val series or image
317
- index_cluster = {}
318
- p_cluster_sum = np.zeros(num_clusters)
319
- for cluster_i in np.arange(num_clusters):
320
- # first cluster is assigned to be 1 from measure.label
321
- index_cluster[cluster_i] = np.where(labels == cluster_i + 1)[0]
322
- p_cluster_sum[cluster_i] = np.sum(
323
- np.array(1 - p_arr)[index_cluster[cluster_i]]
324
- )
325
- # p_min corresponds to the most unlikely cluster
326
- p_min = np.max(p_cluster_sum)
327
-
328
- p_min_index = index_cluster[np.argmax(p_cluster_sum)]
329
-
330
- # loop through random permutation cycles
331
- r_per_arr = np.zeros(num_permutations)
332
- for r in range(num_permutations):
333
- r_per = np.random.randint(
334
- low=0, high=p_arr.shape[0], size=p_arr.shape[0]
335
- )
336
-
337
- labels, num_clusters = measure.label(
338
- p_arr[r_per] <= p_sig, return_num=True
339
- )
340
-
341
- index_cluster = {}
342
- if num_clusters == 0:
343
- r_per_arr[r] = 0
344
- else:
345
- p_cluster_sum = np.zeros(num_clusters)
346
- for cluster_i in np.arange(num_clusters):
347
- index_cluster[cluster_i] = np.where(labels == cluster_i + 1)[
348
- 0
349
- ] # first cluster is assigned to be 1 from measure.label
350
- p_cluster_sum[cluster_i] = np.sum(
351
- np.array(1 - p_arr[r_per])[index_cluster[cluster_i]]
352
- )
353
- # corresponds to the most unlikely cluster
354
- r_per_arr[r] = np.max(p_cluster_sum)
355
-
356
- sorted_r = np.sort(r_per_arr)
357
-
358
- def find_arg_nearest(array, value):
359
- array = np.asarray(array)
360
- idx = (np.abs(array - value)).argmin()
361
- return idx
362
-
363
- p = 1 - find_arg_nearest(sorted_r, p_min) / num_permutations
364
-
365
- return p, p_min_index
366
-
367
-
368
- # @njit
369
- def cluster_wise_p_val_correction_numba(p_arr, p_sig, n_perm):
370
- """Calculate significant clusters and their corresponding p-values.
371
-
372
- Based on:
373
- https://github.com/neuromodulation/wjn_toolbox/blob/4745557040ad26f3b8498ca5d0c5d5dece2d3ba1/mypcluster.m
374
- https://garstats.wordpress.com/2018/09/06/cluster/
375
-
376
- Arguments
377
- ---------
378
- p_arr : array-like
379
- Array of p-values. WARNING: MUST be one-dimensional
380
- p_sig : float
381
- Significance level
382
- n_perm : int
383
- No. of random permutations for building cluster null-distribution
384
-
385
- Returns
386
- -------
387
- p : list of floats
388
- List of p-values for each cluster
389
- p_min_index : list of numpy array
390
- List of indices of each significant cluster
391
- """
392
-
393
- def cluster(iterable):
394
- """Cluster 1-D array of boolean values.
395
-
396
- Parameters
397
- ----------
398
- iterable : array-like of bool
399
- Array to be clustered.
400
-
401
- Returns
402
- -------
403
- cluster_labels : np.array
404
- Array of shape (len(iterable), 1), where each value indicates the
405
- number of the cluster. Values are 0 if the item does not belong to
406
- a cluster
407
- cluster_count : int
408
- Number of detected cluster. Corresponds to the highest value in
409
- cluster_labels
410
- """
411
- cluster_labels = np.zeros((len(iterable), 1))
412
- cluster_count = 0
413
- cluster_len = 0
414
- for idx, item in enumerate(iterable):
415
- if item:
416
- cluster_labels[idx] = cluster_count + 1
417
- cluster_len += 1
418
- elif cluster_len == 0:
419
- pass
420
- else:
421
- cluster_len = 0
422
- cluster_count += 1
423
- if cluster_len >= 1:
424
- cluster_count += 1
425
- return cluster_labels, cluster_count
426
-
427
- def calculate_null_distribution(p_arr_, p_sig_, n_perm_):
428
- """Calculate null distribution of clusters.
429
-
430
- Parameters
431
- ----------
432
- p_arr_ : numpy array
433
- Array of p-values
434
- p_sig_ : float
435
- Significance level (p-value)
436
- n_perm_ : int
437
- No. of random permutations
438
-
439
- Returns
440
- -------
441
- r_per_arr : numpy array
442
- Null distribution of shape (n_perm_)
443
- """
444
- # loop through random permutation cycles
445
- r_per_arr = np.zeros(n_perm_)
446
- for r in range(n_perm_):
447
- r_per = np.random.randint(
448
- low=0, high=p_arr_.shape[0], size=p_arr_.shape[0]
449
- )
450
- labels_, n_clusters = cluster(p_arr_[r_per] <= p_sig_)
451
-
452
- cluster_ind = {}
453
- if n_clusters == 0:
454
- r_per_arr[r] = 0
455
- else:
456
- p_sum = np.zeros(n_clusters)
457
- for ind in range(n_clusters):
458
- cluster_ind[ind] = np.where(labels_ == ind + 1)[0]
459
- p_sum[ind] = np.sum(
460
- np.asarray(1 - p_arr_[r_per])[cluster_ind[ind]]
461
- )
462
- r_per_arr[r] = np.max(p_sum)
463
- return r_per_arr
464
-
465
- labels, num_clusters = cluster(p_arr <= p_sig)
466
-
467
- null_distr = calculate_null_distribution(p_arr, p_sig, n_perm)
468
- # Loop through clusters of p_val series or image
469
- clusters = []
470
- p_vals = [np.float64(x) for x in range(0)]
471
- # Cluster labels start at 1
472
- for cluster_i in range(num_clusters):
473
- index_cluster = np.where(labels == cluster_i + 1)[0]
474
- p_cluster_sum = np.sum(np.asarray(1 - p_arr)[index_cluster])
475
- p_val = 1 - np.sum(p_cluster_sum >= null_distr) / n_perm
476
- if p_val <= p_sig:
477
- clusters.append(index_cluster)
478
- p_vals.append(p_val)
479
-
480
- return p_vals, clusters
1
+ import random
2
+ import copy
3
+
4
+ import matplotlib.pyplot as plt
5
+
6
+ # from numba import njit
7
+ import numpy as np
8
+ import pandas as pd
9
+ import scipy.stats as stats
10
+
11
+
12
+ def fitlm(x, y):
13
+ import statsmodels.api as sm
14
+ return sm.OLS(y, sm.add_constant(x)).fit()
15
+
16
+
17
+ def fitlm_kfold(x, y, kfold_splits=5):
18
+ from sklearn.linear_model import LinearRegression
19
+ from sklearn.model_selection import KFold
20
+
21
+ model = LinearRegression()
22
+ if isinstance(x, type(np.array([]))) or isinstance(x, type([])):
23
+ x = pd.DataFrame(x)
24
+ if isinstance(y, type(np.array([]))) or isinstance(y, type([])):
25
+ y = pd.DataFrame(y)
26
+ scores, coeffs = [], np.zeros(x.shape[1])
27
+ kfold = KFold(n_splits=kfold_splits, shuffle=True, random_state=42)
28
+ for i, (train, test) in enumerate(kfold.split(x, y)):
29
+ model.fit(x.iloc[train, :], y.iloc[train, :])
30
+ score = model.score(x.iloc[test, :], y.iloc[test, :])
31
+ # mdl = fitlm(np.squeeze(y.iloc[test,:].transpose()), np.squeeze(model.predict(x.iloc[test, :])))
32
+ scores.append(score)
33
+ coeffs = np.vstack((coeffs, model.coef_))
34
+ coeffs = list(np.delete(coeffs, 0))
35
+ return scores, coeffs, model, ["scores", "coeffs", "model"]
36
+
37
+
38
+ def zscore(data):
39
+ return (data - data.mean()) / data.std()
40
+
41
+
42
+ def permutationTestSpearmansRho(x, y, plot_distr=True, x_unit=None, p=5000):
43
+ """
44
+ Calculate permutation test for multiple repetitions of Spearmans Rho
45
+ https://towardsdatascience.com/how-to-assess-statistical-significance-in-your-data-with-permutation-tests-8bb925b2113d
46
+
47
+ x (np array) : first distibution e.g. R^2
48
+ y (np array) : second distribution e.g. UPDRS
49
+ plot_distr (boolean) : if True: permutation histplot and ground truth will be
50
+ plotted
51
+ x_unit (str) : histplot xlabel
52
+ p (int): number of permutations
53
+
54
+ returns:
55
+ gT (float) : estimated ground truth, here spearman's rho
56
+ p (float) : p value of permutation test
57
+ """
58
+
59
+ # compute ground truth difference
60
+ gT = stats.spearmanr(x, y)[0]
61
+ #
62
+ pV = np.array((x, y))
63
+ # Initialize permutation:
64
+ pD = []
65
+ # Permutation loop:
66
+ args_order = np.arange(0, pV.shape[1], 1)
67
+ args_order_2 = np.arange(0, pV.shape[1], 1)
68
+ for i in range(0, p):
69
+ # Shuffle the data:
70
+ random.shuffle(args_order)
71
+ random.shuffle(args_order_2)
72
+ # Compute permuted absolute difference of your two sampled
73
+ # distributions and store it in pD:
74
+ pD.append(stats.spearmanr(pV[0, args_order], pV[1, args_order_2])[0])
75
+
76
+ # calculate p value
77
+ if gT < 0:
78
+ p_val = len(np.where(pD <= gT)[0]) / p
79
+ else:
80
+ p_val = len(np.where(pD >= gT)[0]) / p
81
+
82
+ if plot_distr:
83
+ plt.hist(pD, bins=30, label="permutation results")
84
+ plt.axvline(gT, color="orange", label="ground truth")
85
+ plt.title("ground truth " + x_unit + "=" + str(gT) + " p=" + str(p_val))
86
+ plt.xlabel(x_unit)
87
+ plt.legend()
88
+ plt.show()
89
+ return gT, p_val
90
+
91
+
92
+ def permutationTest(x, y, plot_distr=True, x_unit=None, p=5000):
93
+ """
94
+ Calculate permutation test
95
+ https://towardsdatascience.com/how-to-assess-statistical-significance-in-your-data-with-permutation-tests-8bb925b2113d
96
+
97
+ x (np array) : first distr.
98
+ y (np array) : first distr.
99
+ plot_distr (boolean) : if True: plot permutation histplot and ground truth
100
+ x_unit (str) : histplot xlabel
101
+ p (int): number of permutations
102
+
103
+ returns:
104
+ gT (float) : estimated ground truth, here absolute difference of
105
+ distribution means
106
+ p (float) : p value of permutation test
107
+
108
+ """
109
+ # Compute ground truth difference
110
+ gT = np.abs(np.average(x) - np.average(y))
111
+
112
+ pV = np.concatenate((x, y), axis=0)
113
+ pS = copy.copy(pV)
114
+ # Initialize permutation:
115
+ pD = []
116
+ # Permutation loop:
117
+ for i in range(0, p):
118
+ # Shuffle the data:
119
+ random.shuffle(pS)
120
+ # Compute permuted absolute difference of your two sampled
121
+ # distributions and store it in pD:
122
+ pD.append(
123
+ np.abs(
124
+ np.average(pS[0 : int(len(pS) / 2)])
125
+ - np.average(pS[int(len(pS) / 2) :])
126
+ )
127
+ )
128
+
129
+ # Calculate p-value
130
+ if gT < 0:
131
+ p_val = len(np.where(pD <= gT)[0]) / p
132
+ else:
133
+ p_val = len(np.where(pD >= gT)[0]) / p
134
+
135
+ if plot_distr:
136
+ plt.hist(pD, bins=30, label="permutation results")
137
+ plt.axvline(gT, color="orange", label="ground truth")
138
+ plt.title("ground truth " + x_unit + "=" + str(gT) + " p=" + str(p_val))
139
+ plt.xlabel(x_unit)
140
+ plt.legend()
141
+ plt.show()
142
+ return gT, p_val
143
+
144
+
145
+ def permutationTest_relative(x, y, plot_distr=True, x_unit=None, p=5000):
146
+ """
147
+ Calculate permutation test
148
+ https://towardsdatascience.com/how-to-assess-statistical-significance-in-your-data-with-permutation-tests-8bb925b2113d
149
+
150
+ x (np array) : first distr.
151
+ y (np array) : first distr.
152
+ plot_distr (boolean) : if True: plot permutation histplot and ground truth
153
+ x_unit (str) : histplot xlabel
154
+ p (int): number of permutations
155
+
156
+ returns:
157
+ gT (float) : estimated ground truth, here absolute difference of
158
+ distribution means
159
+ p (float) : p value of permutation test
160
+
161
+ """
162
+ gT = np.abs(np.average(x) - np.average(y))
163
+ pD = []
164
+ for i in range(0, p):
165
+ l_ = []
166
+ for i in range(x.shape[0]):
167
+ if random.randint(0, 1) == 1:
168
+ l_.append((x[i], y[i]))
169
+ else:
170
+ l_.append((y[i], x[i]))
171
+ pD.append(
172
+ np.abs(np.average(np.array(l_)[:, 0]) - np.average(np.array(l_)[:, 1]))
173
+ )
174
+ if gT < 0:
175
+ p_val = len(np.where(pD <= gT)[0]) / p
176
+ else:
177
+ p_val = len(np.where(pD >= gT)[0]) / p
178
+
179
+ if plot_distr:
180
+ plt.hist(pD, bins=30, label="permutation results")
181
+ plt.axvline(gT, color="orange", label="ground truth")
182
+ plt.title("ground truth " + x_unit + "=" + str(gT) + " p=" + str(p_val))
183
+ plt.xlabel(x_unit)
184
+ plt.legend()
185
+ plt.show()
186
+
187
+ return gT, p_val
188
+
189
+
190
+ # @njit
191
+ def permutation_numba_onesample(x, y, n_perm, two_tailed=True):
192
+ """Perform permutation test with one-sample distribution.
193
+
194
+ Parameters
195
+ ----------
196
+ x : array_like
197
+ First distribution
198
+ y : int or float
199
+ Baseline against which to check for statistical significane
200
+ n_perm : int
201
+ Number of permutations
202
+ two_tailed : bool, default: True
203
+ Set to False if you would like to perform a one-sampled permutation
204
+ test, else True
205
+ two_tailed : bool, default: True
206
+ Set to False if you would like to perform a one-tailed permutation
207
+ test, else True
208
+
209
+ Returns
210
+ -------
211
+ float
212
+ Estimated difference of distribution from baseline
213
+ float
214
+ P-value of permutation test
215
+ """
216
+ if two_tailed:
217
+ zeroed = x - y
218
+ z = np.abs(np.mean(zeroed))
219
+ p = np.empty(n_perm)
220
+ # Run the simulation n_perm times
221
+ for i in np.arange(n_perm):
222
+ sign = np.random.choice(a=np.array([-1.0, 1.0]), size=len(x), replace=True)
223
+ p[i] = np.abs(np.mean(zeroed * sign))
224
+ else:
225
+ zeroed = x - y
226
+ z = np.mean(zeroed)
227
+ p = np.empty(n_perm)
228
+ # Run the simulation n_perm times
229
+ for i in np.arange(n_perm):
230
+ sign = np.random.choice(a=np.array([-1.0, 1.0]), size=len(x), replace=True)
231
+ p[i] = np.mean(zeroed * sign)
232
+ # Return p-value
233
+ return z, (np.sum(p >= z)) / n_perm
234
+
235
+
236
+ # @njit
237
+ def permutation_numba_twosample(x, y, n_perm, two_tailed=True):
238
+ """Perform permutation test.
239
+
240
+ Parameters
241
+ ----------
242
+ x : array_like
243
+ First distribution
244
+ y : array_like
245
+ Second distribution
246
+ n_perm : int
247
+ Number of permutations
248
+ two_tailed : bool, default: True
249
+ Set to False if you would like to perform a one-sampled permutation
250
+ test, else True
251
+ two_tailed : bool, default: True
252
+ Set to False if you would like to perform a one-tailed permutation
253
+ test, else True
254
+
255
+ Returns
256
+ -------
257
+ float
258
+ Estimated difference of distribution means
259
+ float
260
+ P-value of permutation test
261
+ """
262
+ if two_tailed:
263
+ z = np.abs(np.mean(x) - np.mean(y))
264
+ pS = np.concatenate((x, y), axis=0)
265
+ half = int(len(pS) / 2)
266
+ p = np.empty(n_perm)
267
+ # Run the simulation n_perm times
268
+ for i in np.arange(0, n_perm):
269
+ # Shuffle the data
270
+ np.random.shuffle(pS)
271
+ # Compute permuted absolute difference of the two sampled
272
+ # distributions
273
+ p[i] = np.abs(np.mean(pS[:half]) - np.mean(pS[half:]))
274
+ else:
275
+ z = np.mean(x) - np.mean(y)
276
+ pS = np.concatenate((x, y), axis=0)
277
+ half = int(len(pS) / 2)
278
+ p = np.empty(n_perm)
279
+ # Run the simulation n_perm times
280
+ for i in np.arange(0, n_perm):
281
+ # Shuffle the data
282
+ np.random.shuffle(pS)
283
+ # Compute permuted absolute difference of the two sampled
284
+ # distributions
285
+ p[i] = np.mean(pS[:half]) - np.mean(pS[half:])
286
+ return z, (np.sum(p >= z)) / n_perm
287
+
288
+
289
def cluster_wise_p_val_correction(p_arr, p_sig=0.05, num_permutations=10000):
    """Obtain cluster-wise corrected p values.

    Clusters of contiguous below-threshold p-values are scored by the sum of
    (1 - p) over their members; the score of the largest observed cluster is
    then ranked against a null distribution built from random resamplings of
    ``p_arr``.

    Based on: https://github.com/neuromodulation/wjn_toolbox/blob/4745557040ad26f3b8498ca5d0c5d5dece2d3ba1/mypcluster.m
    https://garstats.wordpress.com/2018/09/06/cluster/

    Arguments
    ---------
    p_arr (np.array) : ndim, can be time series or image
        NOTE(review): the index bookkeeping below keeps only first-axis
        indices (``np.where(...)[0]``) and resamples along axis 0 only, so
        only 1-D input is handled faithfully — confirm callers do not pass
        2-D images.
    p_sig (float) : significance level
    num_permutations (int) : no. of random permutations of cluster comparisons

    Returns
    -------
    p (float) : significance level of highest cluster
    p_min_index : indices of significant samples
    """
    # Local import keeps scikit-image a lazily-loaded dependency.
    from skimage.measure import label as measure_label

    # Label connected components (runs) of sub-threshold p-values.
    labels, num_clusters = measure_label(p_arr <= p_sig, return_num=True)

    # loop through clusters of p_val series or image
    index_cluster = {}
    p_cluster_sum = np.zeros(num_clusters)
    for cluster_i in np.arange(num_clusters):
        # first cluster is assigned to be 1 from measure.label
        index_cluster[cluster_i] = np.where(labels == cluster_i + 1)[0]
        # Cluster statistic: sum of (1 - p) over the cluster's members.
        p_cluster_sum[cluster_i] = np.sum(np.array(1 - p_arr)[index_cluster[cluster_i]])
    # p_min corresponds to the most unlikely cluster
    p_min = np.max(p_cluster_sum)

    p_min_index = index_cluster[np.argmax(p_cluster_sum)]

    # loop through random permutation cycles
    # NOTE(review): ``randint`` draws indices WITH replacement, i.e. this is a
    # bootstrap resampling rather than a strict permutation — presumably
    # intentional (matches the referenced toolbox); confirm.
    r_per_arr = np.zeros(num_permutations)
    for r in range(num_permutations):
        r_per = np.random.randint(low=0, high=p_arr.shape[0], size=p_arr.shape[0])

        labels, num_clusters = measure_label(p_arr[r_per] <= p_sig, return_num=True)

        index_cluster = {}
        if num_clusters == 0:
            # No sub-threshold samples in this resample: null statistic is 0.
            r_per_arr[r] = 0
        else:
            p_cluster_sum = np.zeros(num_clusters)
            for cluster_i in np.arange(num_clusters):
                index_cluster[cluster_i] = np.where(labels == cluster_i + 1)[
                    0
                ]  # first cluster is assigned to be 1 from measure.label
                p_cluster_sum[cluster_i] = np.sum(
                    np.array(1 - p_arr[r_per])[index_cluster[cluster_i]]
                )
            # corresponds to the most unlikely cluster
            r_per_arr[r] = np.max(p_cluster_sum)

    sorted_r = np.sort(r_per_arr)

    def find_arg_nearest(array, value):
        # Index of the element of ``array`` closest in value to ``value``.
        array = np.asarray(array)
        idx = (np.abs(array - value)).argmin()
        return idx

    # Rank of the observed maximum cluster statistic within the sorted null
    # distribution, converted into a p-value.
    p = 1 - find_arg_nearest(sorted_r, p_min) / num_permutations

    return p, p_min_index
354
+
355
+
356
+ # @njit
357
+ def cluster_wise_p_val_correction_numba(p_arr, p_sig, n_perm):
358
+ """Calculate significant clusters and their corresponding p-values.
359
+
360
+ Based on:
361
+ https://github.com/neuromodulation/wjn_toolbox/blob/4745557040ad26f3b8498ca5d0c5d5dece2d3ba1/mypcluster.m
362
+ https://garstats.wordpress.com/2018/09/06/cluster/
363
+
364
+ Arguments
365
+ ---------
366
+ p_arr : array-like
367
+ Array of p-values. WARNING: MUST be one-dimensional
368
+ p_sig : float
369
+ Significance level
370
+ n_perm : int
371
+ No. of random permutations for building cluster null-distribution
372
+
373
+ Returns
374
+ -------
375
+ p : list of floats
376
+ List of p-values for each cluster
377
+ p_min_index : list of numpy array
378
+ List of indices of each significant cluster
379
+ """
380
+
381
+ def cluster(iterable):
382
+ """Cluster 1-D array of boolean values.
383
+
384
+ Parameters
385
+ ----------
386
+ iterable : array-like of bool
387
+ Array to be clustered.
388
+
389
+ Returns
390
+ -------
391
+ cluster_labels : np.ndarray
392
+ Array of shape (len(iterable), 1), where each value indicates the
393
+ number of the cluster. Values are 0 if the item does not belong to
394
+ a cluster
395
+ cluster_count : int
396
+ Number of detected cluster. Corresponds to the highest value in
397
+ cluster_labels
398
+ """
399
+ cluster_labels = np.zeros((len(iterable), 1))
400
+ cluster_count = 0
401
+ cluster_len = 0
402
+ for idx, item in enumerate(iterable):
403
+ if item:
404
+ cluster_labels[idx] = cluster_count + 1
405
+ cluster_len += 1
406
+ elif cluster_len == 0:
407
+ pass
408
+ else:
409
+ cluster_len = 0
410
+ cluster_count += 1
411
+ if cluster_len >= 1:
412
+ cluster_count += 1
413
+ return cluster_labels, cluster_count
414
+
415
+ def calculate_null_distribution(p_arr_, p_sig_, n_perm_):
416
+ """Calculate null distribution of clusters.
417
+
418
+ Parameters
419
+ ----------
420
+ p_arr_ : numpy array
421
+ Array of p-values
422
+ p_sig_ : float
423
+ Significance level (p-value)
424
+ n_perm_ : int
425
+ No. of random permutations
426
+
427
+ Returns
428
+ -------
429
+ r_per_arr : numpy array
430
+ Null distribution of shape (n_perm_)
431
+ """
432
+ # loop through random permutation cycles
433
+ r_per_arr = np.zeros(n_perm_)
434
+ for r in range(n_perm_):
435
+ r_per = np.random.randint(low=0, high=p_arr_.shape[0], size=p_arr_.shape[0])
436
+ labels_, n_clusters = cluster(p_arr_[r_per] <= p_sig_)
437
+
438
+ cluster_ind = {}
439
+ if n_clusters == 0:
440
+ r_per_arr[r] = 0
441
+ else:
442
+ p_sum = np.zeros(n_clusters)
443
+ for ind in range(n_clusters):
444
+ cluster_ind[ind] = np.where(labels_ == ind + 1)[0]
445
+ p_sum[ind] = np.sum(np.asarray(1 - p_arr_[r_per])[cluster_ind[ind]])
446
+ r_per_arr[r] = np.max(p_sum)
447
+ return r_per_arr
448
+
449
+ labels, num_clusters = cluster(p_arr <= p_sig)
450
+
451
+ null_distr = calculate_null_distribution(p_arr, p_sig, n_perm)
452
+ # Loop through clusters of p_val series or image
453
+ clusters = []
454
+ p_vals = [np.float64(x) for x in range(0)]
455
+ # Cluster labels start at 1
456
+ for cluster_i in range(num_clusters):
457
+ index_cluster = np.where(labels == cluster_i + 1)[0]
458
+ p_cluster_sum = np.sum(np.asarray(1 - p_arr)[index_cluster])
459
+ p_val = 1 - np.sum(p_cluster_sum >= null_distr) / n_perm
460
+ if p_val <= p_sig:
461
+ clusters.append(index_cluster)
462
+ p_vals.append(p_val)
463
+
464
+ return p_vals, clusters