brain-pasta 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: brain-pasta
3
+ Version: 0.0.0
4
+ Requires-Python: >=3.9
5
+ Requires-Dist: numpy<2.0,>=1.26
6
+ Requires-Dist: scipy>=1.13
7
+ Requires-Dist: scikit-learn>=1.6
8
+ Requires-Dist: scikit-learn-extra>=0.3
9
+ Requires-Dist: setuptools
10
+ Dynamic: requires-dist
11
+ Dynamic: requires-python
File without changes
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: brain-pasta
3
+ Version: 0.0.0
4
+ Requires-Python: >=3.9
5
+ Requires-Dist: numpy<2.0,>=1.26
6
+ Requires-Dist: scipy>=1.13
7
+ Requires-Dist: scikit-learn>=1.6
8
+ Requires-Dist: scikit-learn-extra>=0.3
9
+ Requires-Dist: setuptools
10
+ Dynamic: requires-dist
11
+ Dynamic: requires-python
@@ -0,0 +1,9 @@
1
+ README.md
2
+ setup.py
3
+ brain_pasta.egg-info/PKG-INFO
4
+ brain_pasta.egg-info/SOURCES.txt
5
+ brain_pasta.egg-info/dependency_links.txt
6
+ brain_pasta.egg-info/requires.txt
7
+ brain_pasta.egg-info/top_level.txt
8
+ pasta/__init__.py
9
+ pasta/pasta.py
@@ -0,0 +1,5 @@
1
+ numpy<2.0,>=1.26
2
+ scipy>=1.13
3
+ scikit-learn>=1.6
4
+ scikit-learn-extra>=0.3
5
+ setuptools
@@ -0,0 +1 @@
1
+ from .pasta import *
@@ -0,0 +1,683 @@
1
+ import numpy as np
2
+ from scipy.spatial.distance import pdist, squareform
3
+ import scipy.stats as stats
4
+ from scipy.optimize import curve_fit
5
+ from sklearn.cluster import KMeans
6
+ from sklearn_extra.cluster import KMedoids
7
+ import copy
8
+ import warnings
9
+ from scipy.stats import pearsonr
10
+ from scipy.stats import t as t_dist
11
+ import setuptools
12
+
13
+
14
def estimate_variogram(D, data, M:int, qd:float):
    '''
    Estimate the empirical variogram from the distance matrix between
    vertices and the data value at each vertex. Estimation is performed at
    M lag distances, linearly spaced from min_distance to qd * max_distance,
    where min_distance is the smallest non-zero entry of D and max_distance
    is the largest entry of D.

    Parameters
    ----------
    D : ndarray (N, N)
        Distance matrix between all vertices.
    data : ndarray (N,)
        Data value at each vertex.
    M : int
        Number of lag distances (bins) at which to estimate the variogram.
    qd : float
        Fraction of the maximum distance used as the largest lag distance.

    Returns
    -------
    v : ndarray (M,)
        Estimated variogram values, i.e., semivariance.
    h : ndarray (M,)
        Lag distances.

    Notes
    -----
    This is similar to variogram estimation in BrainSMASH but determining
    the max distance evaluated in a different way.
    '''
    Dmax = qd * np.max(D)
    Dmin = np.min(D[D > 0])

    # Each unordered pair once: upper triangle without the diagonal.
    row, col = np.triu_indices_from(D, k=1)
    dval = D[row, col]

    # Keep only the pairs falling within the distance range of analysis.
    mask = dval <= Dmax
    dval = dval[mask]
    # The squared differences do not depend on the lag distance, so they
    # are computed once instead of once per bin.
    diff_sq = (data[row[mask]] - data[col[mask]]) ** 2

    h = np.linspace(Dmin, Dmax, M)  # linearly spaced lag distances
    delta = (Dmax - Dmin) / (M - 1) * 0.5
    sigma = 6 * delta
    v = np.zeros(M)
    # Gaussian smoothing kernel around each lag distance, same as BrainSMASH.
    for i in range(M):
        w = np.exp(-((2.68 * np.abs(h[i] - dval)) ** 2) / (2 * sigma ** 2))
        v[i] = 0.5 * np.sum(w * diff_sq) / np.sum(w)

    return v, h
70
+
71
+
72
def fit_variogram(h,v,D,PrecomputedVariance=None, nugget:bool=True):
    '''
    Fit a stable variogram model to an empirical variogram and build the
    corresponding covariance matrix.

    Parameters
    ----------
    h : (M,) ndarray
        Empirical lag distances.
    v : (M,) ndarray
        Empirical semivariance evaluated at lag distances ``h``.
    D : (N, N) ndarray
        Pairwise distance matrix between spatial locations.
    PrecomputedVariance : float or None, optional
        Initial guess of the sill. It also scales the upper bounds of the
        sill (2x) and of the nugget (0.5x). If ``None``, the maximum of
        ``v`` is used.
    nugget : bool, default=True
        Whether to include a nugget term in the fitted model.

    Returns
    -------
    c_para : (N, N) ndarray
        Covariance matrix derived from the fitted model; the diagonal is
        set to sill + nugget.
    b : (4,) ndarray
        Fitted parameters in the order ``(sill, range, exponent, nugget)``.
        The nugget entry is 0 when ``nugget`` is False.
    f : callable
        Variogram function. ``f(h)`` returns the semivariance at lag
        distance ``h``.
    fcov : callable
        Covariance function. ``fcov(h)`` returns the covariance at lag
        distance ``h``.
    '''
    sill_guess = np.max(v) if PrecomputedVariance is None else PrecomputedVariance

    # Start values and box constraints for (sill, range, exponent).
    start = [sill_guess, np.min(h), 1.]
    lower = [0., 0., 0.]
    upper = [2 * sill_guess, np.inf, 2.]

    if nugget:
        # Fourth parameter: nugget, capped at half the sill guess.
        start.append(0.)
        lower.append(0.)
        upper.append(0.5 * sill_guess)
        model = stable_variogram
    else:
        model = stable_variogram_no_nugget

    b, _ = curve_fit(model, h, v, p0=np.asarray(start),
                     bounds=(np.asarray(lower), np.asarray(upper)))
    if not nugget:
        b = np.append(b, 0.)  # report a zero nugget so b always has 4 entries

    def f(h):
        return stable_variogram(h, *b)

    def fcov(h):
        return stable_covariance_func(h, b)

    c_para = fcov(D)  # off-diagonal components of the covariance matrix
    np.fill_diagonal(c_para, b[0] + b[3])  # zero-lag covariance: sill + nugget

    return c_para, b, f, fcov
130
+
131
+
132
def stable_variogram_no_nugget(h, b1, b2, b3):
    '''
    Stable variogram model without nugget, defined as:
        semivariance = sill * (1 - exp(-(h / range)**shape))

    Parameters
    ----------
    h : float or ndarray
        Lag distance to be evaluated.
    b1 : float
        Sill.
    b2 : float
        Range parameter.
    b3 : float
        Shape.

    Returns
    -------
    float or ndarray
        Semivariance at lag distance ``h``.
    '''
    scaled_lag = h / b2
    decay = np.exp(-(scaled_lag ** b3))
    return b1 * (1 - decay)
153
+
154
def stable_variogram(h, b1, b2, b3, b4):
    '''
    Stable variogram model with nugget, defined as:
        semivariance = sill * (1 - exp(-(h / range)**shape)) + nugget

    Parameters
    ----------
    h : float or ndarray
        Lag distance to be evaluated.
    b1 : float
        Sill.
    b2 : float
        Range parameter.
    b3 : float
        Shape.
    b4 : float
        Nugget, added as a constant offset at every lag distance.

    Returns
    -------
    float or ndarray
        Semivariance at lag distance ``h``.
    '''
    return b1 * (1 - np.exp(-(h / b2) ** b3)) + b4
176
+
177
def stable_covariance_func(h, b):
    '''
    Covariance implied by the stable variogram model for observations
    separated by distance h.

    For h > 0 this is
        (sill + nugget) - (sill * (1 - exp(-(h / range)**shape)) + nugget)
        = sill * exp(-(h / range)**shape).

    For h == 0 the function returns 0; the zero-lag value (sill + nugget)
    is not computed here.

    Parameters
    ----------
    h : float or ndarray
        Lag distance at which to compute covariance.
    b : ndarray
        Stable model parameters; only the first three entries
        (sill, range, shape) are used.

    Returns
    -------
    float or ndarray
        Covariance at distance h.
    '''
    sill, range_par, shape = b[:3]
    is_positive_lag = h > 0
    return is_positive_lag * (sill * np.exp(-(h / range_par) ** shape))
202
+
203
def parc_data(parc, c_para, b, D, coord, max_clusters, min_clusters, min_cluster_size, map_idx):
    '''
    Parcellate data depending on the setting, to account for nonstationarity.

    3 scenarios:
    1. parc is None: do not parcellate and return the covariance matrix
       c_para as is.
    2. parc is the string 'auto': determine the number of parcels from the
       range and shape parameters of the stable variogram model (b[1] and
       b[2]), and parcellate the data using spatial clustering.
    3. parc is a user-specified int array of shape (N,), each int marking a
       parcel: return parc as is, and warn if it has more parcels than the
       data-derived number used by 'auto'.

    Parameters
    ----------
    parc : None, 'auto', or ndarray (N,)
        Parcellation setting.
    c_para : ndarray (N, N)
        Covariance matrix estimated without parcellation.
    b : ndarray
        Stable variogram model parameters (sill, range, shape, nugget).
    D : ndarray (N, N)
        Distance matrix of the data.
    coord : ndarray (N, 3) or None
        Spatial coordinates. If None, 'auto' clusters with KMedoids fitted
        on D; otherwise KMeans fitted on coord.
    max_clusters : int
        Maximum number of parcels.
    min_clusters : int
        Minimum number of parcels.
    min_cluster_size : int
        Lower bound on the average number of points per parcel.
    map_idx : int
        Identifies the map in the warning message.

    Returns
    -------
    parc_out : None or ndarray (N,)
        None when parc is None, otherwise the parcel label of each point.
    n_parc : int
        Number of parcels.
    unique_parcs : None or ndarray
        Unique parcel labels (None when parc is None).
    fc_para : ndarray (N, N)
        c_para itself when there is a single parcel, otherwise a zero
        matrix of the same shape to be filled in later steps.
    '''
    if parc is None:  # no parcellation: covariance matrix returned as is
        return None, 1, None, c_para

    # Data-driven number of parcels from the effective range of the variogram.
    range_len = b[1] * 2.996 ** (1/b[2])  # effective range
    # Average number of points within the effective range of a point.
    nPoints = np.max([np.sum(D < range_len) / D.shape[0] - 1, min_cluster_size])
    n_clusters = np.max([np.min([np.floor(D.shape[0] / nPoints), max_clusters]), min_clusters]).astype(int)

    # Check the type first: comparing an ndarray with 'auto' yields an
    # elementwise array whose truth value is ambiguous.
    if isinstance(parc, str) and parc == 'auto':
        if coord is not None:  # spatial clustering via KMeans on coordinates
            parc_out = KMeans(n_clusters).fit(coord).labels_
        else:  # spatial clustering via KMedoids on the distance matrix
            parc_out = KMedoids(n_clusters).fit(D).labels_
        unique_parcs = np.unique(parc_out)
        n_parc = len(unique_parcs)
    else:
        # User-specified parcellation is returned as is.
        parc_out = parc
        unique_parcs = np.unique(parc_out)
        n_parc = len(unique_parcs)
        if n_parc > n_clusters:
            warnings.warn(f'data No.{map_idx}: specified number of parcs {n_parc} is larger than data-derived max number of parcs {n_clusters}, carefully trade off the ability for detecting nonstationarity and the parcel coverage for robust estimation.')

    if n_parc == 1:  # no subdivision: covariance matrix returned as is
        fc_para = c_para
    else:  # subdivided: zero matrix to be filled in later steps (PaSTA-NS)
        fc_para = np.zeros_like(c_para)
    return parc_out, n_parc, unique_parcs, fc_para
261
+
262
+
263
def effective_sample_size_estimation(x, y, coord=None, D=None, dim=None, M=None, qd=0.7, xparc=None, yparc=None, max_clusters=10, min_cluster_size=500, min_clusters=1, M_cluster=None, nugget=True):
    '''
    Main function that runs PaSTA and PaSTA-NS to compute effective sample size
    and autocorrelation-corrected p-values.

    Leaving xparc=None and yparc=None runs PaSTA, while setting them to
    'auto' or to a user-specified parcellation int array (N,) runs PaSTA-NS.

    Parameters
    ----------
    x, y : ndarray (N,)
        Spatial map data to evaluate association. Can contain missing values
        such as NaN and Inf; only observations finite in both maps are used.
    coord : ndarray (N, 3) or None
        Spatial coordinates for observations. When left as None, the
        function requires D to run PaSTA, and D and dim to run PaSTA-NS.
    D : ndarray (N, N) or None
        Distance matrix. When left as None, computed from coord.
    dim : int or None
        Spatial dimension of data. When left as None, taken as
        coord.shape[1] if coord is available.
    M : int or None
        Number of lag distances to evaluate when estimating the variogram.
        When set to None, 3*ceil(sqrt(Nv)) is used, with Nv the number of
        valid observations.
    qd : float (0, 1]
        Coverage of lag distances evaluated in the variogram, with the
        maximum distance evaluated being qd*np.max(D). Default 0.7.
    xparc : None, 'auto', or ndarray (N,)
        Parcellation setting for map x. An array of length N is restricted
        to the valid observations and relabelled to 0..Np-1.
    yparc : None, 'auto', or ndarray (N,)
        Parcellation setting for map y, handled like xparc.
    max_clusters : int
        Maximum number of parcels allowed in PaSTA-NS.
    min_clusters : int
        Minimum number of parcels allowed in PaSTA-NS.
    min_cluster_size : int
        Minimum size of parcels (# observations per parcel).
    M_cluster : int or None
        Number of lag distances to evaluate in PaSTA-NS parcels when
        estimating their variograms. When set to None, defaults to
        3*ceil(sqrt(Np)), where Np is the number of observations in each
        parcel.
    nugget : bool
        Whether to use a nugget in the variogram models. Default True.

    Returns
    -------
    pef : float
        Significance p-value based on PaSTA/PaSTA-NS; NaN when the run is
        unsuccessful.
    rX : float
        Pearson correlation coefficient between x and y.
    nef : float
        Effective sample size estimated.
    run_status : bool
        True indicates a successful run (nef > 2), False an unsuccessful one.
    n_parc : ndarray (2,)
        [xn_parc, yn_parc], the number of parcels for each map.
    p_naive : float
        p-value from scipy's pearsonr, i.e., without controlling for
        autocorrelation.
    fc_para1 : ndarray (Nv, Nv)
        Covariance matrix for map x over the Nv observations that are
        finite in both maps.
    fc_para2 : ndarray (Nv, Nv)
        Covariance matrix for map y over the same Nv observations.
    '''
    assert (coord is not None or D is not None), 'at least one of coord and D is required'
    assert ((coord is not None or dim is not None) or xparc is None and yparc is None), 'dim is required for PaSTA-NS when coord is not provided'
    valid = np.logical_and(np.isfinite(x), np.isfinite(y))
    x = stats.zscore(x[valid])
    y = stats.zscore(y[valid])

    # Restrict every per-observation input to the valid observations, so
    # that coord, D and parcel labels stay aligned with x and y.
    if coord is not None:
        coord = coord[valid, :]
    if D is not None:
        D = D[np.ix_(valid, valid)]
        if dim is None and coord is not None:
            dim = coord.shape[1]
    else:
        D = squareform(pdist(coord))
        dim = coord.shape[1]

    if xparc is not None and not isinstance(xparc, str):
        xparc = np.asarray(xparc)
        if xparc.ndim == 1 and xparc.shape[0] == valid.shape[0]:
            # Relabel to 0..Np-1 in case dropping observations emptied a parcel.
            _, xparc = np.unique(xparc[valid], return_inverse=True)
    if yparc is not None and not isinstance(yparc, str):
        yparc = np.asarray(yparc)
        if yparc.ndim == 1 and yparc.shape[0] == valid.shape[0]:
            _, yparc = np.unique(yparc[valid], return_inverse=True)

    if M is None:
        M = 3 * np.ceil(np.sqrt(x.shape[0])).astype('int')

    # Global stationary variogram fit for each map.
    PrecomputedVariance = None
    v1, h1 = estimate_variogram(D, x, M, qd)
    v2, h2 = estimate_variogram(D, y, M, qd)
    c_para1, b1, f1, fcov1 = fit_variogram(h1, v1, D, PrecomputedVariance, nugget)
    c_para2, b2, f2, fcov2 = fit_variogram(h2, v2, D, PrecomputedVariance, nugget)

    # map_idx (last argument) identifies the map in over-parcellation warnings.
    xparc, xn_parc, xunique_parcs, fc_para1 = parc_data(xparc, c_para1, b1, D, coord, max_clusters, min_clusters, min_cluster_size, 1)
    yparc, yn_parc, yunique_parcs, fc_para2 = parc_data(yparc, c_para2, b2, D, coord, max_clusters, min_clusters, min_cluster_size, 2)
    if xn_parc > 1:
        # The globally fitted shape parameter is reused for every parcel.
        exponent1 = b1[2]
        fc_para1, pb1 = fit_covariance_blocks(x, D, xn_parc, xparc, M_cluster, qd, nugget, exponent1)
        fc_para1 = process_convolution_crossblocks(fc_para1, pb1, x, D, xn_parc, xparc, dim, exponent1)
    if yn_parc > 1:
        exponent2 = b2[2]
        fc_para2, pb2 = fit_covariance_blocks(y, D, yn_parc, yparc, M_cluster, qd, nugget, exponent2)
        fc_para2 = process_convolution_crossblocks(fc_para2, pb2, y, D, yn_parc, yparc, dim, exponent2)

    nef = cov2nef(fc_para1, fc_para2)
    run_status = nef > 2

    rX, p_naive = pearsonr(x, y)
    if run_status:
        pef = nef2p(rX, nef)
    else:
        pef = np.nan
    n_parc = np.asarray([xn_parc, yn_parc])
    return pef, rX, nef, run_status, n_parc, p_naive, fc_para1, fc_para2
384
+
385
def covariance_estimation(x, coord=None, D=None, dim=None, M=None, qd=0.7, xparc=None, max_clusters=10, min_cluster_size=500, min_clusters=1, M_cluster=None, nugget=True):
    '''
    Compute the covariance matrix for a single map x using PaSTA or PaSTA-NS.

    This can be particularly useful when pairwise association between a
    large number of maps needs to be evaluated. Computing the covariance
    matrix for each map separately and saving it for later use avoids
    repeating the covariance estimation done inside
    effective_sample_size_estimation.

    Statistical significance between two maps can be inferred by loading
    saved covariance matrices (cov1 and cov2) of two maps (x and y), and
    computing effective sample size and p-value with the steps below:

    get the submatrix of the covariance matrices for points that are valid
    in both maps - valid = np.isfinite(x) & np.isfinite(y),
    cov1 = cov1[np.ix_(valid, valid)],
    cov2 = cov2[np.ix_(valid, valid)],
    x = x[valid], y = y[valid]

    compute nef - cov2nef(cov1, cov2)

    compute a test statistic such as the Pearson correlation coefficient -
    rX, p_naive = pearsonr(x, y)

    compute the p-value from the test statistic and the effective sample
    size: nef2p(rX, nef)

    Inputs are the same as in effective_sample_size_estimation but with y
    and yparc removed. An xparc array of length N is restricted to the valid
    observations and relabelled to 0..Np-1.

    Returns
    -------
    covmat : ndarray (N, N)
        Covariance matrix of map x, where rows and columns corresponding to
        invalid observations in x (e.g., NaN, Inf) are set to np.nan and
        need to be removed before computing nef.
    '''
    assert (coord is not None or D is not None), 'at least one of coord and D is required'
    assert ((coord is not None or dim is not None) or xparc is None), 'dim is required for PaSTA-NS when coord is not provided'
    nx = len(x)
    valid = np.isfinite(x)
    x = stats.zscore(x[valid])
    covmat = np.full((nx, nx), np.nan)

    # Restrict every per-observation input to the valid observations, so
    # that coord, D and parcel labels stay aligned with x.
    if coord is not None:
        coord = coord[valid, :]
    if D is not None:
        D = D[np.ix_(valid, valid)]
        if dim is None and coord is not None:
            dim = coord.shape[1]
    else:
        D = squareform(pdist(coord))
        dim = coord.shape[1]

    if xparc is not None and not isinstance(xparc, str):
        xparc = np.asarray(xparc)
        if xparc.ndim == 1 and xparc.shape[0] == nx:
            # Relabel to 0..Np-1 in case dropping observations emptied a parcel.
            _, xparc = np.unique(xparc[valid], return_inverse=True)

    if M is None:
        M = 3 * np.ceil(np.sqrt(x.shape[0])).astype('int')

    # Global stationary variogram fit.
    PrecomputedVariance = None
    v1, h1 = estimate_variogram(D, x, M, qd)
    c_para1, b1, f1, fcov1 = fit_variogram(h1, v1, D, PrecomputedVariance, nugget)

    xparc, xn_parc, xunique_parcs, fc_para1 = parc_data(xparc, c_para1, b1, D, coord, max_clusters, min_clusters, min_cluster_size, 1)
    if xn_parc > 1:
        # The globally fitted shape parameter is reused for every parcel.
        exponent1 = b1[2]
        fc_para1, pb1 = fit_covariance_blocks(x, D, xn_parc, xparc, M_cluster, qd, nugget, exponent1)
        fc_para1 = process_convolution_crossblocks(fc_para1, pb1, x, D, xn_parc, xparc, dim, exponent1)

    # Scatter the valid-only covariance back into the full (N, N) layout.
    covmat[np.ix_(valid, valid)] = fc_para1
    return covmat
451
+
452
+
453
def cov2nef(c_para1, c_para2):
    '''
    Compute effective sample size from covariance matrices c_para1 and
    c_para2.

    Is a computationally efficient implementation equivalent to::

        nef=real(1/(trace(B*fc_para1*B*fc_para2)/(trace(B*fc_para1)*trace(B*fc_para2)))+1);

    where multiplying by B on both sides corresponds to the row/column
    mean-centering applied below.

    Parameters
    ----------
    c_para1 : ndarray (N, N)
        Covariance matrix.
    c_para2 : ndarray (N, N)
        Covariance matrix.

    Returns
    -------
    nef : float
        Effective sample size.
    '''
    # Double-centering: subtract row and column means, add back the grand mean.
    c1 = c_para1 - np.mean(c_para1, axis=0, keepdims=True) - np.mean(c_para1, axis=1, keepdims=True) + np.mean(c_para1)
    c2 = c_para2 - np.mean(c_para2, axis=0, keepdims=True) - np.mean(c_para2, axis=1, keepdims=True) + np.mean(c_para2)
    # trace(c1 @ c2) == sum_ij c1[i, j] * c2[j, i]; the elementwise form
    # avoids building the full O(N^3) matrix product just to take its trace.
    num = np.sum(c1 * c2.T)
    den = np.trace(c1) * np.trace(c2)
    nef = np.real(1 / (num / den) + 1)
    return nef
480
+
481
def nef2p(rX, nef):
    '''
    Infer the two-sided p-value from the test statistic rX and the
    effective sample size nef, using a t distribution with nef - 2 degrees
    of freedom.

    Parameters
    ----------
    rX : float
        Test statistic (correlation coefficient).
    nef : float
        Effective sample size.

    Returns
    -------
    p : float
        Two-sided p-value. NaN when nef <= 2 (no degrees of freedom left),
        and 0.0 when |rX| >= 1.
    '''
    df = max(0, nef - 2)
    if df == 0:
        return np.nan
    if abs(rX) >= 1:
        # A perfect correlation makes the t statistic infinite; return the
        # limiting p-value instead of dividing by zero.
        return 0.0
    t = rX * np.sqrt(df / (1 - rX**2))
    p = 2 * t_dist.sf(np.abs(t), df)
    return p
504
+
505
def fit_covariance_blocks(x, D, n_clusters, point_cluster_idx, M_cluster, qd, nugget, exponent):
    '''
    Fit a variogram model within each parcel and fill the diagonal blocks
    of the nonstationary covariance matrix.

    Parameters
    ----------
    x : ndarray (N,)
        Spatial map data. All values are valid.
    D : ndarray (N, N)
        Distance matrix.
    n_clusters : int
        Number of parcels.
    point_cluster_idx : ndarray (N,)
        Int array with the parcel label of each point, ranging from
        0 to NP-1 if NP parcels.
    M_cluster : int or None
        Number of lag distances used for the parcel variograms. When None,
        3*ceil(sqrt(Np)) is used, where Np is the number of observations in
        the parcel.
    qd : float (0, 1]
        Fraction of the parcel's maximum distance used as largest lag.
    nugget : bool
        Whether to use a nugget in the variogram models.
    exponent : float
        Shape parameter from the global stationary variogram model, kept
        the same across parcels.

    Returns
    -------
    c_para : ndarray (N, N)
        Covariance matrix with within-parcel covariances filled in and
        cross-parcel elements left at 0.
    b : ndarray (n_clusters, 4)
        Stable variogram model parameters, one row per parcel.
    '''
    cov = np.zeros(D.shape)
    params = np.zeros(shape=(n_clusters, 4))
    for k in range(n_clusters):
        members = point_cluster_idx == k
        block = np.ix_(members, members)
        raw = x[members]
        scale = raw.var()  # parcel variance, used to undo the z-scoring below
        z = stats.zscore(raw)
        D_block = D[block]
        if M_cluster is None:
            n_lags = 3 * np.ceil(np.sqrt(z.shape[0])).astype(int)
        else:
            n_lags = M_cluster
        v, h = estimate_variogram(D_block, z, n_lags, qd)
        # Initial sill guess of 1 for the z-scored parcel data.
        unit_cov, pb, _, _ = fit_variogram_fixed_exponent(h, v, D_block, exponent, 1, nugget)
        cov[block] = unit_cov * scale
        # Rescale sill and nugget back to the parcel's original variance.
        pb[0] *= scale
        pb[-1] *= scale
        params[k, :] = pb
    return cov, params
559
+
560
def fit_variogram_fixed_exponent(h, v, D, exponent, PrecomputedVariance=None, nugget: bool = True):
    '''
    Same as fit_variogram, but for a stable model whose shape parameter
    (exponent) is predetermined; only sill, range and (optionally) nugget
    are fitted.

    Parameters
    ----------
    h : (M,) ndarray
        Empirical lag distances.
    v : (M,) ndarray
        Empirical semivariance evaluated at ``h``.
    D : (N, N) ndarray
        Pairwise distance matrix between spatial locations.
    exponent : float
        Fixed shape parameter of the stable model.
    PrecomputedVariance : float or None
        Initial guess of the sill. It also scales the upper bounds of the
        sill (2x) and of the nugget (0.5x). If None, the maximum of ``v``
        is used.
    nugget : bool
        Whether to include a nugget term in the fitted model.

    Returns
    -------
    c_para : (N, N) ndarray
        Covariance matrix derived from the fitted model; the diagonal is
        set to sill + nugget.
    b : (4,) ndarray
        Parameters in the order ``(sill, range, exponent, nugget)``, with
        ``exponent`` copied from the input and nugget 0 when not fitted.
    f : callable
        Variogram function of lag distance.
    fcov : callable
        Covariance function of lag distance.
    '''
    if PrecomputedVariance is None:
        PrecomputedVariance = np.max(v)
    # Start values and box constraints for (sill, range).
    x0 = np.asarray([PrecomputedVariance, np.min(h)])
    lb = np.asarray([0., 0.])
    ub = np.asarray([2*PrecomputedVariance, np.inf])
    if not nugget:
        b, _ = curve_fit(lambda h, b1, b2: stable_variogram_fixed_exp_no_nugget(h, b1, b2, exponent), h, v, p0=x0, bounds=(lb, ub))
        b = np.array([b[0], b[1], exponent, 0.0])
    else:
        # Third fitted parameter: nugget, capped at half the sill guess.
        x0 = np.append(x0, 0.)
        lb = np.append(lb, 0.)
        ub = np.append(ub, 0.5*PrecomputedVariance)
        b, _ = curve_fit(lambda h, b1, b2, b3: stable_variogram_fixed_exp(h, b1, b2, b3, exponent), h, v, p0=x0, bounds=(lb, ub))
        b = np.asarray([b[0], b[1], exponent, b[-1]])

    f = lambda h: stable_variogram(h, *b)
    fcov = lambda h: stable_covariance_func(h, b)
    c_para = fcov(D)  # off-diagonal components of the covariance matrix
    np.fill_diagonal(c_para, b[0] + b[3])  # zero-lag covariance: sill + nugget

    return c_para, b, f, fcov
602
+
603
def stable_variogram_fixed_exp_no_nugget(h, b1, b2, fixed_exp):
    '''
    Stable variogram with a prespecified shape parameter and no nugget:
        semivariance = b1 * (1 - exp(-(h / b2)**fixed_exp))

    Parameters
    ----------
    h : float or ndarray
        Lag distance to be evaluated.
    b1 : float
        Sill.
    b2 : float
        Range parameter.
    fixed_exp : float
        Prespecified shape parameter.

    Returns
    -------
    float or ndarray
        Semivariance at lag distance ``h``.
    '''
    scaled_lag = h / b2
    decay = np.exp(-(scaled_lag ** fixed_exp))
    return b1 * (1 - decay)
620
+
621
def stable_variogram_fixed_exp(h, b1, b2, b3, fixed_exp):
    '''
    Stable variogram with a prespecified shape parameter, with nugget:
        semivariance = b1 * (1 - exp(-(h / b2)**fixed_exp)) + b3

    Parameters
    ----------
    h : float or ndarray
        Lag distance to be evaluated.
    b1 : float
        Sill.
    b2 : float
        Range parameter.
    b3 : float
        Nugget.
    fixed_exp : float
        Prespecified shape parameter.

    Returns
    -------
    float or ndarray
        Semivariance at lag distance ``h``.
    '''
    scaled_lag = h / b2
    decay = np.exp(-(scaled_lag ** fixed_exp))
    return b1 * (1 - decay) + b3
638
+
639
def process_convolution_crossblocks(c_para, b, x, D, n_clusters, point_cluster_idx, dim, exponent):
    '''
    Fill the cross-parcel blocks of the nonstationary covariance matrix
    by process convolution.

    Parameters
    ----------
    c_para : ndarray (N, N)
        Covariance matrix output from fit_covariance_blocks, where
        covariances are estimated for within-parcel pairs but not
        cross-parcel. Modified in place.
    b : ndarray (n_clusters, 4)
        Fitted stable variogram model parameters, one row per parcel;
        column 0 (sill) and column 1 (range) are used.
    x : ndarray (N,)
        Map data. Not used in the computation.
    D : ndarray (N, N)
        Distance matrix.
    n_clusters : int
        Number of parcels.
    point_cluster_idx : ndarray (N,)
        Int array starting from 0 indicating the membership of each
        point to parcels.
    dim : int
        Spatial dimension of the data.
    exponent : float
        Shape parameter fitted using the global stationary variogram,
        shared by all parcels.

    Returns
    -------
    c_para : ndarray (N, N)
        The same array, with cross-parcel blocks filled symmetrically.
    '''
    for i in range(n_clusters - 1):
        rows = point_cluster_idx == i
        sill_i, range_i = b[i, 0], b[i, 1]
        for j in range(i + 1, n_clusters):
            cols = point_cluster_idx == j
            range_j = b[j, 1]
            # Mean of the squared ranges of the two parcels.
            mean_sq_range = (range_i ** 2 + range_j ** 2) / 2
            Q = D[np.ix_(rows, cols)] ** 2 / mean_sq_range
            prefactor = (range_i * range_j / mean_sq_range) ** (dim/2) * np.sqrt(sill_i * b[j, 0])
            cross = prefactor * np.exp(- np.sqrt(Q) ** exponent)
            # Write the block and its transpose to keep the matrix symmetric.
            c_para[np.ix_(rows, cols)] = cross
            c_para[np.ix_(cols, rows)] = cross.T
    return c_para
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,14 @@
1
from setuptools import find_packages, setup

# Runtime dependencies of the package; numpy is held below 2.0.
INSTALL_REQUIRES = [
    'numpy>=1.26,<2.0',
    'scipy>=1.13',
    'scikit-learn>=1.6',
    'scikit-learn-extra>=0.3',
    'setuptools',
]

setup(
    name='brain-pasta',
    version='0.0.0',
    python_requires=">=3.9",
    packages=find_packages(),
    install_requires=INSTALL_REQUIRES,
)