pyplsc 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyplsc-0.0.1/PKG-INFO +10 -0
- pyplsc-0.0.1/README.md +2 -0
- pyplsc-0.0.1/pyproject.toml +13 -0
- pyplsc-0.0.1/setup.cfg +4 -0
- pyplsc-0.0.1/src/pyplsc/__init__.py +317 -0
- pyplsc-0.0.1/src/pyplsc.egg-info/PKG-INFO +10 -0
- pyplsc-0.0.1/src/pyplsc.egg-info/SOURCES.txt +7 -0
- pyplsc-0.0.1/src/pyplsc.egg-info/dependency_links.txt +1 -0
- pyplsc-0.0.1/src/pyplsc.egg-info/top_level.txt +1 -0
pyplsc-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyplsc
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Python implementation of partial least squares correlation (PLSC)
|
|
5
|
+
Author-email: Isaac Kinley <isaac.kinley@gmail.com>
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
|
|
9
|
+
# pyplsc
|
|
10
|
+
Python implementation of partial least squares correlation (PLSC)
|
pyplsc-0.0.1/pyproject.toml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
|
|
2
|
+
[build-system]
|
|
3
|
+
requires = ["setuptools>=61.0"]
|
|
4
|
+
build-backend = "setuptools.build_meta"
|
|
5
|
+
|
|
6
|
+
[project]
|
|
7
|
+
name = "pyplsc"
|
|
8
|
+
version = "0.0.1"
|
|
9
|
+
description = "Python implementation of partial least squares correlation (PLSC)"
|
|
10
|
+
authors = [{ name="Isaac Kinley", email="isaac.kinley@gmail.com" }]
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
requires-python = ">=3.9"
|
|
13
|
+
|
pyplsc-0.0.1/src/pyplsc/__init__.py
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
|
|
2
|
+
import numpy as np
|
|
3
|
+
from tqdm import tqdm
|
|
4
|
+
from scipy.linalg import orthogonal_procrustes
|
|
5
|
+
from sklearn.utils.extmath import randomized_svd
|
|
6
|
+
from numpy.linalg import svd
|
|
7
|
+
from joblib import Parallel, delayed
|
|
8
|
+
|
|
9
|
+
from pdb import set_trace
|
|
10
|
+
|
|
11
|
+
class BaseClass():
    """Shared inference machinery (permutation testing and bootstrap
    resampling) for the BDA and PLSC estimators.

    Subclasses must implement ``fit``, ``_single_permutation`` and
    ``_single_bootstrap_resample``, and must set ``design_stat_``.
    """
    def __init__(self):
        # Flags recording which inference procedures have been run
        self.perm_done = False
        self.boot_done = False
    def _setup_data(self, X, between, within, participant):
        """Build the design matrix and stratifier, and sort X to match.

        Returns the sort index so subclasses can reorder auxiliary
        arrays (e.g. covariates) consistently with ``self.X_``.
        """
        if within is not None and participant is None:
            raise ValueError('Participants must be differentiated if there is a within-participants factor')
        self.design_, sort_idx = _get_design_matrix(len(X), between, within, participant)
        self.stratifier_ = _get_stratifier(self.design_)
        self.X_ = X[sort_idx]
        return sort_idx
    def _initial_decomposition(self, to_factorize):
        """SVD of the statistic matrix; store saliences and singular values."""
        u, s, v = svd(to_factorize, full_matrices=False, compute_uv=True)
        self.singular_vals_ = s
        self.n_lv_ = len(s)
        # NOTE(review): this is each singular value's share of the *sum of
        # singular values*, not of the squared values -- confirm intended.
        self.variance_explained_ = s / sum(s)
        self.design_sals_ = u
        self.brain_sals_ = v.T
    def flip_signs(self, lv_idx):
        """Flip the (arbitrary) sign of latent variable ``lv_idx``."""
        self.design_sals_[:, lv_idx] *= -1
        self.brain_sals_[:, lv_idx] *= -1
        self.design_stat_[:, lv_idx] *= -1
        if self.boot_done:
            self.bootstrap_ratios_[:, lv_idx] *= -1
            self.bootstrap_ci_[..., lv_idx] *= -1
            # Negation swaps the lower/upper bounds of this LV only, so
            # re-order them for this LV without disturbing the intervals
            # of the other latent variables (previously the swap was
            # applied across the whole array, inverting every LV's CI).
            self.bootstrap_ci_[..., lv_idx] = self.bootstrap_ci_[(1, 0), ..., lv_idx]
    def permute(self, n_perm=5000, n_jobs=None):
        """Permutation test of the singular values.

        Builds a null distribution from ``n_perm`` permutations, stores
        one-sided p-values in ``self.pvals_``, and returns the null
        distribution (shape ``(n_perm, n_lv)``).
        """
        if n_perm < 1:
            raise ValueError('n_perm must be a positive integer')
        perm_singvals = Parallel(n_jobs=n_jobs)(
            delayed(self._single_permutation)()
            for _ in tqdm(range(n_perm), desc="Permutations")
        )
        null_dist = np.stack(perm_singvals)
        # +1 correction so p-values can never be exactly zero
        pvals = (np.sum(null_dist >= self.singular_vals_, axis=0) + 1) / (n_perm + 1)
        self.pvals_ = pvals
        self.perm_done = True
        return null_dist
    def bootstrap(self, n_boot=5000, confint_level=0.025, n_jobs=None):
        """Bootstrap resampling for bootstrap ratios and confidence intervals.

        ``confint_level`` is the tail probability, so the default 0.025
        yields a 95% confidence interval.
        """
        if n_boot < 1:
            raise ValueError('n_boot must be a positive integer')
        self.n_boot_ = n_boot
        self.confint_level_ = confint_level
        # Get variables needed for bootstrapping
        resample_vars = _get_vars_for_resampling(self.design_)
        boot_results = Parallel(n_jobs=n_jobs)(
            delayed(self._single_bootstrap_resample)(*resample_vars)
            for _ in tqdm(range(n_boot), desc="Resamples")
        )
        design_resampled, brain_resampled = zip(*boot_results)
        # Compute standard deviations for brain saliences to get bootstrap ratios
        stds = np.stack(brain_resampled).std(axis=0)
        self.bootstrap_ratios_ = (self.brain_sals_ @ np.diag(self.singular_vals_)) / stds
        # Compute confidence intervals for design saliences
        self.bootstrap_ci_ = np.quantile(np.stack(design_resampled), [confint_level, 1 - confint_level], axis=0)
        self.boot_done = True
|
|
70
|
+
|
|
71
|
+
class BDA(BaseClass):
    """Barycentric discriminant analysis.

    Decomposes the group-wise (barycentre) means of X after mean-centring,
    optionally pre-subtracting between- or within-group means.
    """
    def __init__(self, subtract=None):
        super().__init__()
        # 'between', 'within', or None: which factor's group means to
        # pre-subtract before the group-wise mean-centring
        self.subtract = subtract
    def fit(self, X, between=None, within=None, participant=None):
        """Fit the decomposition; returns self."""
        if between is None and within is None:
            raise ValueError('Observations must be differentiated by some categorical variable (specified via "between" or "within") for BDA')
        self._setup_data(X, between, within, participant)
        # TODO: enforce categoricity? I.e., check indicator arrays for float values
        # TODO: check whether there are multiple levels of within and between factors
        # TODO: check whether subtract option is possible given availability of factors
        # TODO: make sure lengths of inputs are all the same
        # TODO: enforce one between condition per participant
        # SVD decomposition; reuse the stratifier computed in _setup_data
        # instead of letting _get_mean_centred recompute it
        mean_centred = _get_mean_centred(
            X=self.X_,
            design=self.design_,
            stratifier=self.stratifier_,
            subtract=self.subtract)
        self._initial_decomposition(mean_centred)
        self.design_stat_ = mean_centred @ self.brain_sals_ # Score per barycentre
        return self
    def transform_brain(self, X=None):
        """Brain scores: project X (default: training data) onto the brain saliences."""
        if X is None:
            X = self.X_
        brain_scores = X @ self.brain_sals_
        return brain_scores
    def transform_design(self, Y=None):
        """Design scores: look up design saliences by stratifier level."""
        if Y is None:
            Y = self.stratifier_
        design_scores = self.design_sals_[Y]
        return design_scores
    def _single_permutation(self):
        # One permutation: shuffle the design rows, recompute singular values
        perm_idx = _get_permutation(self.design_)
        mean_centred = _get_mean_centred(
            X=self.X_,
            design=self.design_[perm_idx],
            stratifier=self.stratifier_[perm_idx],
            subtract=self.subtract)
        s = svd(mean_centred, full_matrices=False, compute_uv=False)
        return s
    def _single_bootstrap_resample(self, *resample_vars):
        # Get indices of resample
        resample_idx = _get_resample_idx(*resample_vars)
        # Run decomposition
        mean_centred = _get_mean_centred(
            X=self.X_[resample_idx],
            design=self.design_[resample_idx],
            stratifier=self.stratifier_[resample_idx],
            subtract=self.subtract)
        u, s, v = _svd_and_align(to_factorize=mean_centred,
                                 target_v=self.brain_sals_)
        brain_estimate = v @ np.diag(s)
        # Design scores of the resample on the original brain saliences
        design_estimate = mean_centred @ self.brain_sals_
        return design_estimate, brain_estimate
|
|
128
|
+
|
|
129
|
+
class PLSC(BaseClass):
    """Partial least squares correlation between brain data and covariates.

    Decomposes the vertically stacked per-stratum correlation matrices
    between ``covariates`` and ``X``.
    """
    def __init__(self):
        super().__init__()
    def fit(self, X, covariates, between=None, within=None, participant=None):
        """Fit the decomposition; returns self (consistent with BDA.fit)."""
        sort_idx = self._setup_data(X, between, within, participant)
        self.covariates_ = covariates[sort_idx]
        R = _get_stacked_cormats(
            self.X_,
            self.covariates_,
            self.stratifier_)
        self._initial_decomposition(R)
        # Correlation between brain scores and covariates
        brain_scores = self.X_ @ self.brain_sals_
        self.design_stat_ = _get_stacked_cormats(brain_scores,
                                                 self.covariates_,
                                                 self.stratifier_)
        return self
    def _single_permutation(self):
        # One permutation: shuffle covariate rows relative to X
        perm_idx = _get_permutation(self.design_)
        R = _get_stacked_cormats(
            self.X_,
            self.covariates_[perm_idx],
            self.stratifier_[perm_idx])
        s = svd(R, full_matrices=False, compute_uv=False)
        return s
    def _single_bootstrap_resample(self, *resample_vars):
        # Redraw until every stratifier level has >= 2 unique observations;
        # otherwise the within-level correlations are degenerate
        all_same = True
        while all_same:
            # Get indices of resample
            resample_idx = _get_resample_idx(*resample_vars)
            # Check for no unique observations within any level
            all_same = _validate_resample(resample_idx, self.stratifier_)
        # Run decomposition
        resampled_X = self.X_[resample_idx]
        resampled_cov = self.covariates_[resample_idx]
        stacked_cormats = _get_stacked_cormats(
            resampled_X,
            resampled_cov,
            self.stratifier_) # Because we're resampling within levels of the stratifier, we don't need to explicitly apply the resample_idx to stratifier. stratifier[resample_idx] == stratifier, always
        u, s, v = _svd_and_align(to_factorize=stacked_cormats,
                                 target_v=self.brain_sals_)
        brain_estimate = v @ np.diag(s)
        # Correlation between covariates and brain scores of the resample
        design_estimate = _get_stacked_cormats(resampled_X @ self.brain_sals_, # Brain scores
                                               resampled_cov,
                                               self.stratifier_)
        return design_estimate, brain_estimate
|
|
175
|
+
|
|
176
|
+
def _get_permutation(design):
|
|
177
|
+
# n_obs, between=None, participant=None)
|
|
178
|
+
if design[-1, 1] == 0: # If no within-participants factor:
|
|
179
|
+
# No between-participant conditions---just shuffle all rows
|
|
180
|
+
perm_idx = np.random.permutation(len(design))
|
|
181
|
+
else:
|
|
182
|
+
participant = design[:, 2]
|
|
183
|
+
if design[-1, 0] > 0: # If a between-participants factor:
|
|
184
|
+
# Shuffle participants
|
|
185
|
+
n_participants = participant[-1] + 1 # Max participant idx + 1
|
|
186
|
+
participant_permutation = np.random.permutation(n_participants)
|
|
187
|
+
# This next line works because "participant" is both an array of
|
|
188
|
+
# integer labels and an integer index that could be used to index
|
|
189
|
+
# an array of unique participant IDs
|
|
190
|
+
participant = participant_permutation[participant]
|
|
191
|
+
# Shuffle within participants
|
|
192
|
+
perm_idx = np.lexsort((np.random.rand(len(participant)), participant))
|
|
193
|
+
return perm_idx
|
|
194
|
+
|
|
195
|
+
def _get_stratifier(design):
|
|
196
|
+
# Get unique combinations of between and within factors
|
|
197
|
+
_, stratifier = np.unique(design[:, :2], axis=0, return_inverse=True)
|
|
198
|
+
return stratifier
|
|
199
|
+
|
|
200
|
+
def _pre_centre(X, design, subtract):
|
|
201
|
+
# Pre-subtract between- or within-wise means if applicable
|
|
202
|
+
if subtract == 'between':
|
|
203
|
+
group_idx = design[:, 0]
|
|
204
|
+
elif subtract == 'within':
|
|
205
|
+
group_idx = design[:, 1]
|
|
206
|
+
rowwise_group_means = _get_groupwise_means(X, group_idx)[group_idx]
|
|
207
|
+
return X - rowwise_group_means
|
|
208
|
+
|
|
209
|
+
def _get_mean_centred(X, design, stratifier=None, subtract=None):
    """Group-wise (barycentre) means of X, centred on their grand mean.

    If ``subtract`` is given, between- or within-group means are removed
    from X first.  ``stratifier`` may be supplied by the caller to skip
    recomputing it from the design.
    """
    if subtract is not None:
        X = _pre_centre(X, design, subtract)
    if stratifier is None:
        # Not pre-computed by the caller
        stratifier = _get_stratifier(design)
    group_means = _get_groupwise_means(X, stratifier)
    # Centre the barycentres on their grand mean
    return group_means - group_means.mean(axis=0)
|
|
219
|
+
|
|
220
|
+
def _get_groupwise_means(X, group_idx):
|
|
221
|
+
n_groups = group_idx.max() + 1
|
|
222
|
+
# Pre-allocate memory
|
|
223
|
+
groupwise_means = np.zeros((n_groups, X.shape[1]), dtype=X.dtype)
|
|
224
|
+
for group in range(n_groups):
|
|
225
|
+
groupwise_means[group] = X[group_idx == group].mean(axis=0)
|
|
226
|
+
return groupwise_means
|
|
227
|
+
|
|
228
|
+
def _get_vars_for_resampling(design):
|
|
229
|
+
# Set up variables used for resampling
|
|
230
|
+
row_idx = np.arange(len(design))
|
|
231
|
+
# Set up dummy indicators if needed
|
|
232
|
+
between, within, participant = design[:, :3].T
|
|
233
|
+
row_idx_by_participant = np.split(row_idx, np.cumsum(np.bincount(participant)[:-1]))
|
|
234
|
+
between_by_participant = between[np.cumsum(np.bincount(participant)) - 1]
|
|
235
|
+
participants_by_between = np.split(
|
|
236
|
+
np.arange(len(row_idx_by_participant)),
|
|
237
|
+
np.cumsum(np.bincount(between_by_participant)[:-1])
|
|
238
|
+
)
|
|
239
|
+
participant_offsets = np.cumsum([0] + [len(r) for r in row_idx_by_participant])
|
|
240
|
+
return row_idx, participants_by_between, participant_offsets
|
|
241
|
+
|
|
242
|
+
def _get_resample_idx(row_idx, participants_by_between, participant_offsets):
|
|
243
|
+
sampled_rows = []
|
|
244
|
+
for ps in participants_by_between:
|
|
245
|
+
samp = ps[np.random.randint(len(ps), size=len(ps))]
|
|
246
|
+
# sampled_rows.extend(row_idx_by_participant[p] for p in samp)
|
|
247
|
+
sampled_rows.extend(row_idx[participant_offsets[p]:participant_offsets[p+1]] for p in samp)
|
|
248
|
+
resample_idx = np.concatenate(sampled_rows)
|
|
249
|
+
return resample_idx
|
|
250
|
+
|
|
251
|
+
def _get_design_matrix(n_obs, between=None, within=None, participant=None):
|
|
252
|
+
# Assign null column of zeros if absent, otherwise assign integer labels
|
|
253
|
+
null_col = np.zeros((n_obs,), dtype=np.int64)
|
|
254
|
+
if between is None:
|
|
255
|
+
between = null_col
|
|
256
|
+
else:
|
|
257
|
+
_, between = np.unique(between, return_inverse=True)
|
|
258
|
+
if within is None:
|
|
259
|
+
within = null_col
|
|
260
|
+
participant = np.arange(n_obs)
|
|
261
|
+
else:
|
|
262
|
+
_, within = np.unique(within, return_inverse=True)
|
|
263
|
+
_, participant = np.unique(participant, return_inverse=True)
|
|
264
|
+
|
|
265
|
+
# Sort by between, then participant, then within, if applicable
|
|
266
|
+
sort_idx = np.lexsort((within, participant, between))
|
|
267
|
+
design_matrix = np.column_stack((between, within, participant))
|
|
268
|
+
design_matrix = design_matrix[sort_idx]
|
|
269
|
+
return design_matrix, sort_idx
|
|
270
|
+
|
|
271
|
+
def _get_stacked_cormats(X, covariates, stratifier):
    """Vertically stack per-stratum correlation matrices.

    For each stratifier level, computes the (n_covariates, n_features)
    correlation matrix between ``covariates`` and ``X`` restricted to
    that level, then stacks the matrices along axis 0.
    """
    submatrices = []
    n_levels = stratifier.max() + 1
    for level in range(n_levels):
        idx = stratifier == level
        submatrix = _corr(covariates[idx], X[idx])
        submatrices.append(submatrix)
    # np.concatenate rather than np.concat: the latter is an alias that
    # only exists in NumPy >= 2.0
    R = np.concatenate(submatrices)
    return R
|
|
280
|
+
|
|
281
|
+
def _corr(X, Y):
|
|
282
|
+
Xc = X - X.mean(axis=0)
|
|
283
|
+
Yc = Y - Y.mean(axis=0)
|
|
284
|
+
|
|
285
|
+
n = X.shape[0] - 1
|
|
286
|
+
stdX = np.sqrt((Xc ** 2).sum(axis=0) / n)
|
|
287
|
+
stdY = np.sqrt((Yc ** 2).sum(axis=0) / n)
|
|
288
|
+
|
|
289
|
+
Xn = Xc / stdX
|
|
290
|
+
Yn = Yc / stdY
|
|
291
|
+
return Xn.T @ Yn / n
|
|
292
|
+
|
|
293
|
+
def _validate_resample(resample_idx, stratifier):
|
|
294
|
+
# Ensure that each stratfier level contains at least 2 unique observations
|
|
295
|
+
# To do this quickly, compute min and max observation idx within category
|
|
296
|
+
# and check that min != max
|
|
297
|
+
resampled_levels = stratifier[resample_idx]
|
|
298
|
+
order = np.argsort(resampled_levels)
|
|
299
|
+
stratifier = stratifier[order]
|
|
300
|
+
obs = resample_idx[order]
|
|
301
|
+
# Stratifier level boundaries
|
|
302
|
+
boundaries = np.flatnonzero(np.diff(stratifier)) + 1
|
|
303
|
+
starts = np.r_[0, boundaries]
|
|
304
|
+
# Min/max observation per category
|
|
305
|
+
mins = np.minimum.reduceat(obs, starts)
|
|
306
|
+
maxs = np.maximum.reduceat(obs, starts)
|
|
307
|
+
# Invalid if all observations are identical within any level
|
|
308
|
+
invalid = (mins == maxs).any()
|
|
309
|
+
return invalid
|
|
310
|
+
|
|
311
|
+
def _svd_and_align(to_factorize, target_v):
|
|
312
|
+
u, s, v = svd(to_factorize, full_matrices=False)
|
|
313
|
+
v = v.T
|
|
314
|
+
# Rotate to align with original decomposition
|
|
315
|
+
R, _ = orthogonal_procrustes(v, target_v, check_finite=False)
|
|
316
|
+
v = v @ R
|
|
317
|
+
return u, s, v
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyplsc
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Python implementation of partial least squares correlation (PLSC)
|
|
5
|
+
Author-email: Isaac Kinley <isaac.kinley@gmail.com>
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
|
|
9
|
+
# pyplsc
|
|
10
|
+
Python implementation of partial least squares correlation (PLSC)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pyplsc
|