pyplsc 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyplsc-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyplsc
3
+ Version: 0.0.1
4
+ Summary: Python implementation of partial least squares correlation (PLSC)
5
+ Author-email: Isaac Kinley <isaac.kinley@gmail.com>
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+
9
+ # pyplsc
10
+ Python implementation of partial least squares correlation (PLSC)
pyplsc-0.0.1/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # pyplsc
2
+ Python implementation of partial least squares correlation (PLSC)
@@ -0,0 +1,13 @@
1
+
2
+ [build-system]
3
+ requires = ["setuptools>=61.0"]
4
+ build-backend = "setuptools.build_meta"
5
+
6
+ [project]
7
+ name = "pyplsc"
8
+ version = "0.0.1"
9
+ description = "Python implementation of partial least squares correlation (PLSC)"
10
+ authors = [{ name="Isaac Kinley", email="isaac.kinley@gmail.com" }]
11
+ readme = "README.md"
12
+ requires-python = ">=3.9"
13
+
pyplsc-0.0.1/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,317 @@
1
+
2
+ import numpy as np
3
+ from tqdm import tqdm
4
+ from scipy.linalg import orthogonal_procrustes
5
+ from sklearn.utils.extmath import randomized_svd
6
+ from numpy.linalg import svd
7
+ from joblib import Parallel, delayed
8
+
9
+ from pdb import set_trace
10
+
11
class BaseClass():
    """Shared machinery for PLSC-style decompositions.

    Subclasses must implement ``_single_permutation`` (returning the
    singular values of one permuted decomposition) and
    ``_single_bootstrap_resample`` (returning one bootstrap estimate of
    the design and brain matrices).
    """
    def __init__(self):
        # Track whether the inference procedures have been run yet
        self.perm_done = False
        self.boot_done = False
    def _setup_data(self, X, between, within, participant):
        """Build the design matrix and stratifier, and sort X to match.

        Returns the sort order applied to the rows of X so that
        subclasses can apply the same order to companion arrays.
        """
        if within is not None and participant is None:
            raise ValueError('Participants must be differentiated if there is a within-participants factor')
        self.design_, sort_idx = _get_design_matrix(len(X), between, within, participant)
        self.stratifier_ = _get_stratifier(self.design_)
        self.X_ = X[sort_idx]
        return sort_idx
    def _initial_decomposition(self, to_factorize):
        """Run the initial SVD and store saliences and singular values."""
        u, s, v = svd(to_factorize, full_matrices=False, compute_uv=True)
        self.singular_vals_ = s
        self.n_lv_ = len(s)
        # NOTE(review): this is the proportion of the singular values
        # themselves, not of their squares -- confirm this is the
        # intended "variance explained" convention
        self.variance_explained_ = s / sum(s)
        self.design_sals_ = u
        self.brain_sals_ = v.T
    def flip_signs(self, lv_idx):
        """Flip the (arbitrary) sign of one latent variable in place.

        Parameters
        ----------
        lv_idx : int
            Index of the latent variable to flip.
        """
        self.design_sals_[:, lv_idx] *= -1
        self.brain_sals_[:, lv_idx] *= -1
        self.design_stat_[:, lv_idx] *= -1
        if self.boot_done:
            self.bootstrap_ratios_[:, lv_idx] *= -1
            # Negating a confidence interval also swaps its lower/upper
            # bounds. Only the flipped latent variable may be touched;
            # previously the bound swap was applied to every LV, which
            # corrupted the CIs of the un-flipped latent variables.
            self.bootstrap_ci_[..., lv_idx] = -self.bootstrap_ci_[(1, 0), ..., lv_idx]
    def permute(self, n_perm=5000, n_jobs=None):
        """Permutation test of the singular values.

        Parameters
        ----------
        n_perm : int
            Number of permutations (must be >= 1).
        n_jobs : int or None
            Passed to joblib.Parallel.

        Returns
        -------
        ndarray of shape (n_perm, n_lv): the null distribution.
        Also stores p-values in ``self.pvals_``.
        """
        if n_perm < 1:
            raise ValueError('n_perm must be a positive integer')
        perm_singvals = Parallel(n_jobs=n_jobs)(
            delayed(self._single_permutation)()
            for _ in tqdm(range(n_perm), desc="Permutations")
        )
        null_dist = np.stack(perm_singvals)
        # +1 in numerator and denominator so p is never exactly zero
        pvals = (np.sum(null_dist >= self.singular_vals_, axis=0) + 1) / (n_perm + 1)
        self.pvals_ = pvals
        self.perm_done = True
        return null_dist
    def bootstrap(self, n_boot=5000, confint_level=0.025, n_jobs=None):
        """Bootstrap resampling for bootstrap ratios and CIs.

        Parameters
        ----------
        n_boot : int
            Number of bootstrap resamples (must be >= 1).
        confint_level : float
            Lower quantile of the CI; the upper is its complement
            (0.025 gives a 95% interval).
        n_jobs : int or None
            Passed to joblib.Parallel.
        """
        if n_boot < 1:
            raise ValueError('n_boot must be a positive integer')
        self.n_boot_ = n_boot
        self.confint_level_ = confint_level
        # Get variables needed for bootstrapping
        resample_vars = _get_vars_for_resampling(self.design_)
        boot_results = Parallel(n_jobs=n_jobs)(
            delayed(self._single_bootstrap_resample)(*resample_vars)
            for _ in tqdm(range(n_boot), desc="Resamples")
        )
        design_resampled, brain_resampled = zip(*boot_results)
        # Compute standard deviations for brain saliences to get bootstrap ratios
        stds = np.stack(brain_resampled).std(axis=0)
        self.bootstrap_ratios_ = (self.brain_sals_ @ np.diag(self.singular_vals_)) / stds
        # Compute percentile confidence intervals for design saliences
        self.bootstrap_ci_ = np.quantile(np.stack(design_resampled), [confint_level, 1 - confint_level], axis=0)
        self.boot_done = True
70
+
71
class BDA(BaseClass):
    """Barycentric discriminant analysis via SVD of condition-wise means."""

    def __init__(self, subtract=None):
        super().__init__()
        # Optional pre-centring: subtract 'between'- or 'within'-wise means
        self.subtract = subtract

    def fit(self, X, between=None, within=None, participant=None):
        """Fit the decomposition to X grouped by the given factors."""
        if between is None and within is None:
            raise ValueError('Observations must be differentiated by some categorical variable (specified via "between" or "within") for BDA')
        self._setup_data(X, between, within, participant)
        # TODO: enforce categoricity? I.e., check indicator arrays for float values
        # TODO: check whether there are multiple levels of within and between factors
        # TODO: check whether subtract option is possible given availability of factors
        # TODO: make sure lengths of inputs are all the same
        # TODO: enforce one between condition per participant
        centred = _get_mean_centred(
            X=self.X_,
            design=self.design_,
            subtract=self.subtract)
        self._initial_decomposition(centred)
        # One score per barycentre
        self.design_stat_ = centred @ self.brain_sals_
        return self

    def transform_brain(self, X=None):
        """Project observations onto the brain saliences (brain scores)."""
        data = self.X_ if X is None else X
        return data @ self.brain_sals_

    def transform_design(self, Y=None):
        """Look up the design salience for each observation (design scores)."""
        labels = self.stratifier_ if Y is None else Y
        return self.design_sals_[labels]

    def _single_permutation(self):
        """Singular values from one random permutation of the design."""
        idx = _get_permutation(self.design_)
        centred = _get_mean_centred(
            X=self.X_,
            design=self.design_[idx],
            stratifier=self.stratifier_[idx],
            subtract=self.subtract)
        return svd(centred, full_matrices=False, compute_uv=False)

    def _single_bootstrap_resample(self, *resample_vars):
        """One bootstrap estimate of the design and brain matrices."""
        idx = _get_resample_idx(*resample_vars)
        centred = _get_mean_centred(
            X=self.X_[idx],
            design=self.design_[idx],
            stratifier=self.stratifier_[idx],
            subtract=self.subtract)
        _, s, v = _svd_and_align(to_factorize=centred,
                                 target_v=self.brain_sals_)
        brain_estimate = v @ np.diag(s)
        # Project the resampled barycentres onto the original brain saliences
        design_estimate = centred @ self.brain_sals_
        return design_estimate, brain_estimate
128
+
129
class PLSC(BaseClass):
    """Partial least squares correlation between brain data and covariates,
    computed per stratum and stacked."""
    def __init__(self):
        super().__init__()
    def fit(self, X, covariates, between=None, within=None, participant=None):
        """Fit the decomposition.

        Parameters
        ----------
        X : ndarray of shape (n_obs, n_brain_features)
        covariates : ndarray of shape (n_obs, n_covariates)
        between, within, participant : optional factor labels per observation

        Returns
        -------
        self
            Returned for chaining, consistent with ``BDA.fit`` (this
            previously returned None).
        """
        sort_idx = self._setup_data(X, between, within, participant)
        self.covariates_ = covariates[sort_idx]
        # Stacked per-stratum correlation matrices between covariates and X
        R = _get_stacked_cormats(
            self.X_,
            self.covariates_,
            self.stratifier_)
        self._initial_decomposition(R)
        # Correlation between brain scores and covariates
        brain_scores = self.X_ @ self.brain_sals_
        self.design_stat_ = _get_stacked_cormats(brain_scores,
                                                 self.covariates_,
                                                 self.stratifier_)
        return self
    def _single_permutation(self):
        """Singular values of one decomposition with permuted covariates."""
        perm_idx = _get_permutation(self.design_)
        R = _get_stacked_cormats(
            self.X_,
            self.covariates_[perm_idx],
            self.stratifier_[perm_idx])
        s = svd(R, full_matrices=False, compute_uv=False)
        return s
    def _single_bootstrap_resample(self, *resample_vars):
        """One bootstrap estimate of the design and brain matrices.

        Resamples are redrawn until every stratifier level contains at
        least two distinct observations (correlations are undefined
        otherwise).
        """
        all_same = True
        while all_same:
            # Get indices of resample
            resample_idx = _get_resample_idx(*resample_vars)
            # Check for no unique observations within any level
            all_same = _validate_resample(resample_idx, self.stratifier_)
        # Run decomposition
        resampled_X = self.X_[resample_idx]
        resampled_cov = self.covariates_[resample_idx]
        # Because we're resampling within levels of the stratifier, we don't
        # need to explicitly apply resample_idx to the stratifier:
        # stratifier[resample_idx] == stratifier, always
        stacked_cormats = _get_stacked_cormats(
            resampled_X,
            resampled_cov,
            self.stratifier_)
        u, s, v = _svd_and_align(to_factorize=stacked_cormats,
                                 target_v=self.brain_sals_)
        brain_estimate = v @ np.diag(s)
        # Correlation between covariates and (resampled) brain scores
        design_estimate = _get_stacked_cormats(resampled_X @ self.brain_sals_,
                                               resampled_cov,
                                               self.stratifier_)
        return design_estimate, brain_estimate
175
+
176
def _get_permutation(design):
    """Return a random row permutation that respects the design structure.

    ``design`` has integer-coded columns (between, within, participant)
    and is assumed sorted by (between, participant, within), so the last
    row holds each factor's maximum label.
    """
    # design is sorted, so design[-1, 1] is the maximum "within" label;
    # 0 there means there is no within-participants factor at all
    if design[-1, 1] == 0: # If no within-participants factor:
        # Rows are exchangeable---just shuffle all of them
        perm_idx = np.random.permutation(len(design))
    else:
        participant = design[:, 2]
        if design[-1, 0] > 0: # If a between-participants factor:
            # Shuffle which participant occupies each between-group slot
            n_participants = participant[-1] + 1 # Max participant idx + 1
            participant_permutation = np.random.permutation(n_participants)
            # This next line works because "participant" is both an array of
            # integer labels and an integer index that could be used to index
            # an array of unique participant IDs
            participant = participant_permutation[participant]
        # Shuffle within participants: the random key randomizes row order
        # inside each participant while lexsort keeps the (possibly
        # relabelled) participants grouped together
        perm_idx = np.lexsort((np.random.rand(len(participant)), participant))
    return perm_idx
194
+
195
+ def _get_stratifier(design):
196
+ # Get unique combinations of between and within factors
197
+ _, stratifier = np.unique(design[:, :2], axis=0, return_inverse=True)
198
+ return stratifier
199
+
200
+ def _pre_centre(X, design, subtract):
201
+ # Pre-subtract between- or within-wise means if applicable
202
+ if subtract == 'between':
203
+ group_idx = design[:, 0]
204
+ elif subtract == 'within':
205
+ group_idx = design[:, 1]
206
+ rowwise_group_means = _get_groupwise_means(X, group_idx)[group_idx]
207
+ return X - rowwise_group_means
208
+
209
def _get_mean_centred(X, design, stratifier=None, subtract=None):
    """Column-centred matrix of group-wise (barycentre) means."""
    if subtract is not None:
        # Optionally pre-subtract between- or within-wise means
        X = _pre_centre(X, design, subtract)
    if stratifier is None:
        # The stratifier might not have been pre-computed by the caller
        stratifier = _get_stratifier(design)
    barycentres = _get_groupwise_means(X, stratifier)
    # Centre each column on its grand mean across groups
    return barycentres - barycentres.mean(axis=0)
219
+
220
+ def _get_groupwise_means(X, group_idx):
221
+ n_groups = group_idx.max() + 1
222
+ # Pre-allocate memory
223
+ groupwise_means = np.zeros((n_groups, X.shape[1]), dtype=X.dtype)
224
+ for group in range(n_groups):
225
+ groupwise_means[group] = X[group_idx == group].mean(axis=0)
226
+ return groupwise_means
227
+
228
def _get_vars_for_resampling(design):
    """Precompute lookup structures for stratified bootstrap resampling.

    ``design`` has integer-coded columns (between, within, participant),
    sorted by (between, participant, within); participant labels are
    assumed to form a contiguous 0..P-1 range in row order.

    Returns
    -------
    row_idx : ndarray
        All row indices, 0..n_obs-1.
    participants_by_between : list of ndarray
        For each between-group level, the participant indices it contains.
    participant_offsets : ndarray
        Cumulative row offsets; participant p owns rows
        participant_offsets[p]:participant_offsets[p+1].
    """
    row_idx = np.arange(len(design))
    between, within, participant = design[:, :3].T
    # Rows per participant -> split row indices into per-participant runs
    row_idx_by_participant = np.split(row_idx, np.cumsum(np.bincount(participant)[:-1]))
    # Between-group label of each participant, read from that participant's
    # last row (cumsum of counts - 1 is each run's last index)
    between_by_participant = between[np.cumsum(np.bincount(participant)) - 1]
    # Group participant indices by their between-group label
    participants_by_between = np.split(
        np.arange(len(row_idx_by_participant)),
        np.cumsum(np.bincount(between_by_participant)[:-1])
    )
    participant_offsets = np.cumsum([0] + [len(r) for r in row_idx_by_participant])
    return row_idx, participants_by_between, participant_offsets
241
+
242
+ def _get_resample_idx(row_idx, participants_by_between, participant_offsets):
243
+ sampled_rows = []
244
+ for ps in participants_by_between:
245
+ samp = ps[np.random.randint(len(ps), size=len(ps))]
246
+ # sampled_rows.extend(row_idx_by_participant[p] for p in samp)
247
+ sampled_rows.extend(row_idx[participant_offsets[p]:participant_offsets[p+1]] for p in samp)
248
+ resample_idx = np.concatenate(sampled_rows)
249
+ return resample_idx
250
+
251
+ def _get_design_matrix(n_obs, between=None, within=None, participant=None):
252
+ # Assign null column of zeros if absent, otherwise assign integer labels
253
+ null_col = np.zeros((n_obs,), dtype=np.int64)
254
+ if between is None:
255
+ between = null_col
256
+ else:
257
+ _, between = np.unique(between, return_inverse=True)
258
+ if within is None:
259
+ within = null_col
260
+ participant = np.arange(n_obs)
261
+ else:
262
+ _, within = np.unique(within, return_inverse=True)
263
+ _, participant = np.unique(participant, return_inverse=True)
264
+
265
+ # Sort by between, then participant, then within, if applicable
266
+ sort_idx = np.lexsort((within, participant, between))
267
+ design_matrix = np.column_stack((between, within, participant))
268
+ design_matrix = design_matrix[sort_idx]
269
+ return design_matrix, sort_idx
270
+
271
def _get_stacked_cormats(X, covariates, stratifier):
    """Stack per-stratum correlation matrices between covariates and X.

    For each stratifier level, compute the correlation matrix between
    the covariates and the columns of X within that level, then stack
    the per-level matrices vertically.
    """
    n_levels = stratifier.max() + 1
    submatrices = []
    for level in range(n_levels):
        idx = stratifier == level
        submatrices.append(_corr(covariates[idx], X[idx]))
    # np.concatenate rather than np.concat: np.concat is only an alias
    # introduced in NumPy 2.0, so this keeps NumPy 1.x compatibility
    R = np.concatenate(submatrices)
    return R
280
+
281
+ def _corr(X, Y):
282
+ Xc = X - X.mean(axis=0)
283
+ Yc = Y - Y.mean(axis=0)
284
+
285
+ n = X.shape[0] - 1
286
+ stdX = np.sqrt((Xc ** 2).sum(axis=0) / n)
287
+ stdY = np.sqrt((Yc ** 2).sum(axis=0) / n)
288
+
289
+ Xn = Xc / stdX
290
+ Yn = Yc / stdY
291
+ return Xn.T @ Yn / n
292
+
293
+ def _validate_resample(resample_idx, stratifier):
294
+ # Ensure that each stratfier level contains at least 2 unique observations
295
+ # To do this quickly, compute min and max observation idx within category
296
+ # and check that min != max
297
+ resampled_levels = stratifier[resample_idx]
298
+ order = np.argsort(resampled_levels)
299
+ stratifier = stratifier[order]
300
+ obs = resample_idx[order]
301
+ # Stratifier level boundaries
302
+ boundaries = np.flatnonzero(np.diff(stratifier)) + 1
303
+ starts = np.r_[0, boundaries]
304
+ # Min/max observation per category
305
+ mins = np.minimum.reduceat(obs, starts)
306
+ maxs = np.maximum.reduceat(obs, starts)
307
+ # Invalid if all observations are identical within any level
308
+ invalid = (mins == maxs).any()
309
+ return invalid
310
+
311
+ def _svd_and_align(to_factorize, target_v):
312
+ u, s, v = svd(to_factorize, full_matrices=False)
313
+ v = v.T
314
+ # Rotate to align with original decomposition
315
+ R, _ = orthogonal_procrustes(v, target_v, check_finite=False)
316
+ v = v @ R
317
+ return u, s, v
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyplsc
3
+ Version: 0.0.1
4
+ Summary: Python implementation of partial least squares correlation (PLSC)
5
+ Author-email: Isaac Kinley <isaac.kinley@gmail.com>
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+
9
+ # pyplsc
10
+ Python implementation of partial least squares correlation (PLSC)
@@ -0,0 +1,7 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/pyplsc/__init__.py
4
+ src/pyplsc.egg-info/PKG-INFO
5
+ src/pyplsc.egg-info/SOURCES.txt
6
+ src/pyplsc.egg-info/dependency_links.txt
7
+ src/pyplsc.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ pyplsc