boostrsa 0.0.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
boostrsa/__init__.py ADDED
File without changes
File without changes
File without changes
@@ -0,0 +1,130 @@
1
+
2
+ import numpy as np
3
+ import cupy as cp
4
+ from numba import cuda, jit
5
+ from boostrsa.src.types import ShrinkageMethod
6
+ from boostrsa.src.gpu.basic_operations import outer_sum_square, outer_sum
7
+ from boostrsa.src.gpu.matrix import diag, eyes
8
+ from boostrsa.src.gpu.basic_operations import scaling
9
+
10
def _covariance_eye(residuals, threads_per_block = 1024):
    """
    Compute shrunk covariance matrices for a stack of datasets, shrinking
    every sample covariance toward a scaled identity matrix.
    **residuals should be demeaned before!

    Ledoit and Wolf (2004): "A well-conditioned estimator for large-dimensional covariance matrices"

    :param residuals(np.ndarray): demeaned data, shape: (#data, #n_point, #n_channel)
    :param threads_per_block(int): number of CUDA threads per block for the kernel launches

    return (np.ndarray): shrunk covariance matrices, shape: (#data, #n_channel, #n_channel)
    """
    print("shrinakge method:", ShrinkageMethod.shrinkage_eye)

    # Constant
    data_len = len(residuals)
    n_point = residuals.shape[1]
    n_channel = residuals.shape[2]

    n_block = int(np.ceil(data_len / threads_per_block))

    # Upload the input once. Handing a host array to a kernel makes numba copy it
    # to the device and back on every launch, and two kernels read this array.
    residuals_device = cuda.to_device(residuals)

    # sum
    out_sum_device = cuda.to_device(np.zeros((data_len, n_channel, n_channel)))

    # sum square
    out_sum_square_device = cuda.to_device(np.zeros((data_len, n_channel, n_channel)))

    # Calc sum, sum square (one thread per dataset)
    outer_sum[n_block, threads_per_block](residuals_device, out_sum_device)
    outer_sum_square[n_block, threads_per_block](residuals_device, out_sum_square_device)

    # b2
    s = out_sum_device.copy_to_host() / n_point
    s2 = out_sum_square_device.copy_to_host() / n_point
    b2 = np.sum(s2 - s * s, axis = (1, 2)) / n_point

    # calculate the scalar estimators to find the optimal shrinkage:
    # m, d^2, b^2 as in Ledoit & Wolf paper
    # m - shape: (data_len)
    # d2 - shape: (data_len)
    # b2 - shape: (data_len)
    # A single identity matrix broadcasts over the leading data axis, so there is
    # no need to materialise data_len copies of it.
    identity = np.eye(n_channel)

    diag_s = np.diagonal(s, axis1 = 1, axis2 = 2)
    m = np.sum(diag_s, axis = 1) / n_channel
    d2 = np.sum((s - m[:, None, None] * identity) ** 2, axis = (1, 2))

    b2 = np.minimum(d2, b2)

    # shrink covariance matrix
    s_shrink = (b2 / d2 * m)[:, None, None] * identity + ((d2 - b2) / d2)[:, None, None] * s

    # correction for degrees of freedom
    dof = n_point - 1
    s_shrink = s_shrink * n_point / dof

    return s_shrink
64
+
65
def _covariance_diag(residuals, threads_per_block = 1024):
    """
    Compute shrunk covariance matrices for a stack of datasets, shrinking the
    off-diagonal elements of every sample covariance toward zero.
    **residuals should be demeaned before!

    Schäfer, J., & Strimmer, K. (2005). "A Shrinkage Approach to Large-Scale
    Covariance Matrix Estimation and Implications for Functional Genomics."

    :param residuals(np.ndarray): demeaned data, shape: (#data, #n_point, #n_channel)
    :param threads_per_block(int): number of CUDA threads per block for the kernel launches

    return (np.ndarray): shrunk covariance matrices, shape: (#data, #n_channel, #n_channel)
    """
    print("shrinakge method:", ShrinkageMethod.shrinkage_diag)

    # Constant
    data_len = len(residuals)
    n_point = residuals.shape[1]
    n_channel = residuals.shape[2]

    n_block = int(np.ceil(data_len / threads_per_block))

    # Upload the input once. Handing a host array to a kernel makes numba copy it
    # to the device and back on every launch, and two kernels read this array.
    residuals_device = cuda.to_device(residuals)

    # sum
    out_sum_device = cuda.to_device(np.zeros((data_len, n_channel, n_channel)))

    # sum square
    out_sum_square_device = cuda.to_device(np.zeros((data_len, n_channel, n_channel)))

    # Calc sum, sum square (one thread per dataset)
    outer_sum[n_block, threads_per_block](residuals_device, out_sum_device)
    outer_sum_square[n_block, threads_per_block](residuals_device, out_sum_square_device)

    # Bring both results to the host exactly once. Everything below is cheap
    # elementwise work, so it stays on the host instead of relying on implicit
    # device-to-host conversions each time a device array meets a numpy call.
    out_sum = out_sum_device.copy_to_host()
    out_sum_square = out_sum_square_device.copy_to_host()

    # s
    dof = n_point - 1
    s = out_sum / dof

    # var - diagonal of every covariance matrix, shape: (data_len, n_channel)
    stack_var = np.diagonal(s, axis1 = 1, axis2 = 2)

    # std
    stack_std = np.sqrt(stack_var)

    # sum mean
    stack_s_mean = out_sum / np.expand_dims(stack_std, 1) / np.expand_dims(stack_std, 2) / dof

    # s2 mean
    stack_s2_mean = out_sum_square / np.expand_dims(stack_var, 1) / np.expand_dims(stack_var, 2) / dof

    # var_hat
    stack_var_hat = n_point / dof ** 2 * (stack_s2_mean - stack_s_mean ** 2)

    # mask - off-diagonal elements only
    mask = ~np.eye(n_channel, dtype=bool)

    # lamb - shrinkage intensity per dataset, clamped to [0, 1]
    stack_lamb = np.sum(stack_var_hat[:, mask], axis = 1) / np.sum(stack_s_mean[:, mask] ** 2, axis = 1)
    stack_lamb = np.maximum(np.minimum(stack_lamb, 1), 0)

    # Scaling - 1 on the diagonal, (1 - lamb) everywhere else
    stack_scaling_mats = np.where(mask, (1 - stack_lamb)[:, None, None], 1.0)
    stack_s_shrink = s * stack_scaling_mats

    return stack_s_shrink
129
+
130
+
File without changes
@@ -0,0 +1,44 @@
1
+
2
+ import numpy as np
3
+
4
def convert_1d_to_symmertic(a_1d, size, k = 0):
    """
    Convert 1d array to symmetric matrix

    :param a_1d(1d array): upper-triangle elements in row-major order
    :param size: matrix size
    :param k(int): diagonal offset of the stored triangle
                   (0: main diagonal included, 1: strictly above the diagonal, ...)

    return (np.array): symmetric matrix, shape: (size, size)
    """
    if k < 0:
        # A negative offset would store elements below the diagonal as well,
        # and mirroring would then add them twice.
        raise ValueError("k must be >= 0")

    # put it back into a 2D symmetric array
    X = np.zeros((size, size))
    X[np.triu_indices(size, k = k)] = a_1d

    # Mirror the upper triangle; the diagonal is counted twice by X + X.T,
    # so remove one copy of it.
    X = X + X.T - np.diag(np.diag(X))

    return X
22
+
23
def mean_fold_variance(variances, fold_info):
    """
    Calculate fold variance from fold info

    For every data entry and every fold pair, the two covariance matrices
    selected by the pair are averaged.

    :param variances: variances (#data, #cov.shape)
    :param fold_info(2d array): fold information - [[fold1, fold2], ...]

    return (np.array) - (#data * fold_len, cov.shape), ordered data-major
    (all folds of data 0, then all folds of data 1, ...)
    """
    variances = np.asarray(variances)
    folds = np.asarray(fold_info)

    # Nothing to average: keep the empty 1d result the loop-based version produced.
    if variances.shape[0] == 0 or folds.size == 0:
        return np.array([])

    # Select both members of every fold for all data entries at once,
    # shape: (#data, #fold, cov.shape)
    fold_means = (variances[:, folds[:, 0]] + variances[:, folds[:, 1]]) / 2

    # Flatten (#data, #fold) into one leading axis
    return fold_means.reshape((-1,) + variances.shape[2:])
43
+
44
+
File without changes
@@ -0,0 +1,61 @@
1
+
2
+ from numba import cuda, jit
3
+
4
@cuda.jit
def outer_sum(matrices, out):
    """
    Accumulate the sum of row-wise outer products of every matrix

    One thread handles one matrix: out[i] += sum over rows r of outer(r, r).
    The result is added onto whatever out already holds.

    :param matrices(Device array): , shape: (#data, #row, #column)
    :param out(Device array): accumulated output , shape: (#data, #column, #column)
    """
    data_i = cuda.grid(1)

    # Surplus threads of the last block have nothing to do
    if data_i >= len(matrices):
        return

    matrix = matrices[data_i]
    n_row = matrix.shape[0]
    n_col = matrix.shape[1]

    for row_i in range(n_row):
        for j in range(n_col):
            for k in range(n_col):
                out[data_i, j, k] += matrix[row_i, j] * matrix[row_i, k]
15
+
16
@cuda.jit
def outer_sum_square(matrices, out):
    """
    Accumulate the sum of squared row-wise outer products of every matrix

    One thread handles one matrix: out[i] += sum over rows r of outer(r, r) ** 2
    (element-wise square). The result is added onto whatever out already holds.

    :param matrices(Device array): , shape: (#data, #row, #column)
    :param out(Device array): accumulated output , shape: (#data, #column, #column)
    """
    data_i = cuda.grid(1)

    # Surplus threads of the last block have nothing to do
    if data_i >= len(matrices):
        return

    matrix = matrices[data_i]
    n_row = matrix.shape[0]
    n_col = matrix.shape[1]

    for row_i in range(n_row):
        for j in range(n_col):
            for k in range(n_col):
                out[data_i, j, k] += (matrix[row_i, j] * matrix[row_i, k]) ** 2
27
+
28
@cuda.jit
def scaling(out, lambs):
    """
    Write (1 - lamb) into every off-diagonal element of each matrix

    One thread handles one matrix; diagonal elements are left untouched.

    :param out(Device array): matrices modified in place , shape: (#data, #row, #column)
    :param lambs(Device array): one value per matrix , shape: (#data)
    """
    i = cuda.grid(1)

    if i < len(out):
        # Read lambs only after the bounds check: surplus threads of the last
        # block would otherwise index past the end of lambs.
        lamb = lambs[i]

        nr = out.shape[1]
        nc = out.shape[2]

        for j in range(nr):
            for k in range(nc):
                if j != k:
                    out[i][j][k] = (1 - lamb)
42
+
43
@cuda.jit(device=True, inline=True)
def matmul(a,b, out):
    """
    Matrix multiplication a @ b, accumulated into out

    The products are added onto the current content of out, so out has to be
    zero-filled by the caller to obtain a plain a @ b.

    :param a(np.array): 2d matrix
    :param b(np.array): 2d matrix
    :param out(device array): output
    """
    n_row = a.shape[0]
    n_inner = a.shape[1] # equals the row count of b
    n_col = b.shape[1]

    for row in range(n_row):
        for col in range(n_col):
            for inner in range(n_inner):
                out[row, col] += a[row, inner] * b[inner, col]
    return out
60
+
61
+
@@ -0,0 +1,23 @@
1
+
2
+ from numba import cuda, jit
3
+
4
@cuda.jit
def set_mask(neighbors, brain_1d_indexes, out):
    """
    Set neighbor mask(iterate over all neighbors)

    One thread handles one center: every channel whose brain index equals one
    of the center's neighbor positions is flagged with 1 in the output row.

    :param neighbors(np.array): list of neighbor , shape: (#center, #neighbor)
    :param brain_1d_indexes(np.array): , shape: #channel
    :param out: masked_residual, output device memory , shape: (#center, #channel)
    """
    center_i = cuda.grid(1)

    # Surplus threads of the last block have nothing to do
    if center_i >= len(neighbors):
        return

    n_neighbor = neighbors.shape[1]
    n_channel = len(brain_1d_indexes)

    for neighbor_i in range(n_neighbor):
        neighbor_pos = neighbors[center_i, neighbor_i]

        for channel_i in range(n_channel):
            if brain_1d_indexes[channel_i] == neighbor_pos:
                out[center_i, channel_i] = 1
22
+
23
+
@@ -0,0 +1,125 @@
1
+
2
+ from numba import cuda, jit
3
+ from basic_operations import matmul
4
+
5
@jit(nopython=True)
def upper_tri_1d_index(i, j, n_col, k):
    """
    Get upper triangle 1d index

    Example with n_col = 5, k = 1)

    (0,1), (0,2), (0,3), (0,4) -> 0, 1, 2, 3
    (1,2), (1,3), (1,4) -> 4, 5, 6
    (2,3), (2,4) -> 7, 8
    (3,4) -> 9

    :param i: row index (expected to be >= 0)
    :param j: column index
    :param n_col: column number
    :param k: #padding (diagonal offset of the triangle)

    return: 1d index, or None if (i, j) lies below the diagonal (i > j)
    """
    if i > j:
        return None

    # Row r of the triangle holds (n_col - k) - r elements, so the rows before
    # row i hold i * (n_col - k) - (0 + 1 + ... + (i - 1)) elements in total.
    # i * (i - 1) is always even, so the integer division is exact.
    filled_before_row = i * (n_col - k) - (i * (i - 1)) // 2

    return filled_before_row + (j - i - k)
30
+
31
@jit(nopython=True)
def lower_tri_1d_index(i, j):
    """
    Get lower triangle 1d index

    Row r contributes (r - 1) elements for r >= 1, e.g.
    (1,0) -> 0
    (2,0), (2,1) -> 1, 2
    (3,0), (3,1), (3,2) -> 3, 4, 5

    :param i: row index (expected to be >= 0)
    :param j: column index

    return: 1d index, or None if (i, j) lies above the diagonal (i < j)
    """
    if i < j:
        return None

    # Elements stored before row i: 0 + 1 + ... + (i - 1) = i * (i - 1) / 2.
    # i * (i - 1) is always even, so the integer division is exact.
    total_fill = (i * (i - 1)) // 2

    return total_fill + j
47
+
48
@cuda.jit
def diag(matrices, out):
    """
    Extract the diagonal of every matrix

    One thread handles one matrix.

    :param matrices(Device array): , shape: (#data, #row, #column)
    :param out(Device array): diagonals , shape: (#data, #row)
    """
    data_i = cuda.grid(1)

    # Surplus threads of the last block have nothing to do
    if data_i >= len(matrices):
        return

    matrix = matrices[data_i]

    for d in range(len(matrix)):
        out[data_i, d] = matrix[d, d]
58
+
59
@cuda.jit
def eyes(out):
    """
    Set the diagonal of every matrix to 1

    One thread handles one matrix; off-diagonal elements are left untouched,
    so out has to be zero-filled by the caller to obtain identity matrices.

    :param out(Device array): matrices modified in place , shape: (#data, #row, #column)
    """
    i = cuda.grid(1)

    if i < len(out):
        # The diagonal ends at the shorter side; looping to the row count alone
        # would write out of bounds when there are fewer columns than rows.
        n_diag = min(out.shape[1], out.shape[2])

        for j in range(n_diag):
            out[i][j][j] = 1
70
+
71
@cuda.jit
def rdm_from_kernel(kernels, div, out):
    """
    Calculate rdm matrix

    One thread handles one (data, fold) pair. For every condition pair
    (a, b) with a < b the dissimilarity
    (K[a][a] + K[b][b] - K[a][b] - K[b][a]) / div
    is written at the pair's upper-triangle 1d index.

    :param kernels(Device array): kernel, shape: (n_data, n_fold, n_cond, n_cond))
    :param div(int): div value
    :param out(Device array): rdm output, shape: (n_data, n_fold, n_dissim)
    """
    n_data = kernels.shape[0]
    n_validation = kernels.shape[1]
    n_cond = kernels.shape[-1]

    i, j = cuda.grid(2)

    # Surplus threads of the grid have nothing to do
    if i >= n_data or j >= n_validation:
        return

    kernel = kernels[i][j]

    for row_i in range(n_cond):
        # Only the strict upper triangle holds distinct condition pairs
        for column_i in range(row_i + 1, n_cond):
            dissim_i = int(upper_tri_1d_index(row_i, column_i, n_cond, 1))

            # Assign dissim value
            same_terms = kernel[row_i][row_i] + kernel[column_i][column_i]
            cross_terms = kernel[row_i][column_i] + kernel[column_i][row_i]
            out[i][j][dissim_i] = (same_terms - cross_terms) / div
99
+
100
@cuda.jit
def calc_kernel(measurments, precisions, fold_info, out1, out2):
    """
    Calculate rdm kernel for calculating crossnobis

    One thread handles one (data, fold) pair and computes
    measurements[fold1] @ precision @ measurements[fold2].T
    Both outputs are accumulated into, so they have to be zero-filled by the caller.

    :param measurments(Device array): , shape: (n_data, n_run, n_cond, n_neighbor)
    :param precisions(Device array): , shape: (n_data, n_fold, n_neighbor, n_neighbor)
    :param fold_info(Device array): fold information - [[fold1, fold2], ...]
    :param out1(Device array): intermediate matmul output , shape: (n_data, n_fold, n_cond, n_neighbor)
    :param out2(Device array): kernel output , shape: (n_data, n_fold, n_cond, n_cond))
    """
    i, j = cuda.grid(2)

    # Surplus threads of the grid have nothing to do
    if i >= out1.shape[0] or j >= out1.shape[1]:
        return

    # Runs that make up this fold
    data1_i = fold_info[j, 0]
    data2_i = fold_info[j, 1]

    # measurements1 @ noise @ measurements2.T
    matmul(measurments[i][data1_i], precisions[i][j], out1[i][j])
    matmul(out1[i][j], measurments[i][data2_i].T, out2[i][j])
124
+
125
+
@@ -0,0 +1,233 @@
1
+
2
+ # Common Libraries
3
+ import numpy as np
4
+ from numba import cuda, jit
5
+ import cupy as cp
6
+ import itertools
7
+ from tqdm import trange
8
+
9
+ # Custom Libraries
10
+ from types import ShrinkageMethod
11
+ from cores.cpu.matrix import convert_1d_to_symmertic, mean_fold_variance
12
+ from cores.cpgpu.stats import _covariance_diag, _covariance_eye
13
+ from cores.gpu.mask import set_mask
14
+ from cores.gpu.matrix import calc_kernel, rdm_from_kernel
15
+
16
+ # Functions
17
def calc_sl_precision(residuals,
                      neighbors,
                      n_split_data,
                      masking_indexes,
                      n_thread_per_block = 1024,
                      shrinkage_method = "shrinkage_diag"):
    """
    Calculate precision

    For every searchlight center the residuals of its neighbors are selected,
    demeaned over the point axis, turned into a shrunk covariance matrix per run
    and inverted. Only the upper triangle (diagonal included) of every precision
    matrix is kept.

    :param residuals(np.ndarray): , shape: (#run, #point, #channel)
    :param neighbors(np.ndarray): , shape: (#center, #neighbor)
    :param n_split_data(int): how many datas to process at once
    :param masking_indexes(np.array): , shape: (#channel) / index of masking brain
    :param n_thread_per_block(int): block per thread
    :param shrinkage_method(str): one of the ShrinkageMethod values

    return (list of np.ndarray): one array per processed chunk,
           shape of each: (#center in chunk, #run, #upper triangle element)
    """
    # Resolve the estimator up front so that an unknown method fails with a clear
    # message instead of an unbound local variable inside the loop.
    if shrinkage_method == ShrinkageMethod.shrinkage_diag:
        estimate_covariance = _covariance_diag
    elif shrinkage_method == ShrinkageMethod.shrinkage_eye:
        estimate_covariance = _covariance_eye
    else:
        raise ValueError(f"Unknown shrinkage_method: {shrinkage_method!r}")

    n_run = residuals.shape[0]
    n_p = residuals.shape[1]
    n_channel = residuals.shape[-1]

    n_center = len(neighbors)
    n_block = int(np.ceil(n_split_data / n_thread_per_block))
    n_neighbor = neighbors.shape[-1]
    r, c = np.triu_indices(n_neighbor, k = 0)

    mempool = cp.get_default_memory_pool()

    chunk_precisions = []
    for i in trange(0, n_center, n_split_data):
        # select neighbors
        target_neighbors = neighbors[i:i + n_split_data, :]
        len_target = len(target_neighbors)

        # output_1d
        mask_out = cuda.to_device(np.zeros((len_target, n_channel)))

        # Make mask - neighbor
        set_mask[n_block, n_thread_per_block](target_neighbors, masking_indexes, mask_out)

        # sync
        cuda.synchronize()

        # Apply mask
        cpu_mask = mask_out.copy_to_host()
        masked_residuals = np.array([residuals[:, :, cpu_mask[j] == 1] for j in range(len_target)])

        # Drop the device buffer. (cuda.defer_cleanup() is a context manager;
        # calling it without "with" has no effect, so it is not called here.)
        del mask_out

        # Calculate demean
        target_residuals = masked_residuals.reshape(-1, n_p, n_neighbor)
        mean_residuals = np.mean(target_residuals, axis = 1, keepdims = True)
        target_residuals = (target_residuals - mean_residuals)

        # Calculate covariance
        covariances = estimate_covariance(target_residuals)

        # Calculate precision matrix
        stack_precisions = cp.linalg.inv(cp.asarray(covariances)).get()

        # sync
        cuda.synchronize()

        # concat - keep the upper triangle of every precision matrix only
        stack_precisions = stack_precisions.reshape(len_target, n_run, n_neighbor, n_neighbor)
        stack_precisions = stack_precisions[:, :, r, c]

        # add chunk
        chunk_precisions.append(stack_precisions)

        # Clean data
        mempool.free_all_blocks()

    return chunk_precisions
100
+
101
def calc_sl_rdm_crossnobis(n_split_data,
                           centers,
                           neighbors,
                           precs,
                           measurements,
                           masking_indexes,
                           conds,
                           sessions,
                           n_thread_per_block = 1000):
    """
    Calculate searchlight crossnobis rdm

    :param n_split_data(int): how many datas to process at once
    :param centers(np.array): centers, shape: (#center)
    :param neighbors(np.array): neighbors , shape: (#center, #neighbor)
    :param precs(np.array): precisions , shape: (#channel, #run, #precision_mat_element)
    :param measurements(np.array): measurment values , shape: (#cond, #channel)
    :param masking_indexes: (np.array) , shape: (#channel) , index of masking brain
    :param conds: conds(np.array - 1d)
    :param sessions(np.array - 1d): session corressponding to conds
    :param n_thread_per_block(int): , block per thread

    return (tuple): (list of np.ndarray, np.ndarray)
           - fold-averaged rdms, one array per processed chunk,
             shape of each: (#center in chunk, #dissimilarity)
           - unique conditions
    """
    # Data configuration
    uq_sessions = np.unique(sessions)
    uq_conds = np.unique(conds)
    n_run = len(uq_sessions)
    n_cond = len(uq_conds)
    n_dissim = int((n_cond * n_cond - n_cond) / 2)
    n_neighbor = neighbors.shape[-1]
    n_channel = measurements.shape[-1]

    assert n_channel == masking_indexes.shape[0], "n_channel should be same"

    # Fold - every pair of sessions. The host copy is kept so that it does not
    # have to be fetched back from the device for every chunk.
    fold_pairs = np.array(list(itertools.combinations(np.arange(n_run), 2)))
    n_fold = len(fold_pairs)
    fold_info = cuda.to_device(fold_pairs)

    # GPU Configuration
    n_block = int(np.ceil(n_split_data / n_thread_per_block))
    n_thread_per_block_2d = int(np.ceil(np.sqrt(n_thread_per_block)))
    # The 2d kernels index (data, fold), so each grid axis only has to cover its
    # own extent. Rounding up guarantees at least one block per axis.
    block_2ds = (max(1, int(np.ceil(n_split_data / n_thread_per_block_2d))),
                 max(1, int(np.ceil(n_fold / n_thread_per_block_2d))))
    thread_2ds = (n_thread_per_block_2d, n_thread_per_block_2d)

    # Number of elements in the upper triangle (diagonal included) of a precision matrix
    prec_mat_shape = int((n_neighbor * n_neighbor - n_neighbor) / 2) + n_neighbor

    # Memory pool
    mempool = cp.get_default_memory_pool()

    # Calculation
    rdm_outs = []
    for i in trange(0, len(centers), n_split_data):
        # select neighbors
        target_centers = centers[i:i + n_split_data]
        target_neighbors = neighbors[i:i + n_split_data, :]

        n_target_centers = len(target_centers)

        # output_1d
        mask_out = cuda.to_device(np.zeros((n_target_centers, n_channel)))

        # Make mask - neighbor
        set_mask[n_block, n_thread_per_block](target_neighbors, masking_indexes, mask_out)
        cuda.synchronize()

        # Apply mask
        cpu_mask = mask_out.copy_to_host()
        masked_measurements = np.array([measurements[:, cpu_mask[j] == 1] for j in range(n_target_centers)])
        masked_measurements = cp.asarray(masked_measurements)

        # Drop the device buffer. (cuda.defer_cleanup() is a context manager;
        # calling it without "with" has no effect, so it is not called here.)
        del mask_out

        # precision - rebuild the full matrices, average the variances of both
        # runs of every fold and invert again
        target_precs = precs[i:i+n_target_centers].reshape(-1, prec_mat_shape)
        target_precs = np.array([convert_1d_to_symmertic(pre, size = n_neighbor) for pre in target_precs])
        variances = cp.linalg.inv(cp.asarray(target_precs))
        variances = variances.reshape(n_target_centers, n_run, n_neighbor, n_neighbor).get()
        fold_preicions = cp.linalg.inv(cp.asarray(mean_fold_variance(variances, fold_pairs)))
        fold_preicions = cuda.to_device(fold_preicions.reshape(n_target_centers, n_fold, n_neighbor, n_neighbor).get())
        mempool.free_all_blocks()

        # Avg conds per session
        avg_measurements = []
        for session in uq_sessions:
            filtering_session = sessions == session
            sess_cond = conds[filtering_session]
            sess_measurements = cp.compress(filtering_session, masked_measurements, axis = 1)

            mean_measurments = []
            for cond in uq_conds:
                filtering_cond = sess_cond == cond
                cond_measurments = cp.compress(filtering_cond, sess_measurements, axis = 1)
                mean_cond_measurement = cp.mean(cond_measurments, axis = 1)
                mean_measurments.append(cp.expand_dims(mean_cond_measurement, axis = 1))

            avg_measurements.append(cp.expand_dims(cp.concatenate(mean_measurments, axis = 1), axis = 1))
        avg_measurements = cp.concatenate(avg_measurements, axis = 1).get()

        mempool.free_all_blocks()

        # make kernel
        avg_measurements = cuda.to_device(avg_measurements)

        matmul1_out = cuda.to_device(np.zeros((n_target_centers, n_fold, n_cond, n_neighbor)))
        kernel_out = cuda.to_device(np.zeros((n_target_centers, n_fold, n_cond, n_cond)))
        calc_kernel[block_2ds, thread_2ds](avg_measurements, fold_preicions, fold_info, matmul1_out, kernel_out)

        cuda.synchronize()
        del matmul1_out

        rdm_out = cuda.to_device(np.zeros((n_target_centers, n_fold, n_dissim)))
        rdm_from_kernel[block_2ds, thread_2ds](kernel_out, n_neighbor, rdm_out)

        cuda.synchronize()

        # Average over folds. The data is already on the host here, so the mean
        # is taken with numpy.
        mean_rdms = np.mean(rdm_out.copy_to_host(), axis = 1)
        rdm_outs.append(mean_rdms)

        del kernel_out
        del rdm_out

    return rdm_outs, uq_conds
boostrsa/types.py ADDED
@@ -0,0 +1,6 @@
1
+
2
class ShrinkageMethod:
    """
    Names of the covariance shrinkage methods that can be selected
    """
    # selects the estimator shrinking toward a scaled identity matrix
    shrinkage_eye = "shrinkage_eye"

    # selects the estimator shrinking the off-diagonal elements
    shrinkage_diag = "shrinkage_diag"
5
+
6
+
@@ -0,0 +1,22 @@
1
+
2
+ MIT License
3
+
4
+ Copyright (c) 2024 seojin yoon
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.1
2
+ Name: boostrsa
3
+ Version: 0.0.1.dev0
4
+ Summary: This is toolbox for boosting calculation speed using GPU
5
+ Home-page: https://github.com/SeojinYoon/boostrsa.git
6
+ Author: seojin
7
+ Author-email: pures1@hanyang.ac.kr
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE.txt
14
+ Requires-Dist: numpy
15
+ Requires-Dist: pandas
16
+ Requires-Dist: tqdm
17
+
18
+
19
+ # Boostrsa
20
+
21
+ This library is based on rsatoolbox(https://github.com/rsagroup/rsatoolbox).
22
+
23
+ The purpose of this library is to boost calculation speed for searchlight RSA (Representational Similarity Analysis). However, it is still in development, so the library currently only includes tools for boosting the crossnobis distance calculation used to construct RDMs (Representational Dissimilarity Matrices) over the whole brain.
24
+
25
+ ## How it works?
26
+
27
+ Basically, this library uses an NVIDIA GPU instead of the CPU for parallel processing. In the searchlight analysis, the data targeted for constructing the RDM involves a voxel and its neighboring voxels. This is well-suited for parallel processing since the calculations for each target are independent of one another. This library utilizes GPU-compatible libraries such as Numba and Cupy to facilitate this process.
28
+
29
+ ## Dependencies
30
+
31
+ To use this library, you need to have an NVIDIA GPU and CUDA. Additionally, this library heavily relies on Cupy and Numba. It is essential to install the appropriate versions of these libraries.
32
+
33
+ ### Cupy
34
+
35
+ Cupy is designed to work with specific versions of CUDA. See cupy's guide and install the version that corresponds to your system (https://github.com/cupy/cupy).
36
+
37
+ Please check your cuda version to install cupy.
38
+ - versions
39
+ - cupy-cuda10x (for cuda 10)
40
+ - cupy-cuda11x (for cuda 11)
41
+ - cupy-cuda12x (for cuda 12)
42
+
43
+
44
+ If you have CUDA 10 installed on your computer, then install cupy-cuda10x. ex) pip install cupy-cuda10x
45
+
46
+ ### Numba
47
+
48
+ The numba library is a powerful tool that enables Python functions to be compiled to machine code at runtime using LLVM. One of its key features is the ability to generate native code for different architectures, including CPUs and GPUs, which greatly accelerates the execution of data-heavy and computationally intense Python code.
49
+
50
+ Please see installation guideline of numba (https://numba.pydata.org/numba-doc/latest/user/installing.html).
51
+
52
+ Pip installation:
53
+ - pip install numba
54
+
55
+
56
+ ## Installation
57
+
58
+ pip install boostrsa
59
+
60
+ # Checked version
61
+
62
+ These are the most recently checked environments.
63
+
64
+ - OS
65
+ - Linux, ubuntu - 21.10
66
+ - numba
67
+ - 0.57.0 ~ 0.59.1 is fine to use
68
+ - cupy
69
+ - cupy-cuda11x
70
+ - cupy-cuda12x
71
+
72
+ # Future works
73
+
74
+ - Add calculation sources to get neighbors and centers (boost)
75
+ - Add RSA sources (boost)
76
+ - Support other calculation methods except crossnobis distance
@@ -0,0 +1,17 @@
1
+ boostrsa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ boostrsa/searchlight.py,sha256=KMVqjdhCU_ZNZe92hYJEWuAlS3s5JoA0rv2uciGSqQs,8803
3
+ boostrsa/types.py,sha256=lebVS3iq3tWoUr6EtQqFOSQ6DebK65-QcdvmPhNBcmc,100
4
+ boostrsa/cores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ boostrsa/cores/cpgpu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ boostrsa/cores/cpgpu/stats.py,sha256=fKIOfchntSeGXkEYUyay5Tv2sx7ZFcLZekYHV8Jw9SM,4494
7
+ boostrsa/cores/cpu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ boostrsa/cores/cpu/matrix.py,sha256=u8xwPP1NZy2EEbZuQfBs7zgEewMfXMXNO0CdnHWbqgk,1046
9
+ boostrsa/cores/gpu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ boostrsa/cores/gpu/basic_operations.py,sha256=JXG_AenOLbkFZivdcBqjq9bEpR5Tg8pZ58Uh8DeE0dU,1329
11
+ boostrsa/cores/gpu/mask.py,sha256=PJ5IBElI9QK-5gKnaHzM_McwiTdI4eAUjLaeXU1wUp0,669
12
+ boostrsa/cores/gpu/matrix.py,sha256=8ZC9I1MuF-9No30XyCLw7bCET7UgYels-3_WJh5j-64,3551
13
+ boostrsa-0.0.1.dev0.dist-info/LICENSE.txt,sha256=rRxZ2W7igWGwEeG4euxXY27O0EpCLXsfNJDA6flkrAk,1069
14
+ boostrsa-0.0.1.dev0.dist-info/METADATA,sha256=4FHJ1q7hZF-6-3nP85IaCWIepVukgyOrddVQF8q2vHg,2943
15
+ boostrsa-0.0.1.dev0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
16
+ boostrsa-0.0.1.dev0.dist-info/top_level.txt,sha256=B-iYD_cA6HhxZfPlR0O10gaczZ5lv9yMAbsWqRhc1Ms,9
17
+ boostrsa-0.0.1.dev0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.43.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ boostrsa