biomedisa 2024.5.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biomedisa/__init__.py +53 -0
- biomedisa/__main__.py +18 -0
- biomedisa/biomedisa_features/DataGenerator.py +299 -0
- biomedisa/biomedisa_features/DataGeneratorCrop.py +121 -0
- biomedisa/biomedisa_features/PredictDataGenerator.py +87 -0
- biomedisa/biomedisa_features/PredictDataGeneratorCrop.py +74 -0
- biomedisa/biomedisa_features/__init__.py +0 -0
- biomedisa/biomedisa_features/active_contour.py +434 -0
- biomedisa/biomedisa_features/amira_to_np/__init__.py +0 -0
- biomedisa/biomedisa_features/amira_to_np/amira_data_stream.py +980 -0
- biomedisa/biomedisa_features/amira_to_np/amira_grammar.py +369 -0
- biomedisa/biomedisa_features/amira_to_np/amira_header.py +290 -0
- biomedisa/biomedisa_features/amira_to_np/amira_helper.py +72 -0
- biomedisa/biomedisa_features/assd.py +167 -0
- biomedisa/biomedisa_features/biomedisa_helper.py +801 -0
- biomedisa/biomedisa_features/create_slices.py +286 -0
- biomedisa/biomedisa_features/crop_helper.py +586 -0
- biomedisa/biomedisa_features/curvop_numba.py +149 -0
- biomedisa/biomedisa_features/django_env.py +172 -0
- biomedisa/biomedisa_features/keras_helper.py +1219 -0
- biomedisa/biomedisa_features/nc_reader.py +179 -0
- biomedisa/biomedisa_features/pid.py +52 -0
- biomedisa/biomedisa_features/process_image.py +253 -0
- biomedisa/biomedisa_features/pycuda_test.py +84 -0
- biomedisa/biomedisa_features/random_walk/__init__.py +0 -0
- biomedisa/biomedisa_features/random_walk/gpu_kernels.py +183 -0
- biomedisa/biomedisa_features/random_walk/pycuda_large.py +826 -0
- biomedisa/biomedisa_features/random_walk/pycuda_large_allx.py +806 -0
- biomedisa/biomedisa_features/random_walk/pycuda_small.py +414 -0
- biomedisa/biomedisa_features/random_walk/pycuda_small_allx.py +493 -0
- biomedisa/biomedisa_features/random_walk/pyopencl_large.py +760 -0
- biomedisa/biomedisa_features/random_walk/pyopencl_small.py +441 -0
- biomedisa/biomedisa_features/random_walk/rw_large.py +390 -0
- biomedisa/biomedisa_features/random_walk/rw_small.py +310 -0
- biomedisa/biomedisa_features/remove_outlier.py +399 -0
- biomedisa/biomedisa_features/split_volume.py +274 -0
- biomedisa/deeplearning.py +519 -0
- biomedisa/interpolation.py +371 -0
- biomedisa/mesh.py +406 -0
- biomedisa-2024.5.14.dist-info/LICENSE +191 -0
- biomedisa-2024.5.14.dist-info/METADATA +306 -0
- biomedisa-2024.5.14.dist-info/RECORD +44 -0
- biomedisa-2024.5.14.dist-info/WHEEL +5 -0
- biomedisa-2024.5.14.dist-info/top_level.txt +1 -0
@@ -0,0 +1,493 @@
|
|
1
|
+
##########################################################################
|
2
|
+
## ##
|
3
|
+
## Copyright (c) 2023 Philipp Lösel. All rights reserved. ##
|
4
|
+
## ##
|
5
|
+
## This file is part of the open source project biomedisa. ##
|
6
|
+
## ##
|
7
|
+
## Licensed under the European Union Public Licence (EUPL) ##
|
8
|
+
## v1.2, or - as soon as they will be approved by the ##
|
9
|
+
## European Commission - subsequent versions of the EUPL; ##
|
10
|
+
## ##
|
11
|
+
## You may redistribute it and/or modify it under the terms ##
|
12
|
+
## of the EUPL v1.2. You may not use this work except in ##
|
13
|
+
## compliance with this Licence. ##
|
14
|
+
## ##
|
15
|
+
## You can obtain a copy of the Licence at: ##
|
16
|
+
## ##
|
17
|
+
## https://joinup.ec.europa.eu/page/eupl-text-11-12 ##
|
18
|
+
## ##
|
19
|
+
## Unless required by applicable law or agreed to in ##
|
20
|
+
## writing, software distributed under the Licence is ##
|
21
|
+
## distributed on an "AS IS" basis, WITHOUT WARRANTIES ##
|
22
|
+
## OR CONDITIONS OF ANY KIND, either express or implied. ##
|
23
|
+
## ##
|
24
|
+
## See the Licence for the specific language governing ##
|
25
|
+
## permissions and limitations under the Licence. ##
|
26
|
+
## ##
|
27
|
+
##########################################################################
|
28
|
+
|
29
|
+
import numpy as np
|
30
|
+
import pycuda.driver as cuda
|
31
|
+
import pycuda.gpuarray as gpuarray
|
32
|
+
from pycuda.compiler import SourceModule
|
33
|
+
from biomedisa_features.random_walk.gpu_kernels import _build_kernel_fill
|
34
|
+
import numba
|
35
|
+
|
36
|
+
def walk(data, slices, indices_all, indices_child, nbrw, sorw, name, ctx, queue):
    """Run the random walks for one chunk of labelled slices.

    Parameters
    ----------
    data : numpy.ndarray
        Image volume; its shape is the shape of every returned hit map.
    slices : sequence of three numpy.ndarray
        Labelled slice stacks, ``slices[k]`` belonging to axis ``k``.
    indices_all : iterable of (index, axis) pairs
        Positions of all labelled slices.
    indices_child : iterable of (index, axis) pairs
        The pairs that this call has to process.
    nbrw, sorw, name
        Forwarded unchanged to ``_walk_on_current_gpu``.
    ctx, queue
        Not used by this function.
        NOTE(review): presumably kept so the signature matches other
        backends - confirm against the callers.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(len(labels),) + data.shape`` where
        ``labels`` are the non-negative label values found in ``slices``.
        Labels that do not occur in this chunk keep an all-zero map.
    """
    # The empty float64 seed keeps the label arrays float64, as np.append on
    # np.zeros(0) did before.
    labels = np.unique(np.concatenate(
        [np.zeros(0)] + [np.unique(slices[k]) for k in range(3)]))

    slicesChunk, indicesChunk = [], []
    chunk_label_parts = [np.zeros(0)]
    foundAxis = [0] * 3
    for k in range(3):
        slices_tmp, indices_tmp = _extract_slices(slices[k], indices_all, indices_child, k)
        if indices_tmp:
            foundAxis[k] = 1
        slicesChunk.append(slices_tmp)
        indicesChunk.append(indices_tmp)
        chunk_label_parts.append(np.unique(slices_tmp))
    labelsChunk = np.unique(np.concatenate(chunk_label_parts))

    # remove negative labels from both lists
    labels = labels[~(labels < 0)]
    labelsChunk = labelsChunk[~(labelsChunk < 0)]

    walkmapChunk = _walk_on_current_gpu(data, slicesChunk, labelsChunk, indicesChunk, nbrw, sorw, name, foundAxis)

    if walkmapChunk.shape[0] == len(labels):
        return walkmapChunk

    # The chunk only saw a subset of the labels: scatter its maps into the
    # rows of the full label list (np.isin replaces the deprecated np.in1d).
    walkmap = np.zeros((len(labels),) + data.shape, dtype=np.float32)
    chunk2Walkmap = np.nonzero(np.isin(labels, labelsChunk))[0]
    for chunkIndex, walkmapIndex in enumerate(chunk2Walkmap):
        walkmap[walkmapIndex] += walkmapChunk[chunkIndex]
    return walkmap
|
71
|
+
|
72
|
+
def _extract_slices(slices, indices_all, indicesChunk, k):
    """Pick the slices of axis ``k`` whose indices belong to the chunk.

    Parameters
    ----------
    slices : numpy.ndarray
        3-D stack of labelled slices for axis ``k``.  Row ``i`` is taken to
        correspond to the ``i``-th axis-``k`` entry of ``indices_all``.
    indices_all : iterable of (index, axis) pairs
        Positions of all labelled slices.
    indicesChunk : iterable of (index, axis) pairs
        Positions that the current chunk has to process.
    k : int
        Axis to filter for.

    Returns
    -------
    tuple
        ``(extracted, chunk_indices)``: the selected slices stacked along
        axis 0 (an empty int32 stack when nothing matches) and the list of
        chunk indices that belong to axis ``k``.
    """
    axis_indices = [idx for (idx, axis) in indices_all if axis == k]
    chunk_indices = [idx for (idx, axis) in indicesChunk if axis == k]

    # Rows of `slices` whose index also appears in the chunk.
    rows = np.nonzero(np.isin(axis_indices, chunk_indices))[0]

    extracted = np.zeros((0, slices.shape[1], slices.shape[2]), dtype=np.int32)
    if rows.size:
        # One concatenate instead of an np.append per slice; concatenating
        # with the int32 seed keeps the same dtype promotion as before.
        extracted = np.concatenate((extracted, slices[rows]), axis=0)
    return extracted, chunk_indices
|
80
|
+
|
81
|
+
def _calc_label_walking_area(sliceData, labelValue):
    """Return an array like ``sliceData`` holding 1 where it equals ``labelValue`` and 0 elsewhere."""
    is_label = sliceData == labelValue
    return is_label.astype(sliceData.dtype)
|
85
|
+
|
86
|
+
@numba.jit(nopython=True)
def _calc_var(raw, A):
    """Compute a per-pixel intensity spread inside the area marked in ``A``.

    For every interior pixel with ``A == 1`` the squared differences between
    that pixel and the pixels of its 3x3 neighbourhood that also have
    ``A == 1`` (the pixel itself included) are averaged.  Averages below 1.0
    are stored as 1.0.  Border pixels and pixels outside the area stay 0.

    Parameters
    ----------
    raw : 2-D array
        Image values of one slice.
    A : 2-D array
        Area mask of the same shape; entries equal to 1 are inside the area.

    Returns
    -------
    2-D array with the shape of ``raw``.
    """
    n_rows, n_cols = raw.shape
    beta = np.zeros((n_rows, n_cols))
    for row in range(1, n_rows - 1):
        for col in range(1, n_cols - 1):
            if A[row, col] != 1:
                continue
            centre = raw[row, col]
            sq_dev, count = 0, 0
            for d_row in range(-1, 2):
                for d_col in range(-1, 2):
                    if A[row + d_row, col + d_col] == 1:
                        sq_dev += (centre - raw[row + d_row, col + d_col])**2
                        count += 1
            # count >= 1 because the centre pixel itself is inside the area
            spread = sq_dev / count
            beta[row, col] = 1.0 if spread < 1.0 else spread
    return beta
|
106
|
+
|
107
|
+
def _walk_on_current_gpu(raw, slices, allLabels, indices, nbrw, sorw, name, foundAxis):
    """Launch the random-walk kernel for every label and collect the hit maps.

    Parameters
    ----------
    raw : numpy.ndarray
        3-D image volume.  uint8 input is shifted by 128 into int8 and run
        through the int8 kernel; any other dtype is converted to float32.
    slices : sequence of three numpy.ndarray
        Labelled slice stacks of this chunk, one per axis.
    allLabels : sequence
        Label values to walk for; one hit map is produced per entry.
    indices : sequence of three sequences of int
        For each axis, the position in the volume of every slice in
        ``slices[k]``.
    nbrw, sorw : int
        Passed to the kernel as ``nbrw`` (number of walks per start pixel)
        and ``sorw`` (steps per walk).
    name : str
        Prefix of the progress line printed for every label.
    foundAxis : sequence of three flags
        Truthy where the corresponding axis has slices to process.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(len(allLabels),) + raw.shape``.
    """
    walkmap = np.zeros((len(allLabels),) + raw.shape, dtype=np.float32)

    if raw.dtype == 'uint8':
        kernel = _build_kernel_int8()
        # uint8 arithmetic wraps, so this maps 0..255 onto -128..127
        raw = (raw - 128).astype('int8')
    else:
        kernel = _build_kernel_float32()
        raw = raw.astype(np.float32)

    # NOTE(review): presumably resets the accumulator buffer before each
    # label - confirm in gpu_kernels._build_kernel_fill.
    fill_gpu = _build_kernel_fill()

    zsh, ysh, xsh = raw.shape
    xsh_gpu = np.int32(xsh)
    ysh_gpu = np.int32(ysh)
    zsh_gpu = np.int32(zsh)

    block = (32, 32, 1)
    volume_grid = (int(xsh // 32 + 1), int(ysh // 32 + 1), int(zsh))

    # Per-axis state; entries stay None for axes without labelled slices.
    slice_counts = [None] * 3
    slice_rows = [None] * 3
    slice_cols = [None] * 3
    indices_gpu = [None] * 3
    slices_gpu = [None] * 3
    beta_gpu = [None] * 3

    for k, found in enumerate(foundAxis):
        if not found:
            continue
        indices_tmp = np.array(indices[k], dtype=np.int32)
        slices_tmp = slices[k].astype(np.int32)
        slice_counts[k], slice_rows[k], slice_cols[k] = slices_tmp.shape
        indices_gpu[k] = gpuarray.to_gpu(indices_tmp)
        slices_gpu[k] = gpuarray.to_gpu(slices_tmp)
        Beta = np.zeros(slices_tmp.shape, dtype=np.float32)
        for m in range(slice_counts[k]):
            # raw[i], raw[:, i] or raw[:, :, i] depending on the axis.  The
            # plane does not depend on the label, so it is extracted and
            # converted once per slice instead of once per label.
            plane_selector = (slice(None),) * k + (indices_tmp[m],)
            raw_plane = raw[plane_selector].astype(float)
            for n in allLabels:
                A = _calc_label_walking_area(slices_tmp[m], n)
                Beta[m] += _calc_var(raw_plane, A)
        beta_gpu[k] = gpuarray.to_gpu(Beta)

    sorw = np.int32(sorw)
    nbrw = np.int32(nbrw)
    raw_gpu = gpuarray.to_gpu(raw)
    a = np.empty(raw.shape, dtype=np.float32)
    a_gpu = cuda.mem_alloc(a.nbytes)

    for label_counter, segment in enumerate(allLabels):
        print('%s:' %(name) + ' ' + str(label_counter+1) + '/' + str(len(allLabels)))
        fill_gpu(a_gpu, xsh_gpu, ysh_gpu, block=block, grid=volume_grid)
        segment_gpu = np.int32(segment)
        for k, found in enumerate(foundAxis):
            if not found:
                continue
            axis_gpu = np.int32(k)
            # one thread per slice pixel, one grid layer per slice
            grid = (int(slice_cols[k] // 32 + 1), int(slice_rows[k] // 32 + 1), int(slice_counts[k]))
            kernel(axis_gpu, segment_gpu, raw_gpu, slices_gpu[k], a_gpu, xsh_gpu, ysh_gpu, zsh_gpu, indices_gpu[k], sorw, beta_gpu[k], nbrw, block=block, grid=grid)
        # hits of all axes for this label have accumulated in a_gpu
        cuda.memcpy_dtoh(a, a_gpu)
        walkmap[label_counter] += a
    return walkmap
|
177
|
+
|
178
|
+
def _build_kernel_int8():
    """Compile the CUDA random-walk kernel for 8-bit image data.

    The device code declares ``raw`` as ``float *`` but reads it through a
    ``char *`` cast, one byte per voxel, so the buffer passed at launch has
    to hold one byte per voxel.

    Kernel arguments, in order: ``axis, segment, raw, slices, a, xsh, ysh,
    zsh, indices, sorw, Beta, nbrw``.

    Each thread addresses one pixel of one labelled slice (``blockIdx.z``
    selects the slice).  If that pixel carries the label ``segment`` and lies
    strictly inside the volume, the thread runs ``nbrw`` walks of ``sorw``
    step attempts each, restarting from its own pixel, and atomically adds 1
    to ``a`` at every voxel it moves to.  Directions are drawn with weights
    ``exp(-(B - v)**2 / (2 * Beta[index]))`` where ``B`` is the value at the
    start pixel and ``v`` the value of the respective neighbour of the
    current position.  The random numbers are seeded with the thread's slice
    pixel index.

    Returns
    -------
    The compiled ``Funktion`` kernel as returned by
    ``SourceModule.get_function``.
    """
    # CUDA source; kept verbatim because the statement order of the random
    # number generator and of the weight accumulation determines the result.
    code = """

    __device__ float weight(float B, float *raw, float div1, int position) {
        float tmp = B - (float)(*((char*)(raw) + position));
        return exp( - tmp * tmp * div1 );
    }

    __global__ void Funktion(int axis, int segment, float *raw, int *slices, float *a, int xsh, int ysh, int zsh, int *indices, int sorw, float *Beta, int nbrw) {

        int col_g = blockIdx.x * blockDim.x + threadIdx.x;
        int row_g = blockIdx.y * blockDim.y + threadIdx.y;
        int slc_g = blockIdx.z;

        int xsh_g, ysh_g, plane, row, column;

        if (axis == 0) {
            plane = indices[slc_g];
            row = row_g;
            column = col_g;
            xsh_g = xsh;
            ysh_g = ysh;
        }
        else if (axis == 1) {
            row = indices[slc_g];
            plane = row_g;
            column = col_g;
            xsh_g = xsh;
            ysh_g = zsh;
        }
        else if (axis == 2) {
            column = indices[slc_g];
            plane = row_g;
            row = col_g;
            xsh_g = ysh;
            ysh_g = zsh;
        }

        int flat_g = xsh_g * ysh_g;
        int index = slc_g * flat_g + row_g * xsh_g + col_g;
        int flat = xsh * ysh;

        if (index<gridDim.z*flat_g && plane>0 && plane<zsh-1 && row>0 && row<ysh-1 && column>0 && column<xsh-1) {

            if (slices[index]==segment) {

                float rand;
                float W0,W1,W2,W3,W4,W5;
                int n,o,p;

                /* Initialize MRG32k3a */
                float norm = 2.328306549295728e-10;
                float m1 = 4294967087.0;
                float m2 = 4294944443.0;
                float a12 = 1403580.0;
                float a13n = 810728.0;
                float a21 = 527612.0;
                float a23n = 1370589.0;
                long k1;
                float p1, p2;
                float s10 = index, s11 = index, s12 = index, s20 = index, s21 = index, s22 = index;

                /* Compute standard deviation */
                int position = plane*flat + row*xsh + column;
                float B = (float)(*((char*)(raw) + position));
                float var = Beta[index];
                float div1 = 1 / (2 * var);

                int k = plane;
                int l = row;
                int m = column;

                int step = 0;
                int n_rw = 0;

                /* Compute random walks */
                while (n_rw < nbrw) {

                    /* Compute weights */
                    W0 = weight(B, raw, div1, position + flat);
                    W1 = weight(B, raw, div1, position - flat);
                    W2 = weight(B, raw, div1, position + xsh);
                    W3 = weight(B, raw, div1, position - xsh);
                    W4 = weight(B, raw, div1, position + 1);
                    W5 = weight(B, raw, div1, position - 1);

                    W1 += W0;
                    W2 += W1;
                    W3 += W2;
                    W4 += W3;
                    W5 += W4;

                    /* Compute random numbers with MRG32k3a */

                    /* Component 1 */
                    p1 = a12 * s11 - a13n * s10;
                    k1 = p1 / m1;
                    p1 -= k1 * m1;
                    if (p1 < 0.0){
                        p1 += m1;}
                    s10 = s11;
                    s11 = s12;
                    s12 = p1;

                    /* Component 2 */
                    p2 = a21 * s22 - a23n * s20;
                    k1 = p2 / m2;
                    p2 -= k1 * m2;
                    if (p2 < 0.0){
                        p2 += m2;}
                    s20 = s21;
                    s21 = s22;
                    s22 = p2;

                    /* Combination */
                    if (p1 <= p2) {
                        rand = W5 * ((p1 - p2 + m1) * norm);
                    }
                    else {
                        rand = W5 * ((p1 - p2) * norm);
                    }

                    /* Determine new direction of random walk */
                    if (rand<W0 || rand==0){n=1; o=0; p=0;}
                    else if (rand>=W0 && rand<W1){n=-1; o=0; p=0;}
                    else if (rand>=W1 && rand<W2){n=0; o=1; p=0;}
                    else if (rand>=W2 && rand<W3){n=0; o=-1; p=0;}
                    else if (rand>=W3 && rand<W4){n=0; o=0; p=1;}
                    else if (rand>=W4 && rand<=W5){n=0; o=0; p=-1;}

                    /* Move in new direction */
                    if (k+n>0 && k+n<zsh-1 && l+o>0 && l+o<ysh-1 && m+p>0 && m+p<xsh-1) {
                        k += n;
                        l += o;
                        m += p;
                        position = k*flat + l*xsh + m;
                        atomicAdd(&a[position], 1);
                    }

                    step += 1;

                    if (step==sorw) {
                        k = plane;
                        l = row;
                        m = column;
                        position = k*flat + l*xsh + m;
                        n_rw += 1;
                        step = 0;
                    }
                }
            }
        }
    }
    """
    # Compile the source and hand back the kernel entry point.
    mod = SourceModule(code)
    kernel = mod.get_function("Funktion")
    return kernel
|
335
|
+
|
336
|
+
def _build_kernel_float32():
    """Compile the CUDA random-walk kernel for float32 image data.

    Kernel arguments, in order: ``axis, segment, raw, slices, a, xsh, ysh,
    zsh, indices, sorw, Beta, nbrw``.  ``raw`` is indexed directly as a
    ``float`` array.

    Each thread addresses one pixel of one labelled slice (``blockIdx.z``
    selects the slice).  If that pixel carries the label ``segment`` and lies
    strictly inside the volume, the thread runs ``nbrw`` walks of ``sorw``
    step attempts each, restarting from its own pixel, and atomically adds 1
    to ``a`` at every voxel it moves to.  Directions are drawn with weights
    ``exp(-(B - v)**2 / (2 * Beta[index]))`` where ``B`` is the value at the
    start pixel and ``v`` the value of the respective neighbour of the
    current position.  The random numbers are seeded with the thread's slice
    pixel index.

    Returns
    -------
    The compiled ``Funktion`` kernel as returned by
    ``SourceModule.get_function``.
    """
    # CUDA source; kept verbatim because the statement order of the random
    # number generator and of the weight accumulation determines the result.
    code = """

    __device__ float weight(float B, float A, float div1) {
        float tmp = B - A;
        return exp( - tmp * tmp * div1 );
    }

    __global__ void Funktion(int axis, int segment, float *raw, int *slices, float *a, int xsh, int ysh, int zsh, int *indices, int sorw, float *Beta, int nbrw) {

        int col_g = blockIdx.x * blockDim.x + threadIdx.x;
        int row_g = blockIdx.y * blockDim.y + threadIdx.y;
        int slc_g = blockIdx.z;

        int xsh_g, ysh_g, plane, row, column;

        if (axis == 0) {
            plane = indices[slc_g];
            row = row_g;
            column = col_g;
            xsh_g = xsh;
            ysh_g = ysh;
        }
        else if (axis == 1) {
            row = indices[slc_g];
            plane = row_g;
            column = col_g;
            xsh_g = xsh;
            ysh_g = zsh;
        }
        else if (axis == 2) {
            column = indices[slc_g];
            plane = row_g;
            row = col_g;
            xsh_g = ysh;
            ysh_g = zsh;
        }

        int flat_g = xsh_g * ysh_g;
        int index = slc_g * flat_g + row_g * xsh_g + col_g;
        int flat = xsh * ysh;

        if (index<gridDim.z*flat_g && plane>0 && plane<zsh-1 && row>0 && row<ysh-1 && column>0 && column<xsh-1) {

            if (slices[index]==segment) {

                float rand;
                float W0,W1,W2,W3,W4,W5;
                int n,o,p;

                /* Initialize MRG32k3a */
                float norm = 2.328306549295728e-10;
                float m1 = 4294967087.0;
                float m2 = 4294944443.0;
                float a12 = 1403580.0;
                float a13n = 810728.0;
                float a21 = 527612.0;
                float a23n = 1370589.0;
                long k1;
                float p1, p2;
                float s10 = index, s11 = index, s12 = index, s20 = index, s21 = index, s22 = index;

                /* Compute standard deviation */
                int position = plane*flat + row*xsh + column;
                float B = raw[position];
                float var = Beta[index];
                float div1 = 1 / (2 * var);

                int k = plane;
                int l = row;
                int m = column;

                int step = 0;
                int n_rw = 0;

                /* Compute random walks */
                while (n_rw < nbrw) {

                    /* Compute weights */
                    W0 = weight(B, raw[position + flat], div1);
                    W1 = weight(B, raw[position - flat], div1);
                    W2 = weight(B, raw[position + xsh], div1);
                    W3 = weight(B, raw[position - xsh], div1);
                    W4 = weight(B, raw[position + 1], div1);
                    W5 = weight(B, raw[position - 1], div1);

                    W1 += W0;
                    W2 += W1;
                    W3 += W2;
                    W4 += W3;
                    W5 += W4;

                    /* Compute random numbers with MRG32k3a */

                    /* Component 1 */
                    p1 = a12 * s11 - a13n * s10;
                    k1 = p1 / m1;
                    p1 -= k1 * m1;
                    if (p1 < 0.0){
                        p1 += m1;}
                    s10 = s11;
                    s11 = s12;
                    s12 = p1;

                    /* Component 2 */
                    p2 = a21 * s22 - a23n * s20;
                    k1 = p2 / m2;
                    p2 -= k1 * m2;
                    if (p2 < 0.0){
                        p2 += m2;}
                    s20 = s21;
                    s21 = s22;
                    s22 = p2;

                    /* Combination */
                    if (p1 <= p2) {
                        rand = W5 * ((p1 - p2 + m1) * norm);
                    }
                    else {
                        rand = W5 * ((p1 - p2) * norm);
                    }

                    /* Determine new direction of random walk */
                    if (rand<W0 || rand==0){n=1; o=0; p=0;}
                    else if (rand>=W0 && rand<W1){n=-1; o=0; p=0;}
                    else if (rand>=W1 && rand<W2){n=0; o=1; p=0;}
                    else if (rand>=W2 && rand<W3){n=0; o=-1; p=0;}
                    else if (rand>=W3 && rand<W4){n=0; o=0; p=1;}
                    else if (rand>=W4 && rand<=W5){n=0; o=0; p=-1;}

                    /* Move in new direction */
                    if (k+n>0 && k+n<zsh-1 && l+o>0 && l+o<ysh-1 && m+p>0 && m+p<xsh-1) {
                        k += n;
                        l += o;
                        m += p;
                        position = k*flat + l*xsh + m;
                        atomicAdd(&a[position], 1);
                    }

                    step += 1;

                    if (step==sorw) {
                        k = plane;
                        l = row;
                        m = column;
                        position = k*flat + l*xsh + m;
                        n_rw += 1;
                        step = 0;
                    }
                }
            }
        }
    }
    """
    # Compile the source and hand back the kernel entry point.
    mod = SourceModule(code)
    kernel = mod.get_function("Funktion")
    return kernel
|
493
|
+
|