biomedisa 2024.5.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. biomedisa/__init__.py +53 -0
  2. biomedisa/__main__.py +18 -0
  3. biomedisa/biomedisa_features/DataGenerator.py +299 -0
  4. biomedisa/biomedisa_features/DataGeneratorCrop.py +121 -0
  5. biomedisa/biomedisa_features/PredictDataGenerator.py +87 -0
  6. biomedisa/biomedisa_features/PredictDataGeneratorCrop.py +74 -0
  7. biomedisa/biomedisa_features/__init__.py +0 -0
  8. biomedisa/biomedisa_features/active_contour.py +434 -0
  9. biomedisa/biomedisa_features/amira_to_np/__init__.py +0 -0
  10. biomedisa/biomedisa_features/amira_to_np/amira_data_stream.py +980 -0
  11. biomedisa/biomedisa_features/amira_to_np/amira_grammar.py +369 -0
  12. biomedisa/biomedisa_features/amira_to_np/amira_header.py +290 -0
  13. biomedisa/biomedisa_features/amira_to_np/amira_helper.py +72 -0
  14. biomedisa/biomedisa_features/assd.py +167 -0
  15. biomedisa/biomedisa_features/biomedisa_helper.py +801 -0
  16. biomedisa/biomedisa_features/create_slices.py +286 -0
  17. biomedisa/biomedisa_features/crop_helper.py +586 -0
  18. biomedisa/biomedisa_features/curvop_numba.py +149 -0
  19. biomedisa/biomedisa_features/django_env.py +172 -0
  20. biomedisa/biomedisa_features/keras_helper.py +1219 -0
  21. biomedisa/biomedisa_features/nc_reader.py +179 -0
  22. biomedisa/biomedisa_features/pid.py +52 -0
  23. biomedisa/biomedisa_features/process_image.py +253 -0
  24. biomedisa/biomedisa_features/pycuda_test.py +84 -0
  25. biomedisa/biomedisa_features/random_walk/__init__.py +0 -0
  26. biomedisa/biomedisa_features/random_walk/gpu_kernels.py +183 -0
  27. biomedisa/biomedisa_features/random_walk/pycuda_large.py +826 -0
  28. biomedisa/biomedisa_features/random_walk/pycuda_large_allx.py +806 -0
  29. biomedisa/biomedisa_features/random_walk/pycuda_small.py +414 -0
  30. biomedisa/biomedisa_features/random_walk/pycuda_small_allx.py +493 -0
  31. biomedisa/biomedisa_features/random_walk/pyopencl_large.py +760 -0
  32. biomedisa/biomedisa_features/random_walk/pyopencl_small.py +441 -0
  33. biomedisa/biomedisa_features/random_walk/rw_large.py +390 -0
  34. biomedisa/biomedisa_features/random_walk/rw_small.py +310 -0
  35. biomedisa/biomedisa_features/remove_outlier.py +399 -0
  36. biomedisa/biomedisa_features/split_volume.py +274 -0
  37. biomedisa/deeplearning.py +519 -0
  38. biomedisa/interpolation.py +371 -0
  39. biomedisa/mesh.py +406 -0
  40. biomedisa-2024.5.14.dist-info/LICENSE +191 -0
  41. biomedisa-2024.5.14.dist-info/METADATA +306 -0
  42. biomedisa-2024.5.14.dist-info/RECORD +44 -0
  43. biomedisa-2024.5.14.dist-info/WHEEL +5 -0
  44. biomedisa-2024.5.14.dist-info/top_level.txt +1 -0
@@ -0,0 +1,806 @@
1
+ ##########################################################################
2
+ ## ##
3
+ ## Copyright (c) 2024 Philipp Lösel. All rights reserved. ##
4
+ ## ##
5
+ ## This file is part of the open source project biomedisa. ##
6
+ ## ##
7
+ ## Licensed under the European Union Public Licence (EUPL) ##
8
+ ## v1.2, or - as soon as they will be approved by the ##
9
+ ## European Commission - subsequent versions of the EUPL; ##
10
+ ## ##
11
+ ## You may redistribute it and/or modify it under the terms ##
12
+ ## of the EUPL v1.2. You may not use this work except in ##
13
+ ## compliance with this Licence. ##
14
+ ## ##
15
+ ## You can obtain a copy of the Licence at: ##
16
+ ## ##
17
+ ## https://joinup.ec.europa.eu/page/eupl-text-11-12 ##
18
+ ## ##
19
+ ## Unless required by applicable law or agreed to in ##
20
+ ## writing, software distributed under the Licence is ##
21
+ ## distributed on an "AS IS" basis, WITHOUT WARRANTIES ##
22
+ ## OR CONDITIONS OF ANY KIND, either express or implied. ##
23
+ ## ##
24
+ ## See the Licence for the specific language governing ##
25
+ ## permissions and limitations under the Licence. ##
26
+ ## ##
27
+ ##########################################################################
28
+
29
+ from mpi4py import MPI
30
+ import numba
31
+ import numpy as np
32
+ import pycuda.driver as cuda
33
+ import pycuda.gpuarray as gpuarray
34
+ from pycuda.compiler import SourceModule
35
+ from biomedisa_features.random_walk.gpu_kernels import (_build_kernel_uncertainty,
36
+ _build_kernel_max, _build_kernel_fill, _build_update_gpu, _build_curvature_gpu)
37
+
38
def reduceBlocksize(slices):
    """Mask everything outside the padded label bounding box with -1.

    The bounding box is computed in the y/x plane over all slices from the
    voxels that are neither 0 nor -1, grown by 100 voxels on each side and
    clipped to the array extent. Everything outside is set to -1.

    Parameters
    ----------
    slices : numpy.ndarray
        3D array (slice, y, x). It is modified in place.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.
    """
    zsh, ysh, xsh = slices.shape

    # Voxels carrying a real label. A boolean mask is used instead of
    # testing the index values returned by np.nonzero: index 0 is falsy, so
    # the former `x.any()` check ignored slices labelled only in column 0.
    labeled = (slices != 0) & (slices != -1)
    cols = np.flatnonzero(labeled.any(axis=(0, 1)))
    rows = np.flatnonzero(labeled.any(axis=(0, 2)))

    if cols.size:
        argmin_x, argmax_x = int(cols[0]), int(cols[-1])
        argmin_y, argmax_y = int(rows[0]), int(rows[-1])
    else:
        # no labels at all: keep the historical defaults
        argmin_x, argmax_x, argmin_y, argmax_y = xsh, 0, ysh, 0

    # grow the box by the 100-voxel margin and clip it to the array
    argmin_x = max(argmin_x - 100, 0)
    argmax_x = min(argmax_x + 100, xsh)
    argmin_y = max(argmin_y - 100, 0)
    argmax_y = min(argmax_y + 100, ysh)

    slices[:, :argmin_y] = -1
    slices[:, argmax_y:] = -1
    slices[:, :, :argmin_x] = -1
    slices[:, :, argmax_x:] = -1
    return slices
59
+
60
def _send_overlap(comm, block, dest, tag):
    """Send `block` to rank `dest` using the three-message protocol.

    A one-element int32 flag is sent with `tag`. Only if `block` contains a
    non-zero value are its shape (`tag+1`) and a C-contiguous float copy of
    its data (`tag+2`) sent afterwards.
    """
    flag = np.empty(1, dtype=np.int32)
    if np.any(block):
        flag.fill(1)
        comm.Send([flag, MPI.INT], dest=dest, tag=tag)
        payload = block.copy(order='C')
        comm.send([payload.shape[0], payload.shape[1], payload.shape[2]], dest=dest, tag=tag+1)
        comm.Send([payload, MPI.FLOAT], dest=dest, tag=tag+2)
    else:
        flag.fill(0)
        comm.Send([flag, MPI.INT], dest=dest, tag=tag)

def _recv_overlap(comm, source, tag):
    """Receive a block sent by `_send_overlap`; return None if none was sent."""
    flag = np.empty(1, dtype=np.int32)
    comm.Recv([flag, MPI.INT], source=source, tag=tag)
    if not flag[0]:
        return None
    data_z, data_y, data_x = comm.recv(source=source, tag=tag+1)
    payload = np.empty((data_z, data_y, data_x), dtype=np.float32)
    comm.Recv([payload, MPI.FLOAT], source=source, tag=tag+2)
    return payload

def sendrecv(a, blockmin, blockmax, comm, rank, size):
    """Exchange the overlap regions of `a` with the neighbouring ranks.

    `a[blockmax:]` is sent to rank+1 and `a[:blockmin]` to rank-1. Blocks
    received from rank+1 are added to `a[blockmax-data_z:blockmax]`, blocks
    received from rank-1 to `a[blockmin:blockmin+data_z]`, where `data_z` is
    the first dimension of the received block.

    Ranks with an even number talk to their upper neighbour first (tags
    0-5) and then to their lower neighbour (tags 6-11). Odd ranks do it the
    other way round, so every blocking send meets a matching receive. The
    last rank picks its tag offset from its own parity.

    Parameters
    ----------
    a : numpy.ndarray
        3D float32 array (data is transferred as MPI.FLOAT); modified in place.
    blockmin, blockmax : int
        Bounds along the first axis of the region owned by this rank.
    comm : MPI communicator
    rank, size : int
        Rank of this process and number of processes.

    Returns
    -------
    numpy.ndarray
        `a` after accumulation.
    """
    # a single process has no neighbours to exchange data with
    if size < 2:
        return a

    def add_from_upper(recv):
        if recv is not None:
            a[blockmax-recv.shape[0]:blockmax] += recv

    def add_from_lower(recv):
        if recv is not None:
            a[blockmin:blockmin+recv.shape[0]] += recv

    lower, upper = rank-1, rank+1

    if rank == 0:

        _send_overlap(comm, a[blockmax:], upper, 0)
        add_from_upper(_recv_overlap(comm, upper, 3))

    elif rank == size-1:

        # the lower neighbour has the opposite parity and uses tags 0-5
        # towards its upper neighbour if it is even, 6-11 if it is odd
        add = 0 if rank % 2 == 1 else 6

        recv = _recv_overlap(comm, lower, 0+add)
        if recv is not None:
            data_z = recv.shape[0]
            # the received block may reach beyond the end of `a`
            limit = min(a.shape[0], blockmin+data_z) - blockmin
            a[blockmin:blockmin+data_z] += recv[:limit]
        _send_overlap(comm, a[:blockmin], lower, 3+add)

    elif rank % 2 == 1:

        add_from_lower(_recv_overlap(comm, lower, 0))
        _send_overlap(comm, a[:blockmin], lower, 3)
        _send_overlap(comm, a[blockmax:], upper, 6)
        add_from_upper(_recv_overlap(comm, upper, 9))

    else:

        _send_overlap(comm, a[blockmax:], upper, 0)
        add_from_upper(_recv_overlap(comm, upper, 3))
        add_from_lower(_recv_overlap(comm, lower, 6))
        _send_overlap(comm, a[:blockmin], lower, 9)

    return a
210
+
211
@numba.jit(nopython=True)
def _calc_var(raw, A):
    """Return the local squared deviation of `raw` inside the area `A`.

    For every interior pixel with `A == 1` the squared differences to the
    pixels of its 3x3 neighbourhood that also have `A == 1` are averaged.
    Results below 1.0 are raised to 1.0. All other pixels, including the
    one-pixel border, stay 0.
    """
    rows, cols = raw.shape
    beta = np.zeros((rows, cols))
    for r in range(1, rows-1):
        for c in range(1, cols-1):
            if A[r, c] != 1:
                continue
            centre = raw[r, c]
            sq_sum, count = 0, 0
            # visit the 3x3 neighbourhood; the centre always counts itself
            for rr in range(r-1, r+2):
                for cc in range(c-1, c+2):
                    if A[rr, cc] == 1:
                        sq_sum += (centre - raw[rr, cc])**2
                        count += 1
            variance = sq_sum / count
            beta[r, c] = 1.0 if variance < 1.0 else variance
    return beta
231
+
232
@numba.jit(nopython=True)
def max_to_label(a, walkmap, final, blockmin, blockmax, segment):
    """Assign `segment` wherever `a` exceeds the running maximum `walkmap`.

    Only planes `blockmin <= k < blockmax` are visited. `walkmap` is
    updated in place and `final`, which is indexed relative to `blockmin`,
    receives the label. Both arrays are returned.
    """
    depth, height, width = a.shape
    for z in range(blockmin, blockmax):
        out_z = z - blockmin
        for y in range(height):
            for x in range(width):
                hits = a[z, y, x]
                if hits > walkmap[z, y, x]:
                    walkmap[z, y, x] = hits
                    final[out_z, y, x] = segment
    return walkmap, final
242
+
243
+ def _calc_label_walking_area(sliceData, labelValue):
244
+ walkingArea = np.zeros_like(sliceData)
245
+ walkingArea[sliceData == labelValue] = 1
246
+ return walkingArea
247
+
248
def _free_gpu_buffer(buf):
    """Best-effort release of a device allocation; None is ignored."""
    if buf is None:
        return
    try:
        buf.free()
    except Exception:
        # releasing memory is cleanup only and must not mask the result
        pass

def walk(comm, raw, slices, indices, nbrw, sorw, blockmin, blockmax, name,
        allLabels, smooth, uncertainty, ctx, queue, platform):
    """Run the random walks for all labels on the GPU and pick the winner.

    For each label the kernel accumulates hits in a volume of the shape of
    `raw`. The volumes are combined with the neighbouring ranks
    (`sendrecv`) and the label with the most hits is written to the result
    for the planes `blockmin:blockmax`. The first label in `allLabels` only
    initialises the hit map; the result keeps its initial 0 where it wins.

    Parameters
    ----------
    comm : MPI communicator
    raw : numpy.ndarray
        3D image. uint8 data is shifted by 128 and processed as int8, any
        other dtype is converted to float32.
    slices, indices : sequence of length 3
        Per axis the labelled slices and their positions. An axis with
        empty `indices[k]` is skipped.
    nbrw, sorw : int
        Passed to the kernel as number of walks per start voxel and steps
        per walk.
    blockmin, blockmax : int
        Range of planes this rank is responsible for.
    name : str
        Prefix of the progress output.
    allLabels : sequence
        Label values to process.
    smooth : int
        Number of smoothing iterations; 0 disables smoothing.
    uncertainty : bool
        Whether to compute the uncertainty volume.
    ctx, queue, platform
        Not used by this implementation.

    Returns
    -------
    tuple
        `(memory_error, final, final_uncertainty, final_smooth)`. If the
        basic GPU setup fails on any rank, `(True, None, None, None)` is
        returned. `final_uncertainty` and `final_smooth` are None when the
        respective feature is off or had to be disabled.
    """

    rank = comm.Get_rank()
    size = comm.Get_size()

    if raw.dtype == 'uint8':
        kernel = _build_kernel_int8()
        raw = (raw-128).astype('int8')
    else:
        kernel = _build_kernel_float32()
        raw = raw.astype(np.float32)

    foundAxis = [0] * 3
    for k in range(3):
        if indices[k]:
            foundAxis[k] = 1

    zsh, ysh, xsh = raw.shape
    fill_gpu = _build_kernel_fill()

    block = (32, 32, 1)
    x_grid = (xsh // 32) + 1
    y_grid = (ysh // 32) + 1
    grid2 = (int(x_grid), int(y_grid), int(zsh))

    a = np.empty(raw.shape, dtype=np.float32)
    final = np.zeros((blockmax-blockmin, ysh, xsh), dtype=np.uint8)
    segment_npy = np.empty(1, dtype=np.uint8)

    memory_error = False

    # device buffers that are released explicitly
    a_gpu = None
    b_gpu = None
    max_gpu = None

    try:
        raw_gpu = gpuarray.to_gpu(raw)
        a_gpu = cuda.mem_alloc(a.nbytes)

        # The smoothing buffer is deliberately not allocated here. It has
        # its own allocation with a graceful fallback below; allocating it
        # twice made a smoothing shortage fatal and doubled the peak usage.

        zshape = np.int32(zsh)
        yshape = np.int32(ysh)
        xshape = np.int32(xsh)
        sorw = np.int32(sorw)
        nbrw = np.int32(nbrw)

        slshape = [None] * 3
        indices_gpu = [None] * 3
        beta_gpu = [None] * 3
        slices_gpu = [None] * 3
        sl_ysh = [None] * 3
        sl_xsh = [None] * 3

        for k, found in enumerate(foundAxis):
            if found:
                indices_tmp = np.array(indices[k], dtype=np.int32)
                slices_tmp = slices[k].astype(np.int32)
                slices_tmp = reduceBlocksize(slices_tmp)
                slshape[k], sl_ysh[k], sl_xsh[k] = slices_tmp.shape
                indices_gpu[k] = gpuarray.to_gpu(indices_tmp)
                slices_gpu[k] = gpuarray.to_gpu(slices_tmp)
                Beta = np.zeros(slices_tmp.shape, dtype=np.float32)
                for m in range(slshape[k]):
                    # the image plane is the same for every label
                    plane = indices_tmp[m]
                    if k==0: raw_tmp = raw[plane]
                    if k==1: raw_tmp = raw[:,plane]
                    if k==2: raw_tmp = raw[:,:,plane]
                    raw_plane = raw_tmp.astype(float)
                    for n in allLabels:
                        A = _calc_label_walking_area(slices_tmp[m], n)
                        Beta[m] += _calc_var(raw_plane, A)
                beta_gpu[k] = gpuarray.to_gpu(Beta)

        sendbuf = np.zeros(1, dtype=np.int32)
        recvbuf = np.zeros(1, dtype=np.int32)
        comm.Barrier()
        comm.Allreduce([sendbuf, MPI.INT], [recvbuf, MPI.INT], op=MPI.MAX)

    except Exception:
        print('Error: GPU out of memory. Data too large.')
        sendbuf = np.zeros(1, dtype=np.int32) + 1
        recvbuf = np.zeros(1, dtype=np.int32)
        comm.Barrier()
        comm.Allreduce([sendbuf, MPI.INT], [recvbuf, MPI.INT], op=MPI.MAX)

    # abort on all ranks as soon as a single rank failed
    if recvbuf[0] > 0:
        memory_error = True
        _free_gpu_buffer(a_gpu)
        return memory_error, None, None, None

    if smooth:
        try:
            update_gpu = _build_update_gpu()
            curvature_gpu = _build_curvature_gpu()
            b_npy = np.zeros(raw.shape, dtype=np.float32)
            b_gpu = cuda.mem_alloc(b_npy.nbytes)
            cuda.memcpy_htod(b_gpu, b_npy)
            final_smooth = np.zeros((blockmax-blockmin, yshape, xshape), dtype=np.uint8)
            sendbuf_smooth = np.zeros(1, dtype=np.int32)
            recvbuf_smooth = np.zeros(1, dtype=np.int32)
            comm.Barrier()
            comm.Allreduce([sendbuf_smooth, MPI.INT], [recvbuf_smooth, MPI.INT], op=MPI.MAX)
        except Exception:
            print('Warning: GPU out of memory to allocate smooth array. Process starts without smoothing.')
            sendbuf_smooth = np.zeros(1, dtype=np.int32) + 1
            recvbuf_smooth = np.zeros(1, dtype=np.int32)
            comm.Barrier()
            comm.Allreduce([sendbuf_smooth, MPI.INT], [recvbuf_smooth, MPI.INT], op=MPI.MAX)
        # smoothing is switched off everywhere if one rank could not set it up
        if recvbuf_smooth[0] > 0:
            smooth = 0
            _free_gpu_buffer(b_gpu)
            b_gpu = None

    if uncertainty:
        try:
            max_npy = np.zeros((3,)+raw.shape, dtype=np.float32)
            max_gpu = cuda.mem_alloc(max_npy.nbytes)
            cuda.memcpy_htod(max_gpu, max_npy)
            kernel_uncertainty = _build_kernel_uncertainty()
            kernel_max = _build_kernel_max()
            sendbuf_uq = np.zeros(1, dtype=np.int32)
            recvbuf_uq = np.zeros(1, dtype=np.int32)
            comm.Barrier()
            comm.Allreduce([sendbuf_uq, MPI.INT], [recvbuf_uq, MPI.INT], op=MPI.MAX)
        except Exception:
            print('Warning: GPU out of memory to allocate uncertainty array. Process starts without uncertainty.')
            sendbuf_uq = np.zeros(1, dtype=np.int32) + 1
            recvbuf_uq = np.zeros(1, dtype=np.int32)
            comm.Barrier()
            comm.Allreduce([sendbuf_uq, MPI.INT], [recvbuf_uq, MPI.INT], op=MPI.MAX)
        # same collective decision as for smoothing
        if recvbuf_uq[0] > 0:
            uncertainty = False
            _free_gpu_buffer(max_gpu)
            max_gpu = None

    for label_counter, segment in enumerate(allLabels):
        print('%s:' %(name) + ' ' + str(label_counter+1) + '/' + str(len(allLabels)))
        fill_gpu(a_gpu, xshape, yshape, block=block, grid=grid2)
        segment_gpu = np.int32(segment)
        segment_npy.fill(segment)
        for k, found in enumerate(foundAxis):
            if found:
                axis_gpu = np.int32(k)
                x_grid = (sl_xsh[k] // 32) + 1
                y_grid = (sl_ysh[k] // 32) + 1
                grid = (int(x_grid), int(y_grid), int(slshape[k]))
                kernel(axis_gpu, segment_gpu, raw_gpu, slices_gpu[k], a_gpu, xshape, yshape, zshape, indices_gpu[k], sorw, beta_gpu[k], nbrw, block=block, grid=grid)
        cuda.memcpy_dtoh(a, a_gpu)

        if size > 1:
            a = sendrecv(a, blockmin, blockmax, comm, rank, size)

        # the combined hits are only needed on the device for these features
        if smooth or uncertainty:
            cuda.memcpy_htod(a_gpu, a)

        if uncertainty:
            kernel_max(max_gpu, a_gpu, xshape, yshape, block=block, grid=grid2)

        if smooth:
            for k in range(smooth):
                curvature_gpu(a_gpu, b_gpu, xshape, yshape, block=block, grid=grid2)
                update_gpu(a_gpu, b_gpu, xshape, yshape, block=block, grid=grid2)
            a_smooth = np.empty_like(a)
            cuda.memcpy_dtoh(a_smooth, a_gpu)
            if label_counter == 0:
                a_smooth[a_smooth<0] = 0
                walkmap_smooth = np.copy(a_smooth, order='C')
            else:
                walkmap_smooth, final_smooth = max_to_label(a_smooth, walkmap_smooth, final_smooth, blockmin, blockmax, segment)

        if label_counter == 0:
            a[a<0] = 0
            walkmap = np.copy(a, order='C')
        else:
            walkmap, final = max_to_label(a, walkmap, final, blockmin, blockmax, segment)

    if uncertainty:
        # the kernel writes its result into a_gpu
        kernel_uncertainty(max_gpu, a_gpu, xshape, yshape, block=block, grid=grid2)
        final_uncertainty = np.empty_like(a)
        cuda.memcpy_dtoh(final_uncertainty, a_gpu)
        final_uncertainty = final_uncertainty[blockmin:blockmax]
    else:
        final_uncertainty = None

    if not smooth:
        final_smooth = None

    _free_gpu_buffer(a_gpu)
    _free_gpu_buffer(b_gpu)
    _free_gpu_buffer(max_gpu)

    return memory_error, final, final_uncertainty, final_smooth
448
+
449
def _build_kernel_int8():
    """Compile the random walk kernel for int8 image data.

    The kernel takes the image through a `float *` argument but reads it
    byte-wise, so the device array must hold one signed byte per voxel.
    One thread handles one voxel of a labelled slice. Threads on voxels of
    the requested segment start `nbrw` walks of `sorw` steps each and count
    every visited voxel in `a` with `atomicAdd`.

    Returns
    -------
    The compiled kernel function "Funktion".
    """
    code = """

    __device__ float weight(float B, float *raw, float div1, unsigned int position) {
        /* signed char: plain char is unsigned on some platforms */
        float tmp = B - (float)(*((signed char*)(raw) + position));
        return exp( - tmp * tmp * div1 );
    }

    __global__ void Funktion(int axis, int segment, float *raw, int *slices, float *a, int xsh, int ysh, int zsh, int *indices, int sorw, float *Beta, int nbrw) {

        int col_g = blockIdx.x * blockDim.x + threadIdx.x;
        int row_g = blockIdx.y * blockDim.y + threadIdx.y;
        int slc_g = blockIdx.z;

        int xsh_g, ysh_g, plane, row, column;

        if (axis == 0) {
            plane = indices[slc_g];
            row = row_g;
            column = col_g;
            xsh_g = xsh;
            ysh_g = ysh;
        }
        else if (axis == 1) {
            row = indices[slc_g];
            plane = row_g;
            column = col_g;
            xsh_g = xsh;
            ysh_g = zsh;
        }
        else if (axis == 2) {
            column = indices[slc_g];
            plane = row_g;
            row = col_g;
            xsh_g = ysh;
            ysh_g = zsh;
        }

        int flat = xsh * ysh;
        int flat_g = xsh_g * ysh_g;
        unsigned int index = slc_g * flat_g + row_g * xsh_g + col_g;

        if (index<gridDim.z*flat_g && plane>0 && plane<zsh-1 && row>0 && row<ysh-1 && column>0 && column<xsh-1) {

            if (slices[index]==segment) {

                /* Adaptive random walks */
                int found = 0;
                if ((col_g + row_g) % 4 == 0) {
                    found = 1;
                }
                else {
                    /* stop scanning as soon as another label was found */
                    for (int y = -100; y < 101 && found == 0; y++) {
                        for (int x = -100; x < 101 && found == 0; x++) {
                            if (row_g+y > 0 && col_g+x > 0 && row_g+y < ysh_g-1 && col_g+x < xsh_g-1) {
                                unsigned int tmp = slc_g * flat_g + (row_g + y) * xsh_g + (col_g + x);
                                if (slices[tmp] != segment && slices[tmp] != -1) {
                                    found = 1;
                                }
                            }
                        }
                    }
                }

                if (found == 1) {

                    float rand;
                    float W0,W1,W2,W3,W4,W5;
                    int n,o,p;

                    /* Initialize MRG32k3a */
                    float norm = 2.328306549295728e-10;
                    float m1 = 4294967087.0;
                    float m2 = 4294944443.0;
                    float a12 = 1403580.0;
                    float a13n = 810728.0;
                    float a21 = 527612.0;
                    float a23n = 1370589.0;
                    long k1;
                    float p1, p2;
                    float s10 = index, s11 = index, s12 = index, s20 = index, s21 = index, s22 = index;

                    /* Compute standard deviation */
                    unsigned int position = plane*flat + row*xsh + column;
                    float B = (float)(*((signed char*)(raw) + position));
                    float var = Beta[index];
                    float div1 = 1 / (2 * var);

                    int k = plane;
                    int l = row;
                    int m = column;

                    int step = 0;
                    int n_rw = 0;

                    /* Compute random walks */
                    while (n_rw < nbrw) {

                        /* Compute weights */
                        W0 = weight(B, raw, div1, position + flat);
                        W1 = weight(B, raw, div1, position - flat);
                        W2 = weight(B, raw, div1, position + xsh);
                        W3 = weight(B, raw, div1, position - xsh);
                        W4 = weight(B, raw, div1, position + 1);
                        W5 = weight(B, raw, div1, position - 1);

                        W1 += W0;
                        W2 += W1;
                        W3 += W2;
                        W4 += W3;
                        W5 += W4;

                        /* Compute random numbers with MRG32k3a */

                        /* Component 1 */
                        p1 = a12 * s11 - a13n * s10;
                        k1 = p1 / m1;
                        p1 -= k1 * m1;
                        if (p1 < 0.0){
                            p1 += m1;}
                        s10 = s11;
                        s11 = s12;
                        s12 = p1;

                        /* Component 2 */
                        p2 = a21 * s22 - a23n * s20;
                        k1 = p2 / m2;
                        p2 -= k1 * m2;
                        if (p2 < 0.0){
                            p2 += m2;}
                        s20 = s21;
                        s21 = s22;
                        s22 = p2;

                        /* Combination */
                        if (p1 <= p2) {
                            rand = W5 * ((p1 - p2 + m1) * norm);
                        }
                        else {
                            rand = W5 * ((p1 - p2) * norm);
                        }

                        /* Determine new direction of random walk */
                        if (rand<W0 || rand==0){n=1; o=0; p=0;}
                        else if (rand>=W0 && rand<W1){n=-1; o=0; p=0;}
                        else if (rand>=W1 && rand<W2){n=0; o=1; p=0;}
                        else if (rand>=W2 && rand<W3){n=0; o=-1; p=0;}
                        else if (rand>=W3 && rand<W4){n=0; o=0; p=1;}
                        else if (rand>=W4 && rand<=W5){n=0; o=0; p=-1;}

                        /* Move in new direction */
                        if (k+n>0 && k+n<zsh-1 && l+o>0 && l+o<ysh-1 && m+p>0 && m+p<xsh-1) {
                            k += n;
                            l += o;
                            m += p;
                            position = k*flat + l*xsh + m;
                            atomicAdd(&a[position], 1);
                        }

                        step += 1;

                        if (step==sorw) {
                            k = plane;
                            l = row;
                            m = column;
                            position = k*flat + l*xsh + m;
                            n_rw += 1;
                            step = 0;
                        }
                    }
                }
            }
        }
    }
    """
    mod = SourceModule(code)
    kernel = mod.get_function("Funktion")
    return kernel
627
+
628
def _build_kernel_float32():
    """Compile the random walk kernel for float32 image data.

    One thread handles one voxel of a labelled slice. Threads on voxels of
    the requested segment start `nbrw` walks of `sorw` steps each and count
    every visited voxel in `a` with `atomicAdd`.

    Returns
    -------
    The compiled kernel function "Funktion".
    """
    code = """

    __device__ float weight(float B, float A, float div1) {
        float tmp = B - A;
        return exp( - tmp * tmp * div1 );
    }

    __global__ void Funktion(int axis, int segment, float *raw, int *slices, float *a, int xsh, int ysh, int zsh, int *indices, int sorw, float *Beta, int nbrw) {

        int col_g = blockIdx.x * blockDim.x + threadIdx.x;
        int row_g = blockIdx.y * blockDim.y + threadIdx.y;
        int slc_g = blockIdx.z;

        int xsh_g, ysh_g, plane, row, column;

        if (axis == 0) {
            plane = indices[slc_g];
            row = row_g;
            column = col_g;
            xsh_g = xsh;
            ysh_g = ysh;
        }
        else if (axis == 1) {
            row = indices[slc_g];
            plane = row_g;
            column = col_g;
            xsh_g = xsh;
            ysh_g = zsh;
        }
        else if (axis == 2) {
            column = indices[slc_g];
            plane = row_g;
            row = col_g;
            xsh_g = ysh;
            ysh_g = zsh;
        }

        int flat = xsh * ysh;
        int flat_g = xsh_g * ysh_g;
        unsigned int index = slc_g * flat_g + row_g * xsh_g + col_g;

        if (index<gridDim.z*flat_g && plane>0 && plane<zsh-1 && row>0 && row<ysh-1 && column>0 && column<xsh-1) {

            if (slices[index]==segment) {

                /* Adaptive random walks */
                int found = 0;
                if ((col_g + row_g) % 4 == 0) {
                    found = 1;
                }
                else {
                    /* stop scanning as soon as another label was found */
                    for (int y = -100; y < 101 && found == 0; y++) {
                        for (int x = -100; x < 101 && found == 0; x++) {
                            if (row_g+y > 0 && col_g+x > 0 && row_g+y < ysh_g-1 && col_g+x < xsh_g-1) {
                                unsigned int tmp = slc_g * flat_g + (row_g + y) * xsh_g + (col_g + x);
                                if (slices[tmp] != segment && slices[tmp] != -1) {
                                    found = 1;
                                }
                            }
                        }
                    }
                }

                if (found == 1) {

                    float rand;
                    float W0,W1,W2,W3,W4,W5;
                    int n,o,p;

                    /* Initialize MRG32k3a */
                    float norm = 2.328306549295728e-10;
                    float m1 = 4294967087.0;
                    float m2 = 4294944443.0;
                    float a12 = 1403580.0;
                    float a13n = 810728.0;
                    float a21 = 527612.0;
                    float a23n = 1370589.0;
                    long k1;
                    float p1, p2;
                    float s10 = index, s11 = index, s12 = index, s20 = index, s21 = index, s22 = index;

                    /* Compute standard deviation */
                    unsigned int position = plane*flat + row*xsh + column;
                    float B = raw[position];
                    float var = Beta[index];
                    float div1 = 1 / (2 * var);

                    int k = plane;
                    int l = row;
                    int m = column;

                    int step = 0;
                    int n_rw = 0;

                    /* Compute random walks */
                    while (n_rw < nbrw) {

                        /* Compute weights */
                        W0 = weight(B, raw[position + flat], div1);
                        W1 = weight(B, raw[position - flat], div1);
                        W2 = weight(B, raw[position + xsh], div1);
                        W3 = weight(B, raw[position - xsh], div1);
                        W4 = weight(B, raw[position + 1], div1);
                        W5 = weight(B, raw[position - 1], div1);

                        W1 += W0;
                        W2 += W1;
                        W3 += W2;
                        W4 += W3;
                        W5 += W4;

                        /* Compute random numbers with MRG32k3a */

                        /* Component 1 */
                        p1 = a12 * s11 - a13n * s10;
                        k1 = p1 / m1;
                        p1 -= k1 * m1;
                        if (p1 < 0.0){
                            p1 += m1;}
                        s10 = s11;
                        s11 = s12;
                        s12 = p1;

                        /* Component 2 */
                        p2 = a21 * s22 - a23n * s20;
                        k1 = p2 / m2;
                        p2 -= k1 * m2;
                        if (p2 < 0.0){
                            p2 += m2;}
                        s20 = s21;
                        s21 = s22;
                        s22 = p2;

                        /* Combination */
                        if (p1 <= p2) {
                            rand = W5 * ((p1 - p2 + m1) * norm);
                        }
                        else {
                            rand = W5 * ((p1 - p2) * norm);
                        }

                        /* Determine new direction of random walk */
                        if (rand<W0 || rand==0){n=1; o=0; p=0;}
                        else if (rand>=W0 && rand<W1){n=-1; o=0; p=0;}
                        else if (rand>=W1 && rand<W2){n=0; o=1; p=0;}
                        else if (rand>=W2 && rand<W3){n=0; o=-1; p=0;}
                        else if (rand>=W3 && rand<W4){n=0; o=0; p=1;}
                        else if (rand>=W4 && rand<=W5){n=0; o=0; p=-1;}

                        /* Move in new direction */
                        if (k+n>0 && k+n<zsh-1 && l+o>0 && l+o<ysh-1 && m+p>0 && m+p<xsh-1) {
                            k += n;
                            l += o;
                            m += p;
                            position = k*flat + l*xsh + m;
                            atomicAdd(&a[position], 1);
                        }

                        step += 1;

                        if (step==sorw) {
                            k = plane;
                            l = row;
                            m = column;
                            position = k*flat + l*xsh + m;
                            n_rw += 1;
                            step = 0;
                        }
                    }
                }
            }
        }
    }
    """
    mod = SourceModule(code)
    kernel = mod.get_function("Funktion")
    return kernel
806
+