biomedisa 2024.5.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. biomedisa/__init__.py +53 -0
  2. biomedisa/__main__.py +18 -0
  3. biomedisa/biomedisa_features/DataGenerator.py +299 -0
  4. biomedisa/biomedisa_features/DataGeneratorCrop.py +121 -0
  5. biomedisa/biomedisa_features/PredictDataGenerator.py +87 -0
  6. biomedisa/biomedisa_features/PredictDataGeneratorCrop.py +74 -0
  7. biomedisa/biomedisa_features/__init__.py +0 -0
  8. biomedisa/biomedisa_features/active_contour.py +434 -0
  9. biomedisa/biomedisa_features/amira_to_np/__init__.py +0 -0
  10. biomedisa/biomedisa_features/amira_to_np/amira_data_stream.py +980 -0
  11. biomedisa/biomedisa_features/amira_to_np/amira_grammar.py +369 -0
  12. biomedisa/biomedisa_features/amira_to_np/amira_header.py +290 -0
  13. biomedisa/biomedisa_features/amira_to_np/amira_helper.py +72 -0
  14. biomedisa/biomedisa_features/assd.py +167 -0
  15. biomedisa/biomedisa_features/biomedisa_helper.py +801 -0
  16. biomedisa/biomedisa_features/create_slices.py +286 -0
  17. biomedisa/biomedisa_features/crop_helper.py +586 -0
  18. biomedisa/biomedisa_features/curvop_numba.py +149 -0
  19. biomedisa/biomedisa_features/django_env.py +172 -0
  20. biomedisa/biomedisa_features/keras_helper.py +1219 -0
  21. biomedisa/biomedisa_features/nc_reader.py +179 -0
  22. biomedisa/biomedisa_features/pid.py +52 -0
  23. biomedisa/biomedisa_features/process_image.py +253 -0
  24. biomedisa/biomedisa_features/pycuda_test.py +84 -0
  25. biomedisa/biomedisa_features/random_walk/__init__.py +0 -0
  26. biomedisa/biomedisa_features/random_walk/gpu_kernels.py +183 -0
  27. biomedisa/biomedisa_features/random_walk/pycuda_large.py +826 -0
  28. biomedisa/biomedisa_features/random_walk/pycuda_large_allx.py +806 -0
  29. biomedisa/biomedisa_features/random_walk/pycuda_small.py +414 -0
  30. biomedisa/biomedisa_features/random_walk/pycuda_small_allx.py +493 -0
  31. biomedisa/biomedisa_features/random_walk/pyopencl_large.py +760 -0
  32. biomedisa/biomedisa_features/random_walk/pyopencl_small.py +441 -0
  33. biomedisa/biomedisa_features/random_walk/rw_large.py +390 -0
  34. biomedisa/biomedisa_features/random_walk/rw_small.py +310 -0
  35. biomedisa/biomedisa_features/remove_outlier.py +399 -0
  36. biomedisa/biomedisa_features/split_volume.py +274 -0
  37. biomedisa/deeplearning.py +519 -0
  38. biomedisa/interpolation.py +371 -0
  39. biomedisa/mesh.py +406 -0
  40. biomedisa-2024.5.14.dist-info/LICENSE +191 -0
  41. biomedisa-2024.5.14.dist-info/METADATA +306 -0
  42. biomedisa-2024.5.14.dist-info/RECORD +44 -0
  43. biomedisa-2024.5.14.dist-info/WHEEL +5 -0
  44. biomedisa-2024.5.14.dist-info/top_level.txt +1 -0
@@ -0,0 +1,826 @@
1
+ ##########################################################################
2
+ ## ##
3
+ ## Copyright (c) 2024 Philipp Lösel. All rights reserved. ##
4
+ ## ##
5
+ ## This file is part of the open source project biomedisa. ##
6
+ ## ##
7
+ ## Licensed under the European Union Public Licence (EUPL) ##
8
+ ## v1.2, or - as soon as they will be approved by the ##
9
+ ## European Commission - subsequent versions of the EUPL; ##
10
+ ## ##
11
+ ## You may redistribute it and/or modify it under the terms ##
12
+ ## of the EUPL v1.2. You may not use this work except in ##
13
+ ## compliance with this Licence. ##
14
+ ## ##
15
+ ## You can obtain a copy of the Licence at: ##
16
+ ## ##
17
+ ## https://joinup.ec.europa.eu/page/eupl-text-11-12 ##
18
+ ## ##
19
+ ## Unless required by applicable law or agreed to in ##
20
+ ## writing, software distributed under the Licence is ##
21
+ ## distributed on an "AS IS" basis, WITHOUT WARRANTIES ##
22
+ ## OR CONDITIONS OF ANY KIND, either express or implied. ##
23
+ ## ##
24
+ ## See the Licence for the specific language governing ##
25
+ ## permissions and limitations under the Licence. ##
26
+ ## ##
27
+ ##########################################################################
28
+
29
+ from mpi4py import MPI
30
+ import numba
31
+ import numpy as np
32
+ import pycuda.driver as cuda
33
+ import pycuda.gpuarray as gpuarray
34
+ from pycuda.compiler import SourceModule
35
+ from biomedisa_features.random_walk.gpu_kernels import (_build_kernel_uncertainty,
36
+ _build_kernel_max, _build_kernel_fill, _build_update_gpu, _build_curvature_gpu)
37
+
38
def reduceBlocksize(slices):
    """Mask everything outside the labelled region of interest with -1.

    The y/x bounding box of all non-zero entries is accumulated over every
    slice, padded by 100 pixels on each side (clipped to the array extent),
    and every element outside the padded box is overwritten with -1.

    Parameters
    ----------
    slices : numpy.ndarray
        3D array indexed as (z, y, x). It is modified in place, so its dtype
        has to be able to store -1.

    Returns
    -------
    numpy.ndarray
        The very same array object that was passed in.
    """
    zsh, ysh, xsh = slices.shape
    argmin_x, argmax_x, argmin_y, argmax_y = xsh, 0, ysh, 0
    for k in range(zsh):
        y, x = np.nonzero(slices[k])
        # Check how many pixels were found, not their coordinates: `x.any()`
        # is False when all labelled pixels of the slice sit in column 0,
        # which used to drop such slices from the bounding box.
        if x.size:
            argmin_x = min(argmin_x, np.amin(x))
            argmax_x = max(argmax_x, np.amax(x))
            argmin_y = min(argmin_y, np.amin(y))
            argmax_y = max(argmax_y, np.amax(y))

    # pad the bounding box by 100 pixels and clip it to the array
    argmin_x = max(argmin_x - 100, 0)
    argmax_x = min(argmax_x + 100, xsh)
    argmin_y = max(argmin_y - 100, 0)
    argmax_y = min(argmax_y + 100, ysh)

    # mark everything outside the padded box
    slices[:, :argmin_y] = -1
    slices[:, argmax_y:] = -1
    slices[:, :, :argmin_x] = -1
    slices[:, :, argmax_x:] = -1
    return slices
57
+
58
def _send_block(comm, block, dest, tag):
    """Send `block` to rank `dest` using up to three consecutive tags.

    A one-element int32 flag goes out with `tag`. The flag is 1 only if
    `block` contains a non-zero value; in that case the shape follows with
    `tag+1` and the data (as MPI.FLOAT) with `tag+2`. Empty or all-zero
    blocks are announced with flag 0 and are not transferred.
    """
    flag = np.empty(1, dtype=np.int32)
    if np.any(block):
        flag.fill(1)
        comm.Send([flag, MPI.INT], dest=dest, tag=tag)
        send = block.copy(order='C')
        comm.send([send.shape[0], send.shape[1], send.shape[2]], dest=dest, tag=tag+1)
        comm.Send([send, MPI.FLOAT], dest=dest, tag=tag+2)
    else:
        flag.fill(0)
        comm.Send([flag, MPI.INT], dest=dest, tag=tag)

def _recv_block(comm, source, tag):
    """Receive a block sent by `_send_block`; return None if none was sent."""
    flag = np.empty(1, dtype=np.int32)
    comm.Recv([flag, MPI.INT], source=source, tag=tag)
    if not flag[0]:
        return None
    data_z, data_y, data_x = comm.recv(source=source, tag=tag+1)
    recv = np.empty((data_z, data_y, data_x), dtype=np.float32)
    comm.Recv([recv, MPI.FLOAT], source=source, tag=tag+2)
    return recv

def sendrecv(a, blockmin, blockmax, comm, rank, size):
    """Exchange the overlap regions of `a` with the neighbouring MPI ranks.

    The part of `a` above `blockmax` is sent to rank+1 and the part below
    `blockmin` to rank-1. Whatever the neighbours send back is added in place
    to the adjacent rows inside [blockmin, blockmax). Odd and even ranks use
    mirrored send/receive orders so that every blocking send has a matching
    receive posted on the other side.

    Parameters
    ----------
    a : numpy.ndarray
        3D array; transferred as MPI.FLOAT, so it is expected to be float32
        (walk() passes its float32 hit counts).
    blockmin, blockmax : int
        First and one-past-last z index of the block owned by this rank.
    comm : MPI communicator
    rank, size : int
        Rank of this process and number of processes in `comm`.

    Returns
    -------
    numpy.ndarray
        `a`, updated in place.
    """
    # a single process has no neighbour to talk to
    if size < 2:
        return a

    def send_up(tag):
        _send_block(comm, a[blockmax:], rank+1, tag)

    def send_down(tag):
        _send_block(comm, a[:blockmin], rank-1, tag)

    def recv_up(tag):
        recv = _recv_block(comm, rank+1, tag)
        if recv is not None:
            a[blockmax-recv.shape[0]:blockmax] += recv

    def recv_down(tag):
        recv = _recv_block(comm, rank-1, tag)
        if recv is not None:
            a[blockmin:blockmin+recv.shape[0]] += recv

    if rank == 0:
        send_up(0)
        recv_up(3)

    elif rank == size-1:
        # the lower neighbour uses tags 0-5 if it is even and 6-11 if it is odd
        add = 0 if rank % 2 == 1 else 6
        recv_down(0+add)
        send_down(3+add)

    elif rank % 2 == 1:
        recv_down(0)
        send_down(3)
        send_up(6)
        recv_up(9)

    elif rank % 2 == 0:
        send_up(0)
        recv_up(3)
        recv_down(6)
        send_down(9)

    return a
207
+
208
@numba.jit(nopython=True)
def max_to_label(a, walkmap, final, blockmin, blockmax, segment):
    """Keep the running maximum of `a` and record which label produced it.

    For every voxel with z in [blockmin, blockmax) whose value in `a` exceeds
    the value stored in `walkmap`, the value is copied into `walkmap` and
    `segment` is written to `final`. `final` is indexed relative to
    `blockmin` along z. Both arrays are updated in place and returned.
    """
    n_rows = a.shape[1]
    n_cols = a.shape[2]
    for z in range(blockmin, blockmax):
        z_out = z - blockmin
        for y in range(n_rows):
            for x in range(n_cols):
                candidate = a[z, y, x]
                if candidate > walkmap[z, y, x]:
                    walkmap[z, y, x] = candidate
                    final[z_out, y, x] = segment
    return walkmap, final
218
+
219
def _any_rank_flag(comm, flag):
    """Collective: return True if `flag` is true on at least one MPI rank.

    Every rank of `comm` has to call this the same number of times, because
    it performs a Barrier followed by an Allreduce (MPI.MAX).
    """
    sendbuf = np.zeros(1, dtype=np.int32) + (1 if flag else 0)
    recvbuf = np.zeros(1, dtype=np.int32)
    comm.Barrier()
    comm.Allreduce([sendbuf, MPI.INT], [recvbuf, MPI.INT], op=MPI.MAX)
    return bool(recvbuf[0] > 0)

def walk(comm, raw, slices, indices, nbrw, sorw, blockmin, blockmax, name,
         allLabels, smooth, uncertainty, ctx, queue, platform):
    """Run the GPU random walks for every label and pick the winning label.

    For each label in `allLabels` the random-walk kernel is started from the
    pre-segmented slices and the number of hits per voxel is collected. The
    label with the most hits wins a voxel. If the volume has more than 42e8
    voxels or the GPU allocation fails, the hits are computed on subdomains
    of 100 slices (plus 100 slices of context on each side) instead; in that
    case smoothing and uncertainty are switched off on all ranks.

    Parameters
    ----------
    comm : MPI communicator
    raw : numpy.ndarray
        3D image data. uint8 data is shifted to int8, anything else is cast
        to float32.
    slices : numpy.ndarray
        Pre-segmented slices, one 2D label image per entry of `indices`.
    indices : sequence of int
        z positions of `slices` inside `raw`.
    nbrw : int
        Number of random walks started per seed pixel.
    sorw : int
        Number of steps of each random walk.
    blockmin, blockmax : int
        z range [blockmin, blockmax) of `raw` this rank is responsible for.
    name : str
        Prefix of the progress message.
    allLabels : sequence
        Label values to process.
        NOTE(review): the first label is never written to `final` (which
        starts as zeros); presumably allLabels[0] is the background label
        0 -- verify against the caller.
    smooth : int
        Number of smoothing iterations; 0 disables smoothing.
    uncertainty : bool
        Whether to compute the uncertainty map.
    ctx, queue, platform
        Not used by this implementation.

    Returns
    -------
    tuple
        (memory_error, final, final_uncertainty, final_smooth). If any rank
        ran out of GPU memory the result is (True, None, None, None).
        `final_uncertainty` and `final_smooth` are None when the respective
        feature is disabled.
    """

    # get rank and size of mpi process
    rank = comm.Get_rank()
    size = comm.Get_size()

    # build kernels
    if raw.dtype == 'uint8':
        kernel = _build_kernel_int8()
        # the subtraction is done in uint8 and wraps around before the cast
        raw = (raw-128).astype('int8')
    else:
        kernel = _build_kernel_float32()
        raw = raw.astype(np.float32)
    fill_gpu = _build_kernel_fill()

    # image size
    zsh, ysh, xsh = raw.shape
    xsh_gpu = np.int32(xsh)
    ysh_gpu = np.int32(ysh)
    zsh_gpu = np.int32(zsh)

    # block and grid size
    block = (32, 32, 1)
    x_grid = (xsh // 32) + 1
    y_grid = (ysh // 32) + 1
    grid2 = (int(x_grid), int(y_grid), int(zsh))

    # hyper-parameter
    sorw = np.int32(sorw)
    nbrw = np.int32(nbrw)

    # crop to region of interest
    slices = slices.astype(np.int32)
    slices = reduceBlocksize(slices)

    # allocate host memory
    hits = np.empty(raw.shape, dtype=np.float32)
    final = np.zeros((blockmax-blockmin, ysh, xsh), dtype=np.uint8)

    # allocate GPU memory or use subdomains
    memory_error = False
    subdomains = False
    if zsh * ysh * xsh > 42e8:
        print('Warning: Volume indexes exceed unsigned long int range. The volume is splitted into subdomains.')
        subdomains = True
    else:
        try:
            if np.any(indices):
                slshape = slices.shape[0]
                indices = np.array(indices, dtype=np.int32)
                indices_gpu = gpuarray.to_gpu(indices)
                slices_gpu = gpuarray.to_gpu(slices)
                grid = (int(x_grid), int(y_grid), int(slshape))
            raw_gpu = gpuarray.to_gpu(raw)
            hits_gpu = cuda.mem_alloc(hits.nbytes)
        except Exception:
            print('Warning: GPU ran out of memory. The volume is splitted into subdomains.')
            subdomains = True
            # release whatever part of the whole-volume allocation succeeded,
            # the subdomain fallback needs the memory and never uses these
            try:
                hits_gpu.free()
            except Exception:
                pass
            raw_gpu = slices_gpu = indices_gpu = None

    # disable smoothing and uncertainty for subdomains
    if _any_rank_flag(comm, subdomains):
        smooth, uncertainty = 0, 0

    if smooth:
        smooth_failed = False
        try:
            update_gpu = _build_update_gpu()
            curvature_gpu = _build_curvature_gpu()
            b_npy = np.zeros(raw.shape, dtype=np.float32)
            b_gpu = cuda.mem_alloc(b_npy.nbytes)
            cuda.memcpy_htod(b_gpu, b_npy)
            final_smooth = np.zeros((blockmax-blockmin, ysh, xsh), dtype=np.uint8)
        except Exception:
            print('Warning: GPU out of memory to allocate smooth array. Process starts without smoothing.')
            smooth_failed = True
        if _any_rank_flag(comm, smooth_failed):
            smooth = 0
            try:
                b_gpu.free()
            except Exception:
                pass

    if uncertainty:
        uq_failed = False
        try:
            max_npy = np.zeros((3,)+raw.shape, dtype=np.float32)
            max_gpu = cuda.mem_alloc(max_npy.nbytes)
            cuda.memcpy_htod(max_gpu, max_npy)
            kernel_uncertainty = _build_kernel_uncertainty()
            kernel_max = _build_kernel_max()
        except Exception:
            print('Warning: GPU out of memory to allocate uncertainty array. Process starts without uncertainty.')
            uq_failed = True
        if _any_rank_flag(comm, uq_failed):
            uncertainty = False
            try:
                max_gpu.free()
            except Exception:
                pass

    for label_counter, segment in enumerate(allLabels):
        print('%s:' %(name) + ' ' + str(label_counter+1) + '/' + str(len(allLabels)))

        # current segment
        segment_gpu = np.int32(segment)

        # split volume into subdomains
        if subdomains:
            try:
                hits.fill(0)
                sub_n = (blockmax-blockmin) // 100 + 1
                for sub_k in range(sub_n):
                    sub_block_min = sub_k*100+blockmin
                    sub_block_max = (sub_k+1)*100+blockmin
                    data_block_min = max(sub_block_min-100,0)
                    data_block_max = min(sub_block_max+100,zsh)

                    # get subindices (collected in lists, stacked once below)
                    sub_indices = []
                    sub_slice_list = []
                    for k, sub_i in enumerate(indices):
                        if sub_block_min <= sub_i < sub_block_max and np.any(slices[k]==segment):
                            sub_indices.append(sub_i)
                            sub_slice_list.append(slices[k])

                    # allocate memory and compute random walks on subdomain
                    if np.any(sub_indices):
                        sub_slices = np.array(sub_slice_list, dtype=slices.dtype)
                        sub_slshape = sub_slices.shape[0]
                        sub_indices = np.array(sub_indices, dtype=np.int32) - data_block_min
                        sub_indices_gpu = gpuarray.to_gpu(sub_indices)
                        sub_slices_gpu = gpuarray.to_gpu(sub_slices)

                        sub_zsh = data_block_max - data_block_min
                        sub_zsh_gpu = np.int32(sub_zsh)
                        sub_raw = np.copy(raw[data_block_min:data_block_max], order='C')
                        sub_raw_gpu = gpuarray.to_gpu(sub_raw)
                        sub_hits = np.empty(sub_raw.shape, dtype=np.float32)
                        sub_hits_gpu = cuda.mem_alloc(sub_hits.nbytes)
                        fill_gpu(sub_hits_gpu, xsh_gpu, ysh_gpu, block=block, grid=(int(x_grid), int(y_grid), int(sub_zsh)))
                        kernel(segment_gpu, sub_raw_gpu, sub_slices_gpu, sub_hits_gpu, xsh_gpu, ysh_gpu, sub_zsh_gpu, sub_indices_gpu, sorw, nbrw, block=block, grid=(int(x_grid), int(y_grid), int(sub_slshape)))
                        cuda.memcpy_dtoh(sub_hits, sub_hits_gpu)
                        hits[data_block_min:data_block_max] += sub_hits
                        sub_hits_gpu.free()
            except Exception:
                print('Error: GPU out of memory. Data too large.')
                memory_error = True
                try:
                    sub_hits_gpu.free()
                except Exception:
                    pass

        # computation of random walks on the entire volume
        else:
            # reset array of hits
            fill_gpu(hits_gpu, xsh_gpu, ysh_gpu, block=block, grid=grid2)

            # compute random walks
            if np.any(indices):
                kernel(segment_gpu, raw_gpu, slices_gpu, hits_gpu, xsh_gpu, ysh_gpu, zsh_gpu, indices_gpu, sorw, nbrw, block=block, grid=grid)

            # get hits
            cuda.memcpy_dtoh(hits, hits_gpu)

        # memory error: abort on all ranks as soon as one rank failed
        if _any_rank_flag(comm, memory_error):
            memory_error = True
            try:
                hits_gpu.free()
            except Exception:
                pass
            return memory_error, None, None, None

        # communicate hits
        if size > 1:
            hits = sendrecv(hits, blockmin, blockmax, comm, rank, size)
            if uncertainty or smooth:
                cuda.memcpy_htod(hits_gpu, hits)

        # save the three most occurring hits
        if uncertainty:
            kernel_max(max_gpu, hits_gpu, xsh_gpu, ysh_gpu, block=block, grid=grid2)

        # smooth manifold
        if smooth:
            for k in range(smooth):
                curvature_gpu(hits_gpu, b_gpu, xsh_gpu, ysh_gpu, block=block, grid=grid2)
                update_gpu(hits_gpu, b_gpu, xsh_gpu, ysh_gpu, block=block, grid=grid2)
            hits_smooth = np.empty_like(hits)
            cuda.memcpy_dtoh(hits_smooth, hits_gpu)
            if label_counter == 0:
                hits_smooth[hits_smooth<0] = 0
                walkmap_smooth = np.copy(hits_smooth, order='C')
            else:
                walkmap_smooth, final_smooth = max_to_label(hits_smooth, walkmap_smooth, final_smooth, blockmin, blockmax, segment)

        # get the label with the most hits
        if label_counter == 0:
            walkmap = np.copy(hits, order='C')
        else:
            walkmap, final = max_to_label(hits, walkmap, final, blockmin, blockmax, segment)

    # compute uncertainty
    if uncertainty:
        kernel_uncertainty(max_gpu, hits_gpu, xsh_gpu, ysh_gpu, block=block, grid=grid2)
        final_uncertainty = np.empty_like(hits)
        cuda.memcpy_dtoh(final_uncertainty, hits_gpu)
        final_uncertainty = final_uncertainty[blockmin:blockmax]
    else:
        final_uncertainty = None

    if not smooth:
        final_smooth = None

    # hits_gpu does not exist if this rank worked on subdomains
    try:
        hits_gpu.free()
    except Exception:
        pass

    return memory_error, final, final_uncertainty, final_smooth
480
+
481
def _build_kernel_int8():
    """Compile the CUDA random-walk kernel for 8-bit image data.

    The kernel takes the image through a `float *raw` parameter but reads it
    byte by byte via `(char*)` casts, so the device buffer has to hold one
    byte per voxel.
    NOTE(review): the signedness of plain `char` is implementation-defined;
    walk() uploads int8 data, so the kernel presumably relies on `char` being
    signed -- confirm for the target platform.

    Kernel behaviour, as far as visible in the source below:
    * one thread per pixel (column, row) of one pre-segmented slice
      (blockIdx.z); `indices[slice]` is the z position of that slice in the
      volume. Pixels on the volume border are skipped.
    * a pixel starts random walks only if it carries the label `segment` and
      either (column + row) % 4 == 0 or a pixel with a different label
      (other than -1) exists within +/-100 pixels in the same slice.
    * each start pixel runs `nbrw` walks of `sorw` steps. The step direction
      is drawn with weights exp(-(B - neighbour)^2 / (2 * var)), where B is
      the grey value of the start pixel and var the variance of the equally
      labelled 3x3 neighbourhood (at least 1).
    * every accepted step atomically increments `hits` at the new position.

    Returns
    -------
    pycuda function
        The compiled `Funktion` kernel.
    """
    code = """

    __device__ float _calc_var(unsigned int position, unsigned int index, float B, float *raw, int segment, int *labels, int xsh) {
        float dev = 0;
        float summe = 0;
        for (int n = -1; n < 2; n++) {
            for (int o = -1; o < 2; o++) {
                if (labels[index + n*xsh + o] == segment) {
                    float tmp = B - (float)(*((char*)(raw) + position + n*xsh + o));
                    dev += tmp * tmp;
                    summe += 1;
                    }
                }
            }
        float var = dev / summe;
        if (var < 1.0) {
            var = 1.0;
            }
        return var;
        }

    __device__ float weight(float B, float *raw, float div1, unsigned int position) {
        float tmp = B - (float)(*((char*)(raw) + position));
        return exp( - tmp * tmp * div1 );
        }

    __global__ void Funktion(int segment, float *raw, int *slices, float *hits, int xsh, int ysh, int zsh, int *indices, int sorw, int nbrw) {

        int flat = xsh * ysh;
        int column = blockIdx.x * blockDim.x + threadIdx.x;
        int row = blockIdx.y * blockDim.y + threadIdx.y;
        int slice = blockIdx.z;
        int plane = indices[slice];
        unsigned int index = slice * flat + row * xsh + column;
        unsigned int position = plane * flat + row * xsh + column;

        if (index < gridDim.z*flat && plane>0 && row>0 && column>0 && plane<zsh-1 && row<ysh-1 && column<xsh-1) {

            if (slices[index]==segment) {

                /* Adaptive random walks */
                int found = 0;
                if ((column + row) % 4 == 0) {
                    found = 1;
                    }
                else {
                    for (int y = -100; y < 101; y++) {
                        for (int x = -100; x < 101; x++) {
                            if (row+y > 0 && column+x > 0 && row+y < ysh-1 && column+x < xsh-1) {
                                unsigned int tmp = slice * flat + (row+y) * xsh + column+x;
                                if (slices[tmp] != segment && slices[tmp] != -1) {
                                    found = 1;
                                    }
                                }
                            }
                        }
                    }

                if (found == 1) {

                    float rand;
                    float W0,W1,W2,W3,W4,W5;
                    int n,o,p;

                    /* Initialize MRG32k3a */
                    float norm = 2.328306549295728e-10;
                    float m1 = 4294967087.0;
                    float m2 = 4294944443.0;
                    float a12 = 1403580.0;
                    float a13n = 810728.0;
                    float a21 = 527612.0;
                    float a23n = 1370589.0;
                    long k1;
                    float p1, p2;
                    float s10 = index, s11 = index, s12 = index, s20 = index, s21 = index, s22 = index;

                    /* Compute standard deviation */
                    float B = (float)(*((char*)(raw) + position));
                    float var = _calc_var(position, index, B, raw, segment, slices, xsh);
                    float div1 = 1 / (2 * var);

                    int k = plane;
                    int l = row;
                    int m = column;

                    int step = 0;
                    int n_rw = 0;

                    /* Compute random walks */
                    while (n_rw < nbrw) {

                        /* Compute weights */
                        W0 = weight(B, raw, div1, position + flat);
                        W1 = weight(B, raw, div1, position - flat);
                        W2 = weight(B, raw, div1, position + xsh);
                        W3 = weight(B, raw, div1, position - xsh);
                        W4 = weight(B, raw, div1, position + 1);
                        W5 = weight(B, raw, div1, position - 1);

                        W1 += W0;
                        W2 += W1;
                        W3 += W2;
                        W4 += W3;
                        W5 += W4;

                        /* Compute random numbers with MRG32k3a */

                        /* Component 1 */
                        p1 = a12 * s11 - a13n * s10;
                        k1 = p1 / m1;
                        p1 -= k1 * m1;
                        if (p1 < 0.0){
                            p1 += m1;}
                        s10 = s11;
                        s11 = s12;
                        s12 = p1;

                        /* Component 2 */
                        p2 = a21 * s22 - a23n * s20;
                        k1 = p2 / m2;
                        p2 -= k1 * m2;
                        if (p2 < 0.0){
                            p2 += m2;}
                        s20 = s21;
                        s21 = s22;
                        s22 = p2;

                        /* Combination */
                        if (p1 <= p2) {
                            rand = W5 * ((p1 - p2 + m1) * norm);
                            }
                        else {
                            rand = W5 * ((p1 - p2) * norm);
                            }

                        /* Determine new direction of random walk */
                        if (rand<W0 || rand==0){n=1; o=0; p=0;}
                        else if (rand>=W0 && rand<W1){n=-1; o=0; p=0;}
                        else if (rand>=W1 && rand<W2){n=0; o=1; p=0;}
                        else if (rand>=W2 && rand<W3){n=0; o=-1; p=0;}
                        else if (rand>=W3 && rand<W4){n=0; o=0; p=1;}
                        else if (rand>=W4 && rand<=W5){n=0; o=0; p=-1;}

                        /* Move in new direction */
                        if (k+n>0 && k+n<zsh-1 && l+o>0 && l+o<ysh-1 && m+p>0 && m+p<xsh-1) {
                            k += n;
                            l += o;
                            m += p;
                            position = k*flat + l*xsh + m;
                            atomicAdd(&hits[position], 1);
                            }

                        step += 1;

                        if (step==sorw) {
                            k = plane;
                            l = row;
                            m = column;
                            position = k*flat + l*xsh + m;
                            n_rw += 1;
                            step = 0;
                            }
                        }
                    }
                }
            }
        }
    """
    # compile the CUDA source and hand back the kernel entry point
    mod = SourceModule(code)
    kernel = mod.get_function("Funktion")
    return kernel
653
+
654
def _build_kernel_float32():
    """Compile the CUDA random-walk kernel for float32 image data.

    Same algorithm as the 8-bit kernel, but `raw` is indexed directly as a
    float array.

    Kernel behaviour, as far as visible in the source below:
    * one thread per pixel (column, row) of one pre-segmented slice
      (blockIdx.z); `indices[slice]` is the z position of that slice in the
      volume. Pixels on the volume border are skipped.
    * a pixel starts random walks only if it carries the label `segment` and
      either (column + row) % 4 == 0 or a pixel with a different label
      (other than -1) exists within +/-100 pixels in the same slice.
    * each start pixel runs `nbrw` walks of `sorw` steps. The step direction
      is drawn with weights exp(-(B - neighbour)^2 / (2 * var)), where B is
      the grey value of the start pixel and var the variance of the equally
      labelled 3x3 neighbourhood (at least 1).
    * every accepted step atomically increments `hits` at the new position.

    Returns
    -------
    pycuda function
        The compiled `Funktion` kernel.
    """
    code = """

    __device__ float _calc_var(unsigned int position, unsigned int index, float B, float *raw, int segment, int *labels, int xsh) {
        float dev = 0;
        float summe = 0;
        for (int n = -1; n < 2; n++) {
            for (int o = -1; o < 2; o++) {
                if (labels[index + n*xsh + o] == segment) {
                    float tmp = B - raw[position + n*xsh + o];
                    dev += tmp * tmp;
                    summe += 1;
                    }
                }
            }
        float var = dev / summe;
        if (var < 1.0) {
            var = 1.0;
            }
        return var;
        }

    __device__ float weight(float B, float A, float div1) {
        float tmp = B - A;
        return exp( - tmp * tmp * div1 );
        }

    __global__ void Funktion(int segment, float *raw, int *slices, float *hits, int xsh, int ysh, int zsh, int *indices, int sorw, int nbrw) {

        int flat = xsh * ysh;
        int column = blockIdx.x * blockDim.x + threadIdx.x;
        int row = blockIdx.y * blockDim.y + threadIdx.y;
        int slice = blockIdx.z;
        int plane = indices[slice];
        unsigned int index = slice * flat + row * xsh + column;
        unsigned int position = plane * flat + row * xsh + column;

        if (index < gridDim.z*flat && plane>0 && row>0 && column>0 && plane<zsh-1 && row<ysh-1 && column<xsh-1) {

            if (slices[index]==segment) {

                /* Adaptive random walks */
                int found = 0;
                if ((column + row) % 4 == 0) {
                    found = 1;
                    }
                else {
                    for (int y = -100; y < 101; y++) {
                        for (int x = -100; x < 101; x++) {
                            if (row+y > 0 && column+x > 0 && row+y < ysh-1 && column+x < xsh-1) {
                                unsigned int tmp = slice * flat + (row+y) * xsh + column+x;
                                if (slices[tmp] != segment && slices[tmp] != -1) {
                                    found = 1;
                                    }
                                }
                            }
                        }
                    }

                if (found == 1) {

                    float rand;
                    float W0,W1,W2,W3,W4,W5;
                    int n,o,p;

                    /* Initialize MRG32k3a */
                    float norm = 2.328306549295728e-10;
                    float m1 = 4294967087.0;
                    float m2 = 4294944443.0;
                    float a12 = 1403580.0;
                    float a13n = 810728.0;
                    float a21 = 527612.0;
                    float a23n = 1370589.0;
                    long k1;
                    float p1, p2;
                    float s10 = index, s11 = index, s12 = index, s20 = index, s21 = index, s22 = index;

                    /* Compute standard deviation */
                    float B = raw[position];
                    float var = _calc_var(position, index, B, raw, segment, slices, xsh);
                    float div1 = 1 / (2 * var);

                    int k = plane;
                    int l = row;
                    int m = column;

                    int step = 0;
                    int n_rw = 0;

                    /* Compute random walks */
                    while (n_rw < nbrw) {

                        /* Compute weights */
                        W0 = weight(B, raw[position + flat], div1);
                        W1 = weight(B, raw[position - flat], div1);
                        W2 = weight(B, raw[position + xsh], div1);
                        W3 = weight(B, raw[position - xsh], div1);
                        W4 = weight(B, raw[position + 1], div1);
                        W5 = weight(B, raw[position - 1], div1);

                        W1 += W0;
                        W2 += W1;
                        W3 += W2;
                        W4 += W3;
                        W5 += W4;

                        /* Compute random numbers with MRG32k3a */

                        /* Component 1 */
                        p1 = a12 * s11 - a13n * s10;
                        k1 = p1 / m1;
                        p1 -= k1 * m1;
                        if (p1 < 0.0){
                            p1 += m1;}
                        s10 = s11;
                        s11 = s12;
                        s12 = p1;

                        /* Component 2 */
                        p2 = a21 * s22 - a23n * s20;
                        k1 = p2 / m2;
                        p2 -= k1 * m2;
                        if (p2 < 0.0){
                            p2 += m2;}
                        s20 = s21;
                        s21 = s22;
                        s22 = p2;

                        /* Combination */
                        if (p1 <= p2) {
                            rand = W5 * ((p1 - p2 + m1) * norm);
                            }
                        else {
                            rand = W5 * ((p1 - p2) * norm);
                            }

                        /* Determine new direction of random walk */
                        if (rand<W0 || rand==0){n=1; o=0; p=0;}
                        else if (rand>=W0 && rand<W1){n=-1; o=0; p=0;}
                        else if (rand>=W1 && rand<W2){n=0; o=1; p=0;}
                        else if (rand>=W2 && rand<W3){n=0; o=-1; p=0;}
                        else if (rand>=W3 && rand<W4){n=0; o=0; p=1;}
                        else if (rand>=W4 && rand<=W5){n=0; o=0; p=-1;}

                        /* Move in new direction */
                        if (k+n>0 && k+n<zsh-1 && l+o>0 && l+o<ysh-1 && m+p>0 && m+p<xsh-1) {
                            k += n;
                            l += o;
                            m += p;
                            position = k*flat + l*xsh + m;
                            atomicAdd(&hits[position], 1);
                            }

                        step += 1;

                        if (step==sorw) {
                            k = plane;
                            l = row;
                            m = column;
                            position = k*flat + l*xsh + m;
                            n_rw += 1;
                            step = 0;
                            }
                        }
                    }
                }
            }
        }
    """
    # compile the CUDA source and hand back the kernel entry point
    mod = SourceModule(code)
    kernel = mod.get_function("Funktion")
    return kernel
826
+