biomedisa 2024.5.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. biomedisa/__init__.py +53 -0
  2. biomedisa/__main__.py +18 -0
  3. biomedisa/biomedisa_features/DataGenerator.py +299 -0
  4. biomedisa/biomedisa_features/DataGeneratorCrop.py +121 -0
  5. biomedisa/biomedisa_features/PredictDataGenerator.py +87 -0
  6. biomedisa/biomedisa_features/PredictDataGeneratorCrop.py +74 -0
  7. biomedisa/biomedisa_features/__init__.py +0 -0
  8. biomedisa/biomedisa_features/active_contour.py +434 -0
  9. biomedisa/biomedisa_features/amira_to_np/__init__.py +0 -0
  10. biomedisa/biomedisa_features/amira_to_np/amira_data_stream.py +980 -0
  11. biomedisa/biomedisa_features/amira_to_np/amira_grammar.py +369 -0
  12. biomedisa/biomedisa_features/amira_to_np/amira_header.py +290 -0
  13. biomedisa/biomedisa_features/amira_to_np/amira_helper.py +72 -0
  14. biomedisa/biomedisa_features/assd.py +167 -0
  15. biomedisa/biomedisa_features/biomedisa_helper.py +801 -0
  16. biomedisa/biomedisa_features/create_slices.py +286 -0
  17. biomedisa/biomedisa_features/crop_helper.py +586 -0
  18. biomedisa/biomedisa_features/curvop_numba.py +149 -0
  19. biomedisa/biomedisa_features/django_env.py +172 -0
  20. biomedisa/biomedisa_features/keras_helper.py +1219 -0
  21. biomedisa/biomedisa_features/nc_reader.py +179 -0
  22. biomedisa/biomedisa_features/pid.py +52 -0
  23. biomedisa/biomedisa_features/process_image.py +253 -0
  24. biomedisa/biomedisa_features/pycuda_test.py +84 -0
  25. biomedisa/biomedisa_features/random_walk/__init__.py +0 -0
  26. biomedisa/biomedisa_features/random_walk/gpu_kernels.py +183 -0
  27. biomedisa/biomedisa_features/random_walk/pycuda_large.py +826 -0
  28. biomedisa/biomedisa_features/random_walk/pycuda_large_allx.py +806 -0
  29. biomedisa/biomedisa_features/random_walk/pycuda_small.py +414 -0
  30. biomedisa/biomedisa_features/random_walk/pycuda_small_allx.py +493 -0
  31. biomedisa/biomedisa_features/random_walk/pyopencl_large.py +760 -0
  32. biomedisa/biomedisa_features/random_walk/pyopencl_small.py +441 -0
  33. biomedisa/biomedisa_features/random_walk/rw_large.py +390 -0
  34. biomedisa/biomedisa_features/random_walk/rw_small.py +310 -0
  35. biomedisa/biomedisa_features/remove_outlier.py +399 -0
  36. biomedisa/biomedisa_features/split_volume.py +274 -0
  37. biomedisa/deeplearning.py +519 -0
  38. biomedisa/interpolation.py +371 -0
  39. biomedisa/mesh.py +406 -0
  40. biomedisa-2024.5.14.dist-info/LICENSE +191 -0
  41. biomedisa-2024.5.14.dist-info/METADATA +306 -0
  42. biomedisa-2024.5.14.dist-info/RECORD +44 -0
  43. biomedisa-2024.5.14.dist-info/WHEEL +5 -0
  44. biomedisa-2024.5.14.dist-info/top_level.txt +1 -0
@@ -0,0 +1,760 @@
1
+ ##########################################################################
2
+ ## ##
3
+ ## Copyright (c) 2024 Philipp Lösel. All rights reserved. ##
4
+ ## ##
5
+ ## This file is part of the open source project biomedisa. ##
6
+ ## ##
7
+ ## Licensed under the European Union Public Licence (EUPL) ##
8
+ ## v1.2, or - as soon as they will be approved by the ##
9
+ ## European Commission - subsequent versions of the EUPL; ##
10
+ ## ##
11
+ ## You may redistribute it and/or modify it under the terms ##
12
+ ## of the EUPL v1.2. You may not use this work except in ##
13
+ ## compliance with this Licence. ##
14
+ ## ##
15
+ ## You can obtain a copy of the Licence at: ##
16
+ ## ##
17
+ ## https://joinup.ec.europa.eu/page/eupl-text-11-12 ##
18
+ ## ##
19
+ ## Unless required by applicable law or agreed to in ##
20
+ ## writing, software distributed under the Licence is ##
21
+ ## distributed on an "AS IS" basis, WITHOUT WARRANTIES ##
22
+ ## OR CONDITIONS OF ANY KIND, either express or implied. ##
23
+ ## ##
24
+ ## See the Licence for the specific language governing ##
25
+ ## permissions and limitations under the Licence. ##
26
+ ## ##
27
+ ##########################################################################
28
+
29
+ from mpi4py import MPI
30
+ import numba
31
+ import numpy as np
32
+ import pyopencl as cl
33
+ import pyopencl.array
34
+
35
def reduceBlocksize(slices):
    """Mask everything outside the padded bounding box of the labels with -1.

    The x/y bounding box of all nonzero entries (taken over every z-slice) is
    enlarged by 100 entries on each side, clipped to the array extent, and
    every entry outside that box is overwritten with -1.

    Parameters
    ----------
    slices : numpy.ndarray
        3D array indexed as (z, y, x). It is modified in place, so its dtype
        must be able to hold -1 (the caller in this file passes int32).

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.
    """
    zsh, ysh, xsh = slices.shape
    argmin_x, argmax_x, argmin_y, argmax_y = xsh, 0, ysh, 0
    for k in range(zsh):
        y, x = np.nonzero(slices[k])
        # Test for "no nonzero entries" via the size of the index array.
        # Testing the index *values* (x.any()) wrongly skips a slice whose
        # labels all lie in column 0.
        if x.size:
            argmin_x = min(argmin_x, np.amin(x))
            argmax_x = max(argmax_x, np.amax(x))
            argmin_y = min(argmin_y, np.amin(y))
            argmax_y = max(argmax_y, np.amax(y))

    # pad the bounding box by 100 entries and clip to the array extent
    argmin_x = max(argmin_x - 100, 0)
    argmax_x = min(argmax_x + 100, xsh)
    argmin_y = max(argmin_y - 100, 0)
    argmax_y = min(argmax_y + 100, ysh)

    # flag everything outside the padded box
    slices[:, :argmin_y] = -1
    slices[:, argmax_y:] = -1
    slices[:, :, :argmin_x] = -1
    slices[:, :, argmax_x:] = -1
    return slices
54
+
55
def _send_halo(comm, block, dest, tag):
    """Send `block` to rank `dest` using up to three consecutive tags.

    A one-element int32 flag is always sent on `tag`: 1 if `block` contains
    any nonzero value, otherwise 0. Only when the flag is 1 do the shape
    (as a pickled list, on `tag+1`) and the data (as MPI.INT, on `tag+2`)
    follow.
    """
    flag = np.empty(1, dtype=np.int32)
    if np.any(block):
        flag.fill(1)
        comm.Send([flag, MPI.INT], dest=dest, tag=tag)
        # the buffer interface needs contiguous memory
        payload = block.copy(order='C')
        comm.send([payload.shape[0], payload.shape[1], payload.shape[2]], dest=dest, tag=tag+1)
        comm.Send([payload, MPI.INT], dest=dest, tag=tag+2)
    else:
        flag.fill(0)
        comm.Send([flag, MPI.INT], dest=dest, tag=tag)


def _recv_halo(comm, source, tag):
    """Receive a block sent by `_send_halo`; return it, or None if the flag was 0."""
    flag = np.empty(1, dtype=np.int32)
    comm.Recv([flag, MPI.INT], source=source, tag=tag)
    if not flag[0]:
        return None
    data_z, data_y, data_x = comm.recv(source=source, tag=tag+1)
    block = np.empty((data_z, data_y, data_x), dtype=np.int32)
    comm.Recv([block, MPI.INT], source=source, tag=tag+2)
    return block


def sendrecv(a, blockmin, blockmax, comm, rank, size):
    """Exchange and accumulate the overlap regions of `a` with the neighbouring ranks.

    With the upper neighbour (rank+1): `a[blockmax:]` is sent, and the block
    received from it is added onto the last rows of `a[:blockmax]`.
    With the lower neighbour (rank-1): the received block is added onto the
    first rows of `a[blockmin:]`, and `a[:blockmin]` is sent.
    Presumably `a[:blockmin]` and `a[blockmax:]` are ghost layers that overlap
    the neighbours' own ranges -- verify against the caller.

    All transfers are blocking. To keep the send/receive pairs matched, even
    ranks talk to their upper neighbour first (tags 0-5) and then to their
    lower neighbour (tags 6-11); odd ranks talk to their lower neighbour first
    (tags 0-5) and then to their upper neighbour (tags 6-11). A rank without
    the respective neighbour skips that exchange, so a single-rank call is a
    no-op.

    Parameters
    ----------
    a : numpy.ndarray
        3D int32 array (it is sent as MPI.INT); modified in place.
    blockmin, blockmax : int
        Bounds along axis 0 separating the overlap regions from the rest.
    comm : mpi4py communicator
    rank, size : int
        Rank of this process and number of processes in `comm`.

    Returns
    -------
    numpy.ndarray
        `a`, after the received blocks have been added.
    """
    has_lower = rank > 0
    has_upper = rank < size - 1

    def exchange_upper(tag):
        # send first, then receive (mirrors exchange_lower on the neighbour)
        _send_halo(comm, a[blockmax:], rank+1, tag)
        recv = _recv_halo(comm, rank+1, tag+3)
        if recv is not None:
            a[blockmax-recv.shape[0]:blockmax] += recv

    def exchange_lower(tag):
        # receive first, then send (mirrors exchange_upper on the neighbour)
        recv = _recv_halo(comm, rank-1, tag)
        if recv is not None:
            a[blockmin:blockmin+recv.shape[0]] += recv
        _send_halo(comm, a[:blockmin], rank-1, tag+3)

    if rank % 2 == 0:
        if has_upper:
            exchange_upper(0)
        if has_lower:
            exchange_lower(6)
    else:
        # an odd rank always has a lower neighbour
        exchange_lower(0)
        if has_upper:
            exchange_upper(6)

    return a
204
+
205
@numba.jit(nopython=True)
def max_to_label(a, walkmap, final, blockmin, blockmax, segment):
    """Update the running maximum and the winning label in place.

    For every position in planes blockmin..blockmax-1 where `a` is strictly
    greater than `walkmap`, the value of `a` is stored in `walkmap` and
    `segment` is written to `final`. `final` is indexed relative to
    `blockmin` along the first axis.

    Returns the (modified) `walkmap` and `final`.
    """
    n_rows = a.shape[1]
    n_cols = a.shape[2]
    for plane in range(blockmin, blockmax):
        out_plane = plane - blockmin
        for row in range(n_rows):
            for col in range(n_cols):
                candidate = a[plane, row, col]
                if candidate > walkmap[plane, row, col]:
                    walkmap[plane, row, col] = candidate
                    final[out_plane, row, col] = segment
    return walkmap, final
215
+
216
def walk(comm, raw, slices, indices, nbrw, sorw, blockmin, blockmax,
         name, allLabels, smooth, uncertainty, ctx, queue, platform):
    """Run the OpenCL random-walk kernel once per label and keep, per voxel, the label with most hits.

    Parameters
    ----------
    comm : mpi4py communicator
        Used for the collective status flags and, if it has more than one
        rank, for exchanging hit counts with neighbouring ranks.
    raw : numpy.ndarray
        3D image (z, y, x). uint8 data is shifted by -128 and run through the
        int8 kernel; any other dtype is converted to float32.
    slices : numpy.ndarray
        Stack of labelled slices; `slices[k]` belongs to plane `indices[k]`.
    indices : sequence of int
        z-positions of the labelled slices inside `raw`.
    nbrw, sorw : int
        Passed to the kernel as number of random walks and steps per walk.
    blockmin, blockmax : int
        z-range of `raw` for which the result is produced.
    name : str
        Prefix of the per-label progress line that is printed.
    allLabels : sequence
        Label values to process, in this order.
    smooth, uncertainty :
        Ignored; both are disabled in this implementation.
    ctx, queue :
        PyOpenCL context and command queue.
    platform : str
        If the part after the last '_' is 'GPU', the volume is always
        processed in subdomains.

    Returns
    -------
    tuple
        `(memory_error, final, None, None)`. `final` is a uint8 array of shape
        (blockmax-blockmin, ysh, xsh). If any rank reported a failure while
        processing subdomains, `(True, None, None, None)` is returned.
        NOTE(review): `final` starts as zeros and is not written for the first
        entry of `allLabels` -- presumably that entry is the background
        label 0; confirm with the caller.
    """

    # disable smoothing and uncertainty
    smooth, uncertainty = 0, 0

    # get rank and size of mpi process
    rank = comm.Get_rank()
    size = comm.Get_size()

    def sync_flag(flag):
        # Collective: every rank contributes 0/1, the maximum is returned.
        sendbuf = np.array([1 if flag else 0], dtype=np.int32)
        recvbuf = np.zeros(1, dtype=np.int32)
        comm.Barrier()
        comm.Allreduce([sendbuf, MPI.INT], [recvbuf, MPI.INT], op=MPI.MAX)
        return int(recvbuf[0])

    def release_all(buffers):
        # Best-effort cleanup of device buffers; a failing release must not
        # mask the error that led here.
        for buf in buffers:
            if buf is not None:
                try:
                    buf.release()
                except cl.Error:
                    pass

    # build kernels
    if raw.dtype == 'uint8':
        src = _build_kernel_int8()
        raw = (raw-128).astype('int8')
    else:
        src = _build_kernel_float32()
        raw = raw.astype(np.float32)

    # image size
    zsh, ysh, xsh = raw.shape

    # crop to region of interest
    slices = slices.astype(np.int32)
    slices = reduceBlocksize(slices)

    # allocate host memory
    hits = np.empty(raw.shape, dtype=np.int32)
    final = np.zeros((blockmax-blockmin, ysh, xsh), dtype=np.uint8)

    # kernel function instantiation
    mf = cl.mem_flags
    prg = cl.Program(ctx, src).build()

    # allocate memory for variables on the device
    read_copy = mf.READ_ONLY | mf.COPY_HOST_PTR
    xsh_cl = cl.Buffer(ctx, read_copy, hostbuf=np.int32(xsh))
    ysh_cl = cl.Buffer(ctx, read_copy, hostbuf=np.int32(ysh))
    zsh_cl = cl.Buffer(ctx, read_copy, hostbuf=np.int32(zsh))
    sorw_cl = cl.Buffer(ctx, read_copy, hostbuf=np.int32(sorw))
    nbrw_cl = cl.Buffer(ctx, read_copy, hostbuf=np.int32(nbrw))
    segment_cl = cl.Buffer(ctx, read_copy, hostbuf=np.int32(0))

    # The kernel increments hits with atomic_add, i.e. it reads and writes the
    # buffer, so the hit buffers are created READ_WRITE. COPY_HOST_PTR is kept
    # so that the transfer happens at creation time, inside the try blocks.
    hits_flags = mf.READ_WRITE | mf.COPY_HOST_PTR

    # allocate device memory or use subdomains
    memory_error = False
    subdomains = False
    has_indices = bool(np.any(indices))
    raw_cl = hits_cl = slices_cl = indices_cl = None
    n_voxels = zsh * ysh * xsh
    if n_voxels > 42e8 or platform.split('_')[-1] == 'GPU':
        if n_voxels > 42e8:
            print('Warning: Volume indexes exceed unsigned long int range. The volume is splitted into subdomains.')
        else:
            print('The volume is splitted into subdomains for better performance.')
        subdomains = True
    else:
        try:
            if has_indices:
                slshape = slices.shape[0]
                indices = np.array(indices, dtype=np.int32)
                indices_cl = cl.Buffer(ctx, read_copy, hostbuf=indices)
                slices_cl = cl.Buffer(ctx, read_copy, hostbuf=slices)
            raw_cl = cl.Buffer(ctx, read_copy, hostbuf=raw)
            hits_cl = cl.Buffer(ctx, hits_flags, hostbuf=hits)
        except Exception:
            print('Warning: Device ran out of memory. The volume is splitted into subdomains.')
            subdomains = True
            release_all((raw_cl, hits_cl, slices_cl, indices_cl))
            raw_cl = hits_cl = slices_cl = indices_cl = None
    # every rank takes part in this collective exactly once
    sync_flag(subdomains)

    for label_counter, segment in enumerate(allLabels):
        print('%s:' %(name) + ' ' + str(label_counter+1) + '/' + str(len(allLabels)))

        # split volume into subdomains
        if subdomains:
            try:
                hits.fill(0)
                sub_n = (blockmax-blockmin) // 100 + 1
                for sub_k in range(sub_n):
                    sub_block_min = sub_k*100+blockmin
                    sub_block_max = (sub_k+1)*100+blockmin
                    # the data handed to the kernel extends 100 planes beyond
                    # the subdomain on both sides (clipped to the volume)
                    data_block_min = max(sub_block_min-100,0)
                    data_block_max = min(sub_block_max+100,zsh)

                    # get subindices: labelled slices inside this subdomain
                    # that contain the current label
                    selected = [k for k, sub_i in enumerate(indices)
                                if sub_block_min <= sub_i < sub_block_max
                                and np.any(slices[k]==segment)]
                    sub_indices = [indices[k] for k in selected]
                    if not np.any(sub_indices):
                        continue

                    # allocate memory and compute random walks on subdomain
                    sub_buffers = []
                    try:
                        # indexing with a list yields a new C-contiguous array
                        sub_slices = slices[selected]
                        sub_slshape = sub_slices.shape[0]
                        sub_indices = np.array(sub_indices, dtype=np.int32) - data_block_min
                        sub_indices_cl = cl.Buffer(ctx, read_copy, hostbuf=sub_indices)
                        sub_buffers.append(sub_indices_cl)
                        sub_slices_cl = cl.Buffer(ctx, read_copy, hostbuf=sub_slices)
                        sub_buffers.append(sub_slices_cl)
                        sub_zsh = data_block_max - data_block_min
                        sub_zsh_cl = cl.Buffer(ctx, read_copy, hostbuf=np.int32(sub_zsh))
                        sub_buffers.append(sub_zsh_cl)
                        sub_raw = np.copy(raw[data_block_min:data_block_max], order='C')
                        sub_raw_cl = cl.Buffer(ctx, read_copy, hostbuf=sub_raw)
                        sub_buffers.append(sub_raw_cl)
                        sub_hits = np.empty(sub_raw.shape, dtype=np.int32)
                        sub_hits_cl = cl.Buffer(ctx, hits_flags, hostbuf=sub_hits)
                        sub_buffers.append(sub_hits_cl)
                        cl.enqueue_fill_buffer(queue, sub_hits_cl, np.int32(0), offset=0, size=sub_hits.nbytes)
                        cl.enqueue_fill_buffer(queue, segment_cl, np.int32(segment), offset=0, size=4, wait_for=None)
                        block = None
                        grid = (sub_slshape, ysh, xsh)
                        prg.randomWalk(queue, grid, block, segment_cl, sub_raw_cl, sub_slices_cl, sub_hits_cl, xsh_cl, ysh_cl, sub_zsh_cl, sub_indices_cl, sorw_cl, nbrw_cl)
                        cl.enqueue_copy(queue, sub_hits, sub_hits_cl)
                        hits[data_block_min:data_block_max] += sub_hits
                    finally:
                        # free the device memory of this subdomain before the
                        # next one is allocated
                        release_all(sub_buffers)
            except Exception:
                print('Error: Device out of memory. Data too large.')
                memory_error = True

        # computation of random walks on the entire volume
        else:
            cl.enqueue_fill_buffer(queue, hits_cl, np.int32(0), offset=0, size=hits.nbytes)
            cl.enqueue_fill_buffer(queue, segment_cl, np.int32(segment), offset=0, size=4, wait_for=None)
            if has_indices:
                # slshape and the slice/index buffers only exist in this case
                block = None
                grid = (slshape, ysh, xsh)
                prg.randomWalk(queue, grid, block, segment_cl, raw_cl, slices_cl, hits_cl, xsh_cl, ysh_cl, zsh_cl, indices_cl, sorw_cl, nbrw_cl)
            cl.enqueue_copy(queue, hits, hits_cl)

        # memory error: abort on all ranks as soon as any rank failed
        if sync_flag(memory_error) > 0:
            memory_error = True
            return memory_error, None, None, None

        # communicate hits
        if size > 1:
            hits = sendrecv(hits, blockmin, blockmax, comm, rank, size)

        # get the label with the most hits
        if label_counter == 0:
            walkmap = np.copy(hits, order='C')
        else:
            walkmap, final = max_to_label(hits, walkmap, final, blockmin, blockmax, segment)

    # uncertainty and smooth are disabled
    final_uncertainty = None
    final_smooth = None

    return memory_error, final, final_uncertainty, final_smooth
394
+
395
# OpenCL C source shared by the int8 and float32 kernels. The two variants
# differ only in the element type of the image buffer (@RAW_T@) and in the
# type used for intensity values inside the kernel (@VAL_T@).
_KERNEL_TEMPLATE = '''

float _calc_var(unsigned int position, unsigned int index, @VAL_T@ B, __global @RAW_T@ *raw, int segment, __global int *labels, int xsh) {
    float dev = 0;
    float summe = 0;
    for (int n = -1; n < 2; n++) {
        for (int o = -1; o < 2; o++) {
            if (labels[index + n*xsh + o] == segment) {
                float tmp = B - raw[position + n*xsh + o];
                dev += tmp * tmp;
                summe += 1;
            }
        }
    }
    float var = dev / summe;
    if (var < 1.0) {
        var = 1.0;
    }
    return var;
}

float weight(@VAL_T@ B, @VAL_T@ A, float div1) {
    @VAL_T@ tmp = B - A;
    return exp( - tmp * tmp * div1 );
}

__kernel void randomWalk(__global int *Segment, __global @RAW_T@ *raw, __global int *slices, __global int *hits, __global int *Xsh, __global int *Ysh, __global int *Zsh, __global int *indices, __global int *Sorw, __global int *Nbrw) {

    int sorw = *Sorw;
    int nbrw = *Nbrw;
    int xsh = *Xsh;
    int ysh = *Ysh;
    int zsh = *Zsh;
    int segment = *Segment;

    // get_global_id(0)   // blockIdx.z * blockDim.z + threadIdx.z
    // get_local_id(0)    // threadIdx.z
    // get_global_size(0) // gridDim.z * blockDim.z
    // get_local_size(0)  // blockDim.z

    int flat = xsh * ysh;
    int column = get_global_id(2);
    int row = get_global_id(1);
    int slice = get_global_id(0);
    int plane = indices[slice];
    unsigned int index = slice * flat + row * xsh + column;
    unsigned int position = plane*flat + row*xsh + column;

    if (index < get_global_size(0)*flat && plane>0 && row>0 && column>0 && plane<zsh-1 && row<ysh-1 && column<xsh-1) {

        if (slices[index]==segment) {

            /* Adaptive random walks: every fourth diagonal always starts
               walks; other pixels only if a different label (not -1) lies
               within +/-100 pixels in the same slice. The search stops at
               the first match. */
            int found = 0;
            if ((column + row) % 4 == 0) {
                found = 1;
            }
            else {
                for (int y = -100; y < 101 && found == 0; y++) {
                    for (int x = -100; x < 101 && found == 0; x++) {
                        if (row+y > 0 && column+x > 0 && row+y < ysh-1 && column+x < xsh-1) {
                            unsigned int tmp = slice * flat + (row+y) * xsh + column+x;
                            if (slices[tmp] != segment && slices[tmp] != -1) {
                                found = 1;
                            }
                        }
                    }
                }
            }

            if (found == 1) {

                float rand;
                float W0,W1,W2,W3,W4,W5;
                /* zero-initialised so that no indeterminate direction is
                   used if none of the branches below matches */
                int n = 0, o = 0, p = 0;

                /* Initialize MRG32k3a */
                float norm = 2.328306549295728e-10;
                float m1 = 4294967087.0;
                float m2 = 4294944443.0;
                float a12 = 1403580.0;
                float a13n = 810728.0;
                float a21 = 527612.0;
                float a23n = 1370589.0;
                long k1;
                float p1, p2;
                float s10 = index, s11 = index, s12 = index, s20 = index, s21 = index, s22 = index;

                /* Compute standard deviation */
                @VAL_T@ B = raw[position];
                float var = _calc_var(position, index, B, raw, segment, slices, xsh);
                float div1 = 1 / (2 * var);

                int k = plane;
                int l = row;
                int m = column;

                int step = 0;
                int n_rw = 0;

                /* Compute random walks */
                while (n_rw < nbrw) {

                    /* Compute weights */
                    W0 = weight(B, raw[position + flat], div1);
                    W1 = weight(B, raw[position - flat], div1);
                    W2 = weight(B, raw[position + xsh], div1);
                    W3 = weight(B, raw[position - xsh], div1);
                    W4 = weight(B, raw[position + 1], div1);
                    W5 = weight(B, raw[position - 1], div1);

                    /* cumulative sums: W5 is the total weight */
                    W1 += W0;
                    W2 += W1;
                    W3 += W2;
                    W4 += W3;
                    W5 += W4;

                    /* Compute random numbers with MRG32k3a */

                    /* Component 1 */
                    p1 = a12 * s11 - a13n * s10;
                    k1 = p1 / m1;
                    p1 -= k1 * m1;
                    if (p1 < 0.0){
                        p1 += m1;}
                    s10 = s11;
                    s11 = s12;
                    s12 = p1;

                    /* Component 2 */
                    p2 = a21 * s22 - a23n * s20;
                    k1 = p2 / m2;
                    p2 -= k1 * m2;
                    if (p2 < 0.0){
                        p2 += m2;}
                    s20 = s21;
                    s21 = s22;
                    s22 = p2;

                    /* Combination */
                    if (p1 <= p2) {
                        rand = W5 * ((p1 - p2 + m1) * norm);
                    }
                    else {
                        rand = W5 * ((p1 - p2) * norm);
                    }

                    /* Determine new direction of random walk */
                    if (rand<W0 || rand==0){n=1; o=0; p=0;}
                    else if (rand>=W0 && rand<W1){n=-1; o=0; p=0;}
                    else if (rand>=W1 && rand<W2){n=0; o=1; p=0;}
                    else if (rand>=W2 && rand<W3){n=0; o=-1; p=0;}
                    else if (rand>=W3 && rand<W4){n=0; o=0; p=1;}
                    else if (rand>=W4 && rand<=W5){n=0; o=0; p=-1;}

                    /* Move in new direction */
                    if (k+n>0 && k+n<zsh-1 && l+o>0 && l+o<ysh-1 && m+p>0 && m+p<xsh-1) {
                        k += n;
                        l += o;
                        m += p;
                        position = k*flat + l*xsh + m;
                        atomic_add(&hits[position], 1);
                    }

                    step += 1;

                    /* after sorw steps restart from the seed pixel */
                    if (step==sorw) {
                        k = plane;
                        l = row;
                        m = column;
                        position = k*flat + l*xsh + m;
                        n_rw += 1;
                        step = 0;
                    }
                }
            }
        }
    }
}
'''


def _render_kernel(raw_type, value_type):
    """Fill the shared kernel template with the given OpenCL type names."""
    return (_KERNEL_TEMPLATE
            .replace('@RAW_T@', raw_type)
            .replace('@VAL_T@', value_type))


def _build_kernel_int8():
    """Return the OpenCL source of `randomWalk` for a `char` image buffer.

    Intensities are read from `__global char *raw` and handled as `int`
    inside the kernel.
    NOTE(review): the MRG32k3a state and moduli are held in single-precision
    `float`, which cannot represent the moduli exactly -- confirm that this
    is intended.
    """
    return _render_kernel('char', 'int')


def _build_kernel_float32():
    """Return the OpenCL source of `randomWalk` for a `float` image buffer.

    Identical to the int8 variant except that intensities are read from
    `__global float *raw` and handled as `float` inside the kernel.
    """
    return _render_kernel('float', 'float')
760
+