nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. doc/conf.py +1 -1
  2. doc/doc_config.py +32 -0
  3. nabu/__init__.py +2 -1
  4. nabu/app/bootstrap_stitching.py +1 -1
  5. nabu/app/cli_configs.py +122 -2
  6. nabu/app/composite_cor.py +27 -2
  7. nabu/app/correct_rot.py +70 -0
  8. nabu/app/create_distortion_map_from_poly.py +42 -18
  9. nabu/app/diag_to_pix.py +358 -0
  10. nabu/app/diag_to_rot.py +449 -0
  11. nabu/app/generate_header.py +4 -3
  12. nabu/app/histogram.py +2 -2
  13. nabu/app/multicor.py +6 -1
  14. nabu/app/parse_reconstruction_log.py +151 -0
  15. nabu/app/prepare_weights_double.py +83 -22
  16. nabu/app/reconstruct.py +5 -1
  17. nabu/app/reconstruct_helical.py +7 -0
  18. nabu/app/reduce_dark_flat.py +6 -3
  19. nabu/app/rotate.py +4 -4
  20. nabu/app/stitching.py +16 -2
  21. nabu/app/tests/test_reduce_dark_flat.py +18 -2
  22. nabu/app/validator.py +4 -4
  23. nabu/cuda/convolution.py +8 -376
  24. nabu/cuda/fft.py +4 -0
  25. nabu/cuda/kernel.py +4 -4
  26. nabu/cuda/medfilt.py +5 -158
  27. nabu/cuda/padding.py +5 -71
  28. nabu/cuda/processing.py +23 -2
  29. nabu/cuda/src/ElementOp.cu +78 -0
  30. nabu/cuda/src/backproj.cu +28 -2
  31. nabu/cuda/src/fourier_wavelets.cu +2 -2
  32. nabu/cuda/src/normalization.cu +23 -0
  33. nabu/cuda/src/padding.cu +2 -2
  34. nabu/cuda/src/transpose.cu +16 -0
  35. nabu/cuda/utils.py +39 -0
  36. nabu/estimation/alignment.py +10 -1
  37. nabu/estimation/cor.py +808 -38
  38. nabu/estimation/cor_sino.py +7 -9
  39. nabu/estimation/tests/test_cor.py +85 -3
  40. nabu/io/reader.py +26 -18
  41. nabu/io/tests/test_cast_volume.py +3 -3
  42. nabu/io/tests/test_detector_distortion.py +3 -3
  43. nabu/io/tiffwriter_zmm.py +2 -2
  44. nabu/io/utils.py +14 -4
  45. nabu/io/writer.py +5 -3
  46. nabu/misc/fftshift.py +6 -0
  47. nabu/misc/histogram.py +5 -285
  48. nabu/misc/histogram_cuda.py +8 -104
  49. nabu/misc/kernel_base.py +3 -121
  50. nabu/misc/padding_base.py +5 -69
  51. nabu/misc/processing_base.py +3 -107
  52. nabu/misc/rotation.py +5 -62
  53. nabu/misc/rotation_cuda.py +5 -65
  54. nabu/misc/transpose.py +6 -0
  55. nabu/misc/unsharp.py +3 -78
  56. nabu/misc/unsharp_cuda.py +5 -52
  57. nabu/misc/unsharp_opencl.py +8 -85
  58. nabu/opencl/fft.py +6 -0
  59. nabu/opencl/kernel.py +21 -6
  60. nabu/opencl/padding.py +5 -72
  61. nabu/opencl/processing.py +27 -5
  62. nabu/opencl/src/backproj.cl +3 -3
  63. nabu/opencl/src/fftshift.cl +65 -12
  64. nabu/opencl/src/padding.cl +2 -2
  65. nabu/opencl/src/roll.cl +96 -0
  66. nabu/opencl/src/transpose.cl +16 -0
  67. nabu/pipeline/config_validators.py +63 -3
  68. nabu/pipeline/dataset_validator.py +2 -2
  69. nabu/pipeline/estimators.py +193 -35
  70. nabu/pipeline/fullfield/chunked.py +34 -17
  71. nabu/pipeline/fullfield/chunked_cuda.py +7 -5
  72. nabu/pipeline/fullfield/computations.py +48 -13
  73. nabu/pipeline/fullfield/nabu_config.py +13 -13
  74. nabu/pipeline/fullfield/processconfig.py +10 -5
  75. nabu/pipeline/fullfield/reconstruction.py +1 -2
  76. nabu/pipeline/helical/fbp.py +5 -0
  77. nabu/pipeline/helical/filtering.py +12 -9
  78. nabu/pipeline/helical/gridded_accumulator.py +179 -33
  79. nabu/pipeline/helical/helical_chunked_regridded.py +262 -151
  80. nabu/pipeline/helical/helical_chunked_regridded_cuda.py +4 -11
  81. nabu/pipeline/helical/helical_reconstruction.py +56 -18
  82. nabu/pipeline/helical/span_strategy.py +1 -1
  83. nabu/pipeline/helical/tests/test_accumulator.py +4 -0
  84. nabu/pipeline/params.py +23 -2
  85. nabu/pipeline/processconfig.py +3 -8
  86. nabu/pipeline/tests/test_chunk_reader.py +78 -0
  87. nabu/pipeline/tests/test_estimators.py +120 -2
  88. nabu/pipeline/utils.py +25 -0
  89. nabu/pipeline/writer.py +2 -0
  90. nabu/preproc/ccd_cuda.py +9 -7
  91. nabu/preproc/ctf.py +21 -26
  92. nabu/preproc/ctf_cuda.py +25 -25
  93. nabu/preproc/double_flatfield.py +14 -2
  94. nabu/preproc/double_flatfield_cuda.py +7 -11
  95. nabu/preproc/flatfield_cuda.py +23 -27
  96. nabu/preproc/phase.py +19 -24
  97. nabu/preproc/phase_cuda.py +21 -21
  98. nabu/preproc/shift_cuda.py +58 -28
  99. nabu/preproc/tests/test_ctf.py +5 -5
  100. nabu/preproc/tests/test_double_flatfield.py +2 -2
  101. nabu/preproc/tests/test_vshift.py +13 -2
  102. nabu/processing/__init__.py +0 -0
  103. nabu/processing/convolution_cuda.py +375 -0
  104. nabu/processing/fft_base.py +163 -0
  105. nabu/processing/fft_cuda.py +256 -0
  106. nabu/processing/fft_opencl.py +54 -0
  107. nabu/processing/fftshift.py +134 -0
  108. nabu/processing/histogram.py +286 -0
  109. nabu/processing/histogram_cuda.py +103 -0
  110. nabu/processing/kernel_base.py +126 -0
  111. nabu/processing/medfilt_cuda.py +159 -0
  112. nabu/processing/muladd.py +29 -0
  113. nabu/processing/muladd_cuda.py +68 -0
  114. nabu/processing/padding_base.py +71 -0
  115. nabu/processing/padding_cuda.py +75 -0
  116. nabu/processing/padding_opencl.py +77 -0
  117. nabu/processing/processing_base.py +123 -0
  118. nabu/processing/roll_opencl.py +64 -0
  119. nabu/processing/rotation.py +63 -0
  120. nabu/processing/rotation_cuda.py +66 -0
  121. nabu/processing/tests/__init__.py +0 -0
  122. nabu/processing/tests/test_fft.py +268 -0
  123. nabu/processing/tests/test_fftshift.py +71 -0
  124. nabu/{misc → processing}/tests/test_histogram.py +2 -4
  125. nabu/{cuda → processing}/tests/test_medfilt.py +1 -1
  126. nabu/processing/tests/test_muladd.py +54 -0
  127. nabu/{cuda → processing}/tests/test_padding.py +119 -75
  128. nabu/processing/tests/test_roll.py +63 -0
  129. nabu/{misc → processing}/tests/test_rotation.py +3 -2
  130. nabu/processing/tests/test_transpose.py +72 -0
  131. nabu/{misc → processing}/tests/test_unsharp.py +41 -8
  132. nabu/processing/transpose.py +126 -0
  133. nabu/processing/unsharp.py +79 -0
  134. nabu/processing/unsharp_cuda.py +53 -0
  135. nabu/processing/unsharp_opencl.py +75 -0
  136. nabu/reconstruction/fbp.py +34 -10
  137. nabu/reconstruction/fbp_base.py +35 -16
  138. nabu/reconstruction/fbp_opencl.py +7 -12
  139. nabu/reconstruction/filtering.py +2 -2
  140. nabu/reconstruction/filtering_cuda.py +13 -14
  141. nabu/reconstruction/filtering_opencl.py +3 -4
  142. nabu/reconstruction/projection.py +2 -0
  143. nabu/reconstruction/rings.py +158 -1
  144. nabu/reconstruction/rings_cuda.py +218 -58
  145. nabu/reconstruction/sinogram_cuda.py +16 -12
  146. nabu/reconstruction/tests/test_deringer.py +116 -14
  147. nabu/reconstruction/tests/test_fbp.py +22 -31
  148. nabu/reconstruction/tests/test_filtering.py +11 -2
  149. nabu/resources/dataset_analyzer.py +89 -26
  150. nabu/resources/nxflatfield.py +2 -2
  151. nabu/resources/tests/test_nxflatfield.py +1 -1
  152. nabu/resources/utils.py +9 -2
  153. nabu/stitching/alignment.py +184 -0
  154. nabu/stitching/config.py +241 -39
  155. nabu/stitching/definitions.py +6 -0
  156. nabu/stitching/frame_composition.py +4 -2
  157. nabu/stitching/overlap.py +99 -3
  158. nabu/stitching/sample_normalization.py +60 -0
  159. nabu/stitching/slurm_utils.py +10 -10
  160. nabu/stitching/tests/test_alignment.py +99 -0
  161. nabu/stitching/tests/test_config.py +16 -1
  162. nabu/stitching/tests/test_overlap.py +68 -2
  163. nabu/stitching/tests/test_sample_normalization.py +49 -0
  164. nabu/stitching/tests/test_slurm_utils.py +5 -5
  165. nabu/stitching/tests/test_utils.py +3 -33
  166. nabu/stitching/tests/test_z_stitching.py +391 -22
  167. nabu/stitching/utils.py +144 -202
  168. nabu/stitching/z_stitching.py +309 -126
  169. nabu/testutils.py +18 -0
  170. nabu/thirdparty/tomocupy_remove_stripe.py +586 -0
  171. nabu/utils.py +32 -6
  172. {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/LICENSE +1 -1
  173. {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/METADATA +5 -5
  174. nabu-2024.1.0rc3.dist-info/RECORD +296 -0
  175. {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/WHEEL +1 -1
  176. {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/entry_points.txt +5 -1
  177. nabu/conftest.py +0 -14
  178. nabu/opencl/fftshift.py +0 -92
  179. nabu/opencl/tests/test_fftshift.py +0 -55
  180. nabu/opencl/tests/test_padding.py +0 -84
  181. nabu-2023.2.1.dist-info/RECORD +0 -252
  182. /nabu/cuda/src/{fftshift.cu → dfi_fftshift.cu} +0 -0
  183. {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,15 @@
1
1
  import numpy as np
2
- import pycuda.gpuarray as garray
3
- from ..utils import get_cuda_srcfile
4
- from ..cuda.processing import CudaProcessing
5
- from ..cuda.kernel import CudaKernel
6
- from .rings import MunchDeringer
7
- from silx.image.tomography import get_next_power
8
- from ..cuda.padding import CudaPadding
2
+ from ..utils import docstring, get_cuda_srcfile, updiv
3
+ from ..cuda.processing import CudaProcessing, __has_pycuda__
4
+ from ..processing.padding_cuda import CudaPadding
5
+ from ..processing.fft_cuda import get_fft_class, get_available_fft_implems
6
+ from ..processing.transpose import CudaTranspose
7
+ from ..thirdparty.tomocupy_remove_stripe import remove_all_stripe_pycuda, __have_tomocupy_deringer__
8
+ from .rings import MunchDeringer, SinoMeanDeringer, VoDeringer
9
9
 
10
+ if __has_pycuda__:
11
+ import pycuda.gpuarray as garray
12
+ from ..cuda.kernel import CudaKernel
10
13
 
11
14
  try:
12
15
  from pycudwt import Wavelets
@@ -14,21 +17,19 @@ try:
14
17
  __have_pycudwt__ = True
15
18
  except ImportError:
16
19
  __have_pycudwt__ = False
17
- try:
18
- from skcuda.fft import Plan
19
- from skcuda.fft import fft as cufft
20
- from skcuda.fft import ifft as cuifft
21
-
22
- __have_skcuda__ = True
23
- except Exception as exc:
24
- # We have to catch this very broad exception, because
25
- # skcuda.cublas.cublasError cannot be evaluated without error when no cuda GPU is found
26
- __have_skcuda__ = False
27
20
 
28
21
 
29
22
  class CudaMunchDeringer(MunchDeringer):
30
23
  def __init__(
31
- self, sigma, sinos_shape, levels=None, wname="db15", padding=None, padding_mode="edge", cuda_options=None
24
+ self,
25
+ sigma,
26
+ sinos_shape,
27
+ levels=None,
28
+ wname="db15",
29
+ padding=None,
30
+ padding_mode="edge",
31
+ fft_backend="skcuda",
32
+ cuda_options=None,
32
33
  ):
33
34
  """
34
35
  Initialize a "Munch Et Al" sinogram deringer with the Cuda backend.
@@ -55,14 +56,15 @@ class CudaMunchDeringer(MunchDeringer):
55
56
  super().__init__(sigma, sinos_shape, levels=levels, wname=wname, padding=padding, padding_mode=padding_mode)
56
57
  self._check_can_use_wavelets()
57
58
  self.cuda_processing = CudaProcessing(**(cuda_options or {}))
59
+ self.ctx = self.cuda_processing.ctx
58
60
  self._init_pycudwt()
59
61
  self._init_padding()
60
- self._init_fft()
62
+ self._init_fft(fft_backend)
61
63
  self._setup_fw_kernel()
62
64
 
63
65
  def _check_can_use_wavelets(self):
64
- if not (__have_pycudwt__ and __have_skcuda__):
65
- raise ValueError("Needs pycudwt and scikit-cuda to use this class")
66
+ if not (__have_pycudwt__):
67
+ raise ValueError("Needs pycudwt to use this class")
66
68
 
67
69
  def _init_padding(self):
68
70
  if self.padding is None:
@@ -74,37 +76,49 @@ class CudaMunchDeringer(MunchDeringer):
74
76
  cuda_options={"ctx": self.cuda_processing.ctx},
75
77
  )
76
78
 
77
- def _init_fft(self):
79
+ def _init_fft(self, fft_backend):
80
+ self.fft_cls = get_fft_class(backend=fft_backend)
81
+ # For all k >= 1, we perform a batched (I)FFT along axis 0 on an array
82
+ # of shape (n_a/2^k, n_x/2^k) (up to DWT size rounding)
83
+ if self.fft_cls.implem == "vkfft":
84
+ self._create_plans_vkfft()
85
+ else:
86
+ self._create_plans_skfft()
87
+
88
+ def _create_plans_skfft(self):
78
89
  self._fft_plans = {}
79
90
  for level, d_vcoeff in self._d_vertical_coeffs.items():
80
- n_angles, dwidth = d_vcoeff.shape
81
- # Batched vertical 1D FFT - need advanced data layout
82
- # http://docs.nvidia.com/cuda/cufft/#advanced-data-layout
83
- p_f = Plan(
84
- (n_angles,),
85
- np.float32,
86
- np.complex64,
87
- batch=dwidth,
88
- inembed=np.int32([0]),
89
- istride=dwidth,
90
- idist=1,
91
- onembed=np.int32([0]),
92
- ostride=dwidth,
93
- odist=1,
94
- )
95
- p_i = Plan(
96
- (n_angles,),
97
- np.complex64,
98
- np.float32,
99
- batch=dwidth,
100
- inembed=np.int32([0]),
101
- istride=dwidth,
102
- idist=1,
103
- onembed=np.int32([0]),
104
- ostride=dwidth,
105
- odist=1,
106
- )
107
- self._fft_plans[level] = {"forward": p_f, "inverse": p_i}
91
+ self._fft_plans[level] = self.fft_cls(d_vcoeff.shape, np.float32, r2c=True, axes=(0,), ctx=self.ctx)
92
+
93
+ def _create_plans_vkfft(self):
94
+ """
95
+ VKFFT does not support batched R2C transforms along axis 0 ("slow axis").
96
+ We can either use C2C (faster, but needs more memory) or transpose the arrays to do R2C along axis=1.
97
+ Here we transpose the arrays.
98
+ """
99
+ self._fft_plans = {}
100
+ self._transpose_forward_1 = {}
101
+ self._transpose_forward_2 = {}
102
+ self._transpose_inverse_1 = {}
103
+ self._transpose_inverse_2 = {}
104
+ for level, d_vcoeff in self._d_vertical_coeffs.items():
105
+ shape = d_vcoeff.shape
106
+ # Normally, a batched 1D fft on 2D data of shape (Ny, Nx) along axis 0 returns an array of shape (Ny/2+1, Nx):
107
+ #
108
+ # (Ny, Nx) --[fft_0]--> (Ny/2+1, Nx)
109
+ # f32 c64
110
+ #
111
+ # In this case, we can only do batched 1D transform along axis 1, so we have to trick with transposes:
112
+ #
113
+ # (Ny, Nx) --[T]--> (Nx, Ny) --[fft_1]--> (Nx, Ny/2+1) --[T]--> (Ny/2+1, Nx)
114
+ # f32 f32 c64 c64
115
+ #
116
+ # (In both cases IFFT is done the same way from right to left)
117
+ self._transpose_forward_1[level] = CudaTranspose(shape, np.float32, ctx=self.ctx)
118
+ self._fft_plans[level] = self.fft_cls(shape[::-1], np.float32, r2c=True, ctx=self.ctx)
119
+ self._transpose_forward_2[level] = CudaTranspose((shape[1], shape[0] // 2 + 1), np.complex64, ctx=self.ctx)
120
+ self._transpose_inverse_1[level] = CudaTranspose((shape[0] // 2 + 1, shape[1]), np.complex64, ctx=self.ctx)
121
+ self._transpose_inverse_2[level] = CudaTranspose(shape[::-1], np.float32, ctx=self.ctx)
108
122
 
109
123
  def _init_pycudwt(self):
110
124
  if self.levels is None:
@@ -118,7 +132,6 @@ class CudaMunchDeringer(MunchDeringer):
118
132
 
119
133
  def _get_vertical_coeffs(self):
120
134
  self._d_vertical_coeffs = {}
121
- self._d_sino_f = {}
122
135
  # Transfer the (0-memset) coefficients in order to get all the shapes
123
136
  coeffs = self.cudwt.coeffs
124
137
  for i in range(self.cudwt.levels):
@@ -126,7 +139,6 @@ class CudaMunchDeringer(MunchDeringer):
126
139
  self._d_vertical_coeffs[i + 1] = garray.empty(
127
140
  shape, np.float32, gpudata=self.cudwt.coeff_int_ptr(3 * i + 2)
128
141
  )
129
- self._d_sino_f[i + 1] = garray.zeros((shape[0] // 2 + 1, shape[1]), dtype=np.complex64)
130
142
 
131
143
  def _setup_fw_kernel(self):
132
144
  self._fw_kernel = CudaKernel(
@@ -135,6 +147,32 @@ class CudaMunchDeringer(MunchDeringer):
135
147
  signature="Piif",
136
148
  )
137
149
 
150
+ def _apply_fft(self, level):
151
+ d_coeffs = self._d_vertical_coeffs[level]
152
+ # All the memory is allocated (or re-used) under the hood
153
+ if self.fft_cls.implem == "vkfft":
154
+ d_coeffs_t = self._transpose_forward_1[level](
155
+ d_coeffs
156
+ ) # allocates self._transpose_forward_1[level].processing.dst
157
+ d_coeffs_t_f = self._fft_plans[level].fft(d_coeffs_t) # allocates self._fft_plans[level].output_fft
158
+ d_coeffs_f = self._transpose_forward_2[level](
159
+ d_coeffs_t_f
160
+ ) # allocates self._transpose_forward_2[level].processing.dst
161
+ else:
162
+ d_coeffs_f = self._fft_plans[level].fft(d_coeffs)
163
+ return d_coeffs_f
164
+
165
+ def _apply_ifft(self, d_coeffs_f, level):
166
+ d_coeffs = self._d_vertical_coeffs[level]
167
+ if self.fft_cls.implem == "vkfft":
168
+ d_coeffs_t_f = self._transpose_inverse_1[level](d_coeffs_f, dst=self._fft_plans[level].output_fft)
169
+ d_coeffs_t = self._fft_plans[level].ifft(
170
+ d_coeffs_t_f, output=self._transpose_forward_1[level].processing.dst
171
+ )
172
+ self._transpose_inverse_2[level](d_coeffs_t, dst=d_coeffs)
173
+ else:
174
+ self._fft_plans[level].ifft(d_coeffs_f, output=d_coeffs)
175
+
138
176
  def _destripe_2D(self, d_sino, output):
139
177
  if self.padding is not None:
140
178
  d_sino = self.padder.pad(d_sino)
@@ -144,15 +182,15 @@ class CudaMunchDeringer(MunchDeringer):
144
182
  self.cudwt.forward()
145
183
  for i in range(self.cudwt.levels):
146
184
  level = i + 1
147
- d_coeffs = self._d_vertical_coeffs[level]
148
- d_sino_f = self._d_sino_f[level]
149
- Ny, Nx = d_coeffs.shape
185
+ Ny, Nx = self._d_vertical_coeffs[level].shape
150
186
  # Batched FFT along axis 0
151
- cufft(d_coeffs, d_sino_f, self._fft_plans[level]["forward"])
187
+ d_vertical_coeffs_f = self._apply_fft(level)
188
+
152
189
  # Dampen the wavelets coefficients
153
- self._fw_kernel(d_sino_f, Nx, Ny, self.sigma)
190
+ self._fw_kernel(d_vertical_coeffs_f, Nx, Ny, self.sigma)
154
191
  # IFFT
155
- cuifft(d_sino_f, d_coeffs, self._fft_plans[level]["inverse"])
192
+ self._apply_ifft(d_vertical_coeffs_f, level)
193
+
156
194
  # Finally, inverse DWT
157
195
  self.cudwt.inverse()
158
196
  d_out = self._d_sino
@@ -160,3 +198,125 @@ class CudaMunchDeringer(MunchDeringer):
160
198
  d_out = self._d_sino[:, self.padding[0] : -self.padding[1]] # memcpy2D
161
199
  output.set(d_out)
162
200
  return output
201
+
202
+
203
+ def can_use_cuda_deringer():
204
+ """
205
+ Check whether cuda implementation of deringer can be used.
206
+ Checking for installed modules is not enough, as for example pyvkfft can be installed without cuda devices
207
+ """
208
+ can_do_fft = get_available_fft_implems() != []
209
+ return can_do_fft and __have_pycudwt__
210
+
211
+
212
+ class CudaVoDeringer(VoDeringer):
213
+ """
214
+ An interface to tomocupy's "remove_all_stripe".
215
+ """
216
+
217
+ def _check_requirement(self):
218
+ if not (__have_tomocupy_deringer__):
219
+ raise ImportError("need cupy")
220
+
221
+ def remove_rings_radios(self, radios):
222
+ return remove_all_stripe_pycuda(radios, **self._remove_all_stripe_kwargs)
223
+
224
+ def remove_rings_sinograms(self, sinos):
225
+ radios = sinos.transpose(axes=(1, 0, 2)) # view, no copy
226
+ self.remove_rings_radios(radios)
227
+ return sinos
228
+
229
+ def remove_rings_sinogram(self, sino):
230
+ radios = sino.reshape(sino.shape[0], 1, sino.shape[1]) # no copy
231
+ self.remove_rings_radios(radios)
232
+ return sino
233
+
234
+ remove_rings = remove_rings_sinograms
235
+
236
+
237
+ class CudaSinoMeanDeringer(SinoMeanDeringer):
238
+ @docstring(SinoMeanDeringer)
239
+ def __init__(
240
+ self,
241
+ sinos_shape,
242
+ mode="subtract",
243
+ filter_cutoff=None,
244
+ padding_mode="edge",
245
+ fft_num_threads=None,
246
+ **cuda_options,
247
+ ):
248
+ self.processing = CudaProcessing(**(cuda_options or {}))
249
+ super().__init__(sinos_shape, mode, filter_cutoff, padding_mode, fft_num_threads)
250
+ self._init_kernels()
251
+
252
+ def _init_kernels(self):
253
+ self.d_sino_profile = self.processing.allocate_array("sino_profile", self.n_x)
254
+ self._mean_kernel = self.processing.kernel(
255
+ "vertical_mean",
256
+ filename=get_cuda_srcfile("normalization.cu"),
257
+ signature="PPiii",
258
+ )
259
+ self._mean_kernel_block = (32, 1, 32)
260
+ self._mean_kernel_grid = [updiv(a, b) for a, b in zip(self.sinos_shape[::-1], self._mean_kernel_block)]
261
+ self._mean_kernel_args = [self.d_sino_profile, np.int32(self.n_x), np.int32(self.n_angles), np.int32(self.n_z)]
262
+ self._mean_kernel_kwargs = {
263
+ "grid": self._mean_kernel_grid,
264
+ "block": self._mean_kernel_block,
265
+ }
266
+
267
+ self._op_kernel = self.processing.kernel(
268
+ "inplace_generic_op_3Dby1D",
269
+ filename=get_cuda_srcfile("ElementOp.cu"),
270
+ signature="PPiii",
271
+ options=["-DGENERIC_OP=%d" % (3 if self.mode == "divide" else 1)],
272
+ )
273
+ self._op_kernel_block = (16, 16, 4)
274
+ self._op_kernel_grid = [updiv(a, b) for a, b in zip(self.sinos_shape[::-1], self._op_kernel_block)]
275
+ self._op_kernel_args = [self.d_sino_profile, np.int32(self.n_x), np.int32(self.n_angles), np.int32(self.n_z)]
276
+ self._op_kernel_kwargs = {
277
+ "grid": self._op_kernel_grid,
278
+ "block": self._op_kernel_block,
279
+ }
280
+
281
+ def _init_filter(self, filter_cutoff, fft_num_threads, padding_mode):
282
+ super()._init_filter(filter_cutoff, fft_num_threads, padding_mode)
283
+ if filter_cutoff is None:
284
+ return
285
+ self._d_filter_f = self.processing.to_device("_filter_f", self._filter_f)
286
+
287
+ self.padder = CudaPadding(
288
+ (self.n_x, 1),
289
+ ((self._pad_left, self._pad_right), (0, 0)),
290
+ mode=self.padding_mode,
291
+ cuda_options={"ctx": self.processing.ctx},
292
+ )
293
+ fft_cls = get_fft_class()
294
+ self._fft = fft_cls(self._filter_size, np.float32, r2c=True)
295
+
296
+ def _apply_filter(self, sino_profile):
297
+ if self._filter_f is None:
298
+ return sino_profile
299
+
300
+ sino_profile = sino_profile.reshape((-1, 1)) # view
301
+ sino_profile_p = self.padder.pad(sino_profile).ravel()
302
+
303
+ sino_profile_f = self._fft.fft(sino_profile_p)
304
+ sino_profile_f *= self._d_filter_f
305
+ self._fft.ifft(sino_profile_f, output=sino_profile_p)
306
+
307
+ self.d_sino_profile[:] = sino_profile_p[self._pad_left : -self._pad_right]
308
+ return self.d_sino_profile
309
+
310
+ def remove_rings_sinogram(self, sino, output=None):
311
+ #
312
+ if output is not None:
313
+ raise NotImplementedError
314
+ #
315
+ self._mean_kernel(sino, *self._mean_kernel_args, **self._mean_kernel_kwargs)
316
+ self._apply_filter(self.d_sino_profile)
317
+ self._op_kernel(sino, *self._op_kernel_args, **self._op_kernel_kwargs)
318
+ return sino
319
+
320
+ def remove_rings_sinograms(self, sinograms):
321
+ for i in range(sinograms.shape[0]):
322
+ self.remove_rings_sinogram(sinograms[i])
@@ -1,6 +1,4 @@
1
1
  import numpy as np
2
- import pycuda.gpuarray as garray
3
- from ..cuda.kernel import CudaKernel
4
2
  from ..utils import get_cuda_srcfile, updiv, deprecated_class
5
3
  from .sinogram import SinoBuilder, SinoNormalization, SinoMult
6
4
  from .sinogram import _convert_halftomo_right # FIXME Temporary patch
@@ -26,7 +24,7 @@ class CudaSinoBuilder(SinoBuilder):
26
24
  if not (self.halftomo):
27
25
  return
28
26
  kernel_name = "halftomo_kernel"
29
- self.halftomo_kernel = CudaKernel(
27
+ self.halftomo_kernel = self.cuda_processing.kernel(
30
28
  kernel_name,
31
29
  get_cuda_srcfile("halftomo.cu"),
32
30
  signature="PPPiii",
@@ -36,15 +34,19 @@ class CudaSinoBuilder(SinoBuilder):
36
34
  self._halftomo_gridsize = (updiv(self.extended_sino_width, blk[0]), updiv((self.n_angles + 1) // 2, blk[1]), 1)
37
35
  d = self.n_x - self.extended_sino_width // 2 # will have to be adapted for varying axis pos
38
36
  self.halftomo_weights = np.linspace(0, 1, 2 * abs(d), endpoint=True, dtype="f")
39
- self.d_halftomo_weights = garray.to_gpu(self.halftomo_weights)
37
+ self.d_halftomo_weights = self.cuda_processing.to_device("d_halftomo_weights", self.halftomo_weights)
40
38
  # Allocate one single sinogram (kernel needs c-contiguous array).
41
39
  # If odd number of angles: repeat last angle.
42
- self.d_sino = garray.zeros((self.n_angles + (self.n_angles & 1), self.n_x), "f")
40
+ self.d_sino = self.cuda_processing.allocate_array(
41
+ "d_sino", (self.n_angles + (self.n_angles & 1), self.n_x), "f"
42
+ )
43
43
  self.h_sino = self.d_sino.get()
44
44
  #
45
45
  self.cuda_processing.init_arrays_to_none(["d_output"])
46
46
  if self._halftomo_flip:
47
- self.xflip_kernel = CudaKernel("reverse2D_x", get_cuda_srcfile("ElementOp.cu"), signature="Pii")
47
+ self.xflip_kernel = self.cuda_processing.kernel(
48
+ "reverse2D_x", get_cuda_srcfile("ElementOp.cu"), signature="Pii"
49
+ )
48
50
  blk = (32, 32, 1)
49
51
  self._xflip_blksize = blk
50
52
  self._xflip_gridsize_1 = (updiv(self.n_x, blk[0]), updiv(self.n_angles, blk[1]), 1)
@@ -106,7 +108,7 @@ class CudaSinoBuilder(SinoBuilder):
106
108
 
107
109
  def _get_sinos_halftomo(self, radios, output=None):
108
110
  if output is None:
109
- output = garray.zeros(self.output_shape, "f")
111
+ output = self.cuda_processing.allocate_array("output", self.output_shape, "f")
110
112
  elif output.shape != self.output_shape:
111
113
  raise ValueError("Expected output to have shape %s but got %s" % (self.output_shape, output.shape))
112
114
  for i in range(self.n_z):
@@ -127,7 +129,7 @@ class CudaSinoMult(SinoMult):
127
129
  self._init_kernel()
128
130
 
129
131
  def _init_kernel(self):
130
- self.halftomo_kernel = CudaKernel(
132
+ self.halftomo_kernel = self.cuda_processing.kernel(
131
133
  "halftomo_prepare_sinogram", filename=get_cuda_srcfile("halftomo.cu"), signature="PPiiii"
132
134
  )
133
135
  self.d_weights = self.cuda_processing.set_array("d_weights", self.weights)
@@ -165,9 +167,9 @@ class CudaSinoNormalization(SinoNormalization):
165
167
  #
166
168
 
167
169
  def _init_cuda_normalization(self):
168
- self._d_tmp = garray.zeros(self.sinos_shape[-2:], "f")
170
+ self._d_tmp = self.cuda_processing.allocate_array("_d_tmp", self.sinos_shape[-2:], "f")
169
171
  if self.normalization_kind == "chebyshev":
170
- self._chebyshev_kernel = CudaKernel(
172
+ self._chebyshev_kernel = self.cuda_processing.kernel(
171
173
  "normalize_chebyshev",
172
174
  filename=get_cuda_srcfile("normalization.cu"),
173
175
  signature="Piii",
@@ -183,12 +185,14 @@ class CudaSinoNormalization(SinoNormalization):
183
185
  # If normalization_array is 1D, make a 2D array by repeating the line
184
186
  if normalization_array.ndim == 1:
185
187
  normalization_array = np.tile(normalization_array, (self.n_angles, 1))
186
- self._d_normalization_array = garray.to_gpu(normalization_array.astype("f"))
188
+ self._d_normalization_array = self.cuda_processing.to_device(
189
+ "_d_normalization_array", normalization_array.astype("f")
190
+ )
187
191
  if self.normalization_kind == "subtraction":
188
192
  generic_op_val = 1
189
193
  elif self.normalization_kind == "division":
190
194
  generic_op_val = 3
191
- self._norm_kernel = CudaKernel(
195
+ self._norm_kernel = self.cuda_processing.kernel(
192
196
  "inplace_generic_op_2Dby2D",
193
197
  filename=get_cuda_srcfile("ElementOp.cu"),
194
198
  signature="PPii",
@@ -1,26 +1,33 @@
1
1
  import numpy as np
2
2
  import pytest
3
- from nabu.utils import clip_circle
4
- from nabu.testutils import get_data, compare_arrays, generate_tests_scenarios, __do_long_tests__
5
- from nabu.reconstruction.rings import MunchDeringer
3
+ from nabu.reconstruction.rings_cuda import CudaSinoMeanDeringer
4
+ from nabu.testutils import compare_arrays, get_data, generate_tests_scenarios, __do_long_tests__
5
+ from nabu.reconstruction.rings import MunchDeringer, SinoMeanDeringer, VoDeringer, __has_algotom__
6
6
  from nabu.thirdparty.pore3d_deringer_munch import munchetal_filter
7
7
  from nabu.cuda.utils import __has_pycuda__, get_cuda_context
8
8
 
9
- __have_gpuderinger__ = False
10
9
  if __has_pycuda__:
11
10
  import pycuda.gpuarray as garray
12
- from nabu.reconstruction.rings_cuda import CudaMunchDeringer, __have_pycudwt__, __have_skcuda__
13
-
14
- if __have_pycudwt__ and __have_skcuda__:
15
- __have_gpuderinger__ = True
11
+ from nabu.processing.fft_cuda import get_available_fft_implems
12
+ from nabu.reconstruction.rings_cuda import (
13
+ CudaMunchDeringer,
14
+ can_use_cuda_deringer,
15
+ CudaVoDeringer,
16
+ __have_tomocupy_deringer__,
17
+ )
16
18
 
19
+ __has_cuda_deringer__ = can_use_cuda_deringer()
20
+ else:
21
+ __has_cuda_deringer__ = False
22
+ __have_tomocupy_deringer__ = False
17
23
 
18
24
  fw_scenarios = generate_tests_scenarios(
19
25
  {
20
26
  "levels": [4],
21
27
  "sigma": [1.0],
22
28
  "wname": ["db15"],
23
- "padding": [None],
29
+ "padding": [(100, 100)],
30
+ "fft_implem": ["skcuda"],
24
31
  }
25
32
  )
26
33
  if __do_long_tests__:
@@ -30,6 +37,7 @@ if __do_long_tests__:
30
37
  "sigma": [1.0, 2.0],
31
38
  "wname": ["db15", "haar", "rbio4.4"],
32
39
  "padding": [None, (100, 100), (50, 71)],
40
+ "fft_implem": ["skcuda", "vkfft"],
33
41
  }
34
42
  )
35
43
 
@@ -38,12 +46,14 @@ if __do_long_tests__:
38
46
  def bootstrap(request):
39
47
  cls = request.cls
40
48
  cls.sino = get_data("mri_sino500.npz")["data"]
49
+ cls.sino2 = get_data("sino_bamboo_hercules.npz")["data"]
41
50
  cls.tol = 5e-3
42
51
  cls.rings = {150: 0.5, -150: 0.5}
43
- if __have_gpuderinger__:
52
+ if __has_pycuda__:
44
53
  cls.ctx = get_cuda_context(cleanup_at_exit=False)
54
+ cls._available_fft_implems = get_available_fft_implems()
45
55
  yield
46
- if __have_gpuderinger__:
56
+ if __has_pycuda__:
47
57
  cls.ctx.pop()
48
58
 
49
59
 
@@ -95,17 +105,22 @@ class TestMunchDeringer:
95
105
  assert err_max < self.tol, "Max error is too high"
96
106
 
97
107
  @pytest.mark.skipif(
98
- not (__have_gpuderinger__) or munchetal_filter is None,
99
- reason="Need pycuda, pycudwt and scikit-cuda for this test",
108
+ not (__has_cuda_deringer__) or munchetal_filter is None,
109
+ reason="Need pycuda, pycudwt and (scikit-cuda or pyvkfft) for this test",
100
110
  )
101
111
  @pytest.mark.parametrize("config", fw_scenarios)
102
112
  def test_cuda_munch_deringer(self, config):
113
+ fft_implem = config["fft_implem"]
114
+ if fft_implem not in self._available_fft_implems:
115
+ pytest.skip("FFT implementation %s is not available" % fft_implem)
103
116
  sino = self.add_stripes_to_sino(self.sino, self.rings)
104
117
  deringer = CudaMunchDeringer(
105
118
  config["sigma"],
106
119
  self.sino.shape,
107
120
  levels=config["levels"],
108
121
  wname=config["wname"],
122
+ padding=config["padding"],
123
+ fft_backend=fft_implem,
109
124
  cuda_options={"ctx": self.ctx},
110
125
  )
111
126
  d_sino = garray.to_gpu(sino)
@@ -115,4 +130,91 @@ class TestMunchDeringer:
115
130
  ref = self.get_fourier_wavelets_reference_result(sino, config)
116
131
 
117
132
  err_max = np.max(np.abs(res - ref))
118
- assert err_max < 1e-1, "Max error is too high"
133
+ assert err_max < 1e-1, "Max error is too high with configuration %s" % (str(config))
134
+
135
+ @pytest.mark.skipif(
136
+ not (__has_algotom__),
137
+ reason="Need algotom for this test",
138
+ )
139
+ def test_vo_deringer(self):
140
+ deringer = VoDeringer(self.sino.shape)
141
+ sino_deringed = deringer.remove_rings_sinogram(self.sino)
142
+ sinos = np.tile(self.sino, (10, 1, 1))
143
+ sinos_deringed = deringer.remove_rings_sinograms(sinos)
144
+ # TODO check result. The generated test sinogram is "too synthetic" for this kind of deringer
145
+
146
+ @pytest.mark.skipif(
147
+ not (__have_tomocupy_deringer__),
148
+ reason="Need cupy for this test",
149
+ )
150
+ def test_cuda_vo_deringer(self):
151
+ # Beware, this deringer seems to be buggy for "too-small" sinograms
152
+ # (NaNs on the edges and in some regions). To be investigated
153
+
154
+ deringer = CudaVoDeringer(self.sino2.shape)
155
+ d_sino = garray.to_gpu(self.sino2)
156
+ deringer.remove_rings_sinogram(d_sino)
157
+ sino = d_sino.get()
158
+
159
+ if __has_algotom__:
160
+ vo_deringer = VoDeringer(self.sino2.shape)
161
+ sino_deringed = vo_deringer.remove_rings_sinogram(self.sino2)
162
+
163
+ assert (
164
+ np.max(np.abs(sino - sino_deringed)) < 2e-3
165
+ ), "Cuda implementation of Vo deringer does not yield the same results as base implementation"
166
+
167
+ def test_mean_deringer(self):
168
+ deringer_no_filtering = SinoMeanDeringer(self.sino.shape, mode="subtract")
169
+
170
+ sino = self.sino.copy()
171
+ deringer_no_filtering.remove_rings_sinogram(sino)
172
+
173
+ sino = self.sino.copy()
174
+ deringer_with_filtering = SinoMeanDeringer(self.sino.shape, mode="subtract", filter_cutoff=(0, 30))
175
+ deringer_with_filtering.remove_rings_sinogram(sino)
176
+ # TODO check results
177
+
178
+ @pytest.mark.skipif(not (__has_pycuda__), reason="Need pycuda for this test")
179
+ def test_cuda_mean_deringer(self):
180
+ cuda_deringer = CudaSinoMeanDeringer(
181
+ self.sino.shape,
182
+ mode="subtract",
183
+ filter_cutoff=(
184
+ 0,
185
+ 10,
186
+ ),
187
+ ctx=self.ctx,
188
+ )
189
+ deringer = SinoMeanDeringer(
190
+ self.sino.shape,
191
+ mode="subtract",
192
+ filter_cutoff=(
193
+ 0,
194
+ 10,
195
+ ),
196
+ )
197
+
198
+ d_sino = cuda_deringer.processing.to_device("sino", self.sino)
199
+ cuda_deringer.remove_rings_sinogram(d_sino)
200
+
201
+ sino = self.sino.copy()
202
+ sino_d = deringer.remove_rings_sinogram(sino)
203
+
204
+ dirac = np.zeros(self.sino.shape[-1], "f")
205
+ dirac[dirac.size // 2] = 1
206
+ deringer_filter_response = deringer._apply_filter(dirac)
207
+
208
+ d_dirac = cuda_deringer.processing.to_device("dirac", dirac)
209
+ cuda_deringer_filter_response = cuda_deringer._apply_filter(d_dirac)
210
+
211
+ is_close, residual = compare_arrays(
212
+ deringer_filter_response, cuda_deringer_filter_response.get(), 1e-7, return_residual=True
213
+ )
214
+ assert is_close, "Cuda deringer does not have the correct filter response: max_error=%.2e" % residual
215
+
216
+ # There is a rather large discrepancy between the vertical_mean kernel and numpy.mean(). Not sure who is right
217
+ is_close, residual = compare_arrays(sino_d, d_sino.get(), 1e-1, return_residual=True)
218
+ assert is_close, (
219
+ "Cuda deringer does not yield the same result as base implementation: max_error=%.2e" % residual
220
+ )