nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doc/conf.py +1 -1
- doc/doc_config.py +32 -0
- nabu/__init__.py +2 -1
- nabu/app/bootstrap_stitching.py +1 -1
- nabu/app/cli_configs.py +122 -2
- nabu/app/composite_cor.py +27 -2
- nabu/app/correct_rot.py +70 -0
- nabu/app/create_distortion_map_from_poly.py +42 -18
- nabu/app/diag_to_pix.py +358 -0
- nabu/app/diag_to_rot.py +449 -0
- nabu/app/generate_header.py +4 -3
- nabu/app/histogram.py +2 -2
- nabu/app/multicor.py +6 -1
- nabu/app/parse_reconstruction_log.py +151 -0
- nabu/app/prepare_weights_double.py +83 -22
- nabu/app/reconstruct.py +5 -1
- nabu/app/reconstruct_helical.py +7 -0
- nabu/app/reduce_dark_flat.py +6 -3
- nabu/app/rotate.py +4 -4
- nabu/app/stitching.py +16 -2
- nabu/app/tests/test_reduce_dark_flat.py +18 -2
- nabu/app/validator.py +4 -4
- nabu/cuda/convolution.py +8 -376
- nabu/cuda/fft.py +4 -0
- nabu/cuda/kernel.py +4 -4
- nabu/cuda/medfilt.py +5 -158
- nabu/cuda/padding.py +5 -71
- nabu/cuda/processing.py +23 -2
- nabu/cuda/src/ElementOp.cu +78 -0
- nabu/cuda/src/backproj.cu +28 -2
- nabu/cuda/src/fourier_wavelets.cu +2 -2
- nabu/cuda/src/normalization.cu +23 -0
- nabu/cuda/src/padding.cu +2 -2
- nabu/cuda/src/transpose.cu +16 -0
- nabu/cuda/utils.py +39 -0
- nabu/estimation/alignment.py +10 -1
- nabu/estimation/cor.py +808 -38
- nabu/estimation/cor_sino.py +7 -9
- nabu/estimation/tests/test_cor.py +85 -3
- nabu/io/reader.py +26 -18
- nabu/io/tests/test_cast_volume.py +3 -3
- nabu/io/tests/test_detector_distortion.py +3 -3
- nabu/io/tiffwriter_zmm.py +2 -2
- nabu/io/utils.py +14 -4
- nabu/io/writer.py +5 -3
- nabu/misc/fftshift.py +6 -0
- nabu/misc/histogram.py +5 -285
- nabu/misc/histogram_cuda.py +8 -104
- nabu/misc/kernel_base.py +3 -121
- nabu/misc/padding_base.py +5 -69
- nabu/misc/processing_base.py +3 -107
- nabu/misc/rotation.py +5 -62
- nabu/misc/rotation_cuda.py +5 -65
- nabu/misc/transpose.py +6 -0
- nabu/misc/unsharp.py +3 -78
- nabu/misc/unsharp_cuda.py +5 -52
- nabu/misc/unsharp_opencl.py +8 -85
- nabu/opencl/fft.py +6 -0
- nabu/opencl/kernel.py +21 -6
- nabu/opencl/padding.py +5 -72
- nabu/opencl/processing.py +27 -5
- nabu/opencl/src/backproj.cl +3 -3
- nabu/opencl/src/fftshift.cl +65 -12
- nabu/opencl/src/padding.cl +2 -2
- nabu/opencl/src/roll.cl +96 -0
- nabu/opencl/src/transpose.cl +16 -0
- nabu/pipeline/config_validators.py +63 -3
- nabu/pipeline/dataset_validator.py +2 -2
- nabu/pipeline/estimators.py +193 -35
- nabu/pipeline/fullfield/chunked.py +34 -17
- nabu/pipeline/fullfield/chunked_cuda.py +7 -5
- nabu/pipeline/fullfield/computations.py +48 -13
- nabu/pipeline/fullfield/nabu_config.py +13 -13
- nabu/pipeline/fullfield/processconfig.py +10 -5
- nabu/pipeline/fullfield/reconstruction.py +1 -2
- nabu/pipeline/helical/fbp.py +5 -0
- nabu/pipeline/helical/filtering.py +12 -9
- nabu/pipeline/helical/gridded_accumulator.py +179 -33
- nabu/pipeline/helical/helical_chunked_regridded.py +262 -151
- nabu/pipeline/helical/helical_chunked_regridded_cuda.py +4 -11
- nabu/pipeline/helical/helical_reconstruction.py +56 -18
- nabu/pipeline/helical/span_strategy.py +1 -1
- nabu/pipeline/helical/tests/test_accumulator.py +4 -0
- nabu/pipeline/params.py +23 -2
- nabu/pipeline/processconfig.py +3 -8
- nabu/pipeline/tests/test_chunk_reader.py +78 -0
- nabu/pipeline/tests/test_estimators.py +120 -2
- nabu/pipeline/utils.py +25 -0
- nabu/pipeline/writer.py +2 -0
- nabu/preproc/ccd_cuda.py +9 -7
- nabu/preproc/ctf.py +21 -26
- nabu/preproc/ctf_cuda.py +25 -25
- nabu/preproc/double_flatfield.py +14 -2
- nabu/preproc/double_flatfield_cuda.py +7 -11
- nabu/preproc/flatfield_cuda.py +23 -27
- nabu/preproc/phase.py +19 -24
- nabu/preproc/phase_cuda.py +21 -21
- nabu/preproc/shift_cuda.py +58 -28
- nabu/preproc/tests/test_ctf.py +5 -5
- nabu/preproc/tests/test_double_flatfield.py +2 -2
- nabu/preproc/tests/test_vshift.py +13 -2
- nabu/processing/__init__.py +0 -0
- nabu/processing/convolution_cuda.py +375 -0
- nabu/processing/fft_base.py +163 -0
- nabu/processing/fft_cuda.py +256 -0
- nabu/processing/fft_opencl.py +54 -0
- nabu/processing/fftshift.py +134 -0
- nabu/processing/histogram.py +286 -0
- nabu/processing/histogram_cuda.py +103 -0
- nabu/processing/kernel_base.py +126 -0
- nabu/processing/medfilt_cuda.py +159 -0
- nabu/processing/muladd.py +29 -0
- nabu/processing/muladd_cuda.py +68 -0
- nabu/processing/padding_base.py +71 -0
- nabu/processing/padding_cuda.py +75 -0
- nabu/processing/padding_opencl.py +77 -0
- nabu/processing/processing_base.py +123 -0
- nabu/processing/roll_opencl.py +64 -0
- nabu/processing/rotation.py +63 -0
- nabu/processing/rotation_cuda.py +66 -0
- nabu/processing/tests/__init__.py +0 -0
- nabu/processing/tests/test_fft.py +268 -0
- nabu/processing/tests/test_fftshift.py +71 -0
- nabu/{misc → processing}/tests/test_histogram.py +2 -4
- nabu/{cuda → processing}/tests/test_medfilt.py +1 -1
- nabu/processing/tests/test_muladd.py +54 -0
- nabu/{cuda → processing}/tests/test_padding.py +119 -75
- nabu/processing/tests/test_roll.py +63 -0
- nabu/{misc → processing}/tests/test_rotation.py +3 -2
- nabu/processing/tests/test_transpose.py +72 -0
- nabu/{misc → processing}/tests/test_unsharp.py +41 -8
- nabu/processing/transpose.py +126 -0
- nabu/processing/unsharp.py +79 -0
- nabu/processing/unsharp_cuda.py +53 -0
- nabu/processing/unsharp_opencl.py +75 -0
- nabu/reconstruction/fbp.py +34 -10
- nabu/reconstruction/fbp_base.py +35 -16
- nabu/reconstruction/fbp_opencl.py +7 -12
- nabu/reconstruction/filtering.py +2 -2
- nabu/reconstruction/filtering_cuda.py +13 -14
- nabu/reconstruction/filtering_opencl.py +3 -4
- nabu/reconstruction/projection.py +2 -0
- nabu/reconstruction/rings.py +158 -1
- nabu/reconstruction/rings_cuda.py +218 -58
- nabu/reconstruction/sinogram_cuda.py +16 -12
- nabu/reconstruction/tests/test_deringer.py +116 -14
- nabu/reconstruction/tests/test_fbp.py +22 -31
- nabu/reconstruction/tests/test_filtering.py +11 -2
- nabu/resources/dataset_analyzer.py +89 -26
- nabu/resources/nxflatfield.py +2 -2
- nabu/resources/tests/test_nxflatfield.py +1 -1
- nabu/resources/utils.py +9 -2
- nabu/stitching/alignment.py +184 -0
- nabu/stitching/config.py +241 -39
- nabu/stitching/definitions.py +6 -0
- nabu/stitching/frame_composition.py +4 -2
- nabu/stitching/overlap.py +99 -3
- nabu/stitching/sample_normalization.py +60 -0
- nabu/stitching/slurm_utils.py +10 -10
- nabu/stitching/tests/test_alignment.py +99 -0
- nabu/stitching/tests/test_config.py +16 -1
- nabu/stitching/tests/test_overlap.py +68 -2
- nabu/stitching/tests/test_sample_normalization.py +49 -0
- nabu/stitching/tests/test_slurm_utils.py +5 -5
- nabu/stitching/tests/test_utils.py +3 -33
- nabu/stitching/tests/test_z_stitching.py +391 -22
- nabu/stitching/utils.py +144 -202
- nabu/stitching/z_stitching.py +309 -126
- nabu/testutils.py +18 -0
- nabu/thirdparty/tomocupy_remove_stripe.py +586 -0
- nabu/utils.py +32 -6
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/LICENSE +1 -1
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/METADATA +5 -5
- nabu-2024.1.0rc3.dist-info/RECORD +296 -0
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/WHEEL +1 -1
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/entry_points.txt +5 -1
- nabu/conftest.py +0 -14
- nabu/opencl/fftshift.py +0 -92
- nabu/opencl/tests/test_fftshift.py +0 -55
- nabu/opencl/tests/test_padding.py +0 -84
- nabu-2023.2.1.dist-info/RECORD +0 -252
- /nabu/cuda/src/{fftshift.cu → dfi_fftshift.cu} +0 -0
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,15 @@
|
|
1
1
|
import numpy as np
|
2
|
-
import
|
3
|
-
from ..
|
4
|
-
from ..
|
5
|
-
from ..
|
6
|
-
from .
|
7
|
-
from
|
8
|
-
from
|
2
|
+
from ..utils import docstring, get_cuda_srcfile, updiv
|
3
|
+
from ..cuda.processing import CudaProcessing, __has_pycuda__
|
4
|
+
from ..processing.padding_cuda import CudaPadding
|
5
|
+
from ..processing.fft_cuda import get_fft_class, get_available_fft_implems
|
6
|
+
from ..processing.transpose import CudaTranspose
|
7
|
+
from ..thirdparty.tomocupy_remove_stripe import remove_all_stripe_pycuda, __have_tomocupy_deringer__
|
8
|
+
from .rings import MunchDeringer, SinoMeanDeringer, VoDeringer
|
9
9
|
|
10
|
+
if __has_pycuda__:
|
11
|
+
import pycuda.gpuarray as garray
|
12
|
+
from ..cuda.kernel import CudaKernel
|
10
13
|
|
11
14
|
try:
|
12
15
|
from pycudwt import Wavelets
|
@@ -14,21 +17,19 @@ try:
|
|
14
17
|
__have_pycudwt__ = True
|
15
18
|
except ImportError:
|
16
19
|
__have_pycudwt__ = False
|
17
|
-
try:
|
18
|
-
from skcuda.fft import Plan
|
19
|
-
from skcuda.fft import fft as cufft
|
20
|
-
from skcuda.fft import ifft as cuifft
|
21
|
-
|
22
|
-
__have_skcuda__ = True
|
23
|
-
except Exception as exc:
|
24
|
-
# We have to catch this very broad exception, because
|
25
|
-
# skcuda.cublas.cublasError cannot be evaluated without error when no cuda GPU is found
|
26
|
-
__have_skcuda__ = False
|
27
20
|
|
28
21
|
|
29
22
|
class CudaMunchDeringer(MunchDeringer):
|
30
23
|
def __init__(
|
31
|
-
self,
|
24
|
+
self,
|
25
|
+
sigma,
|
26
|
+
sinos_shape,
|
27
|
+
levels=None,
|
28
|
+
wname="db15",
|
29
|
+
padding=None,
|
30
|
+
padding_mode="edge",
|
31
|
+
fft_backend="skcuda",
|
32
|
+
cuda_options=None,
|
32
33
|
):
|
33
34
|
"""
|
34
35
|
Initialize a "Munch Et Al" sinogram deringer with the Cuda backend.
|
@@ -55,14 +56,15 @@ class CudaMunchDeringer(MunchDeringer):
|
|
55
56
|
super().__init__(sigma, sinos_shape, levels=levels, wname=wname, padding=padding, padding_mode=padding_mode)
|
56
57
|
self._check_can_use_wavelets()
|
57
58
|
self.cuda_processing = CudaProcessing(**(cuda_options or {}))
|
59
|
+
self.ctx = self.cuda_processing.ctx
|
58
60
|
self._init_pycudwt()
|
59
61
|
self._init_padding()
|
60
|
-
self._init_fft()
|
62
|
+
self._init_fft(fft_backend)
|
61
63
|
self._setup_fw_kernel()
|
62
64
|
|
63
65
|
def _check_can_use_wavelets(self):
|
64
|
-
if not (__have_pycudwt__
|
65
|
-
raise ValueError("Needs pycudwt
|
66
|
+
if not (__have_pycudwt__):
|
67
|
+
raise ValueError("Needs pycudwt to use this class")
|
66
68
|
|
67
69
|
def _init_padding(self):
|
68
70
|
if self.padding is None:
|
@@ -74,37 +76,49 @@ class CudaMunchDeringer(MunchDeringer):
|
|
74
76
|
cuda_options={"ctx": self.cuda_processing.ctx},
|
75
77
|
)
|
76
78
|
|
77
|
-
def _init_fft(self):
|
79
|
+
def _init_fft(self, fft_backend):
|
80
|
+
self.fft_cls = get_fft_class(backend=fft_backend)
|
81
|
+
# For all k >= 1, we perform a batched (I)FFT along axis 0 on an array
|
82
|
+
# of shape (n_a/2^k, n_x/2^k) (up to DWT size rounding)
|
83
|
+
if self.fft_cls.implem == "vkfft":
|
84
|
+
self._create_plans_vkfft()
|
85
|
+
else:
|
86
|
+
self._create_plans_skfft()
|
87
|
+
|
88
|
+
def _create_plans_skfft(self):
|
78
89
|
self._fft_plans = {}
|
79
90
|
for level, d_vcoeff in self._d_vertical_coeffs.items():
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
)
|
107
|
-
self._fft_plans[level] =
|
91
|
+
self._fft_plans[level] = self.fft_cls(d_vcoeff.shape, np.float32, r2c=True, axes=(0,), ctx=self.ctx)
|
92
|
+
|
93
|
+
def _create_plans_vkfft(self):
|
94
|
+
"""
|
95
|
+
VKFFT does not support batched R2C transforms along axis 0 ("slow axis").
|
96
|
+
We can either use C2C (faster, but needs more memory) or transpose the arrays to do R2C along axis=1.
|
97
|
+
Here we transpose the arrays.
|
98
|
+
"""
|
99
|
+
self._fft_plans = {}
|
100
|
+
self._transpose_forward_1 = {}
|
101
|
+
self._transpose_forward_2 = {}
|
102
|
+
self._transpose_inverse_1 = {}
|
103
|
+
self._transpose_inverse_2 = {}
|
104
|
+
for level, d_vcoeff in self._d_vertical_coeffs.items():
|
105
|
+
shape = d_vcoeff.shape
|
106
|
+
# Normally, a batched 1D fft on 2D data of shape (Ny, Nx) along axis 0 returns an array of shape (Ny/2+1, Nx):
|
107
|
+
#
|
108
|
+
# (Ny, Nx) --[fft_0]--> (Ny/2+1, Nx)
|
109
|
+
# f32 c64
|
110
|
+
#
|
111
|
+
# In this case, we can only do batched 1D transform along axis 1, so we have to trick with transposes:
|
112
|
+
#
|
113
|
+
# (Ny, Nx) --[T]--> (Nx, Ny) --[fft_1]--> (Nx, Ny/2+1) --[T]--> (Ny/2+1, Nx)
|
114
|
+
# f32 f32 c64 c64
|
115
|
+
#
|
116
|
+
# (In both cases IFFT is done the same way from right to left)
|
117
|
+
self._transpose_forward_1[level] = CudaTranspose(shape, np.float32, ctx=self.ctx)
|
118
|
+
self._fft_plans[level] = self.fft_cls(shape[::-1], np.float32, r2c=True, ctx=self.ctx)
|
119
|
+
self._transpose_forward_2[level] = CudaTranspose((shape[1], shape[0] // 2 + 1), np.complex64, ctx=self.ctx)
|
120
|
+
self._transpose_inverse_1[level] = CudaTranspose((shape[0] // 2 + 1, shape[1]), np.complex64, ctx=self.ctx)
|
121
|
+
self._transpose_inverse_2[level] = CudaTranspose(shape[::-1], np.float32, ctx=self.ctx)
|
108
122
|
|
109
123
|
def _init_pycudwt(self):
|
110
124
|
if self.levels is None:
|
@@ -118,7 +132,6 @@ class CudaMunchDeringer(MunchDeringer):
|
|
118
132
|
|
119
133
|
def _get_vertical_coeffs(self):
|
120
134
|
self._d_vertical_coeffs = {}
|
121
|
-
self._d_sino_f = {}
|
122
135
|
# Transfer the (0-memset) coefficients in order to get all the shapes
|
123
136
|
coeffs = self.cudwt.coeffs
|
124
137
|
for i in range(self.cudwt.levels):
|
@@ -126,7 +139,6 @@ class CudaMunchDeringer(MunchDeringer):
|
|
126
139
|
self._d_vertical_coeffs[i + 1] = garray.empty(
|
127
140
|
shape, np.float32, gpudata=self.cudwt.coeff_int_ptr(3 * i + 2)
|
128
141
|
)
|
129
|
-
self._d_sino_f[i + 1] = garray.zeros((shape[0] // 2 + 1, shape[1]), dtype=np.complex64)
|
130
142
|
|
131
143
|
def _setup_fw_kernel(self):
|
132
144
|
self._fw_kernel = CudaKernel(
|
@@ -135,6 +147,32 @@ class CudaMunchDeringer(MunchDeringer):
|
|
135
147
|
signature="Piif",
|
136
148
|
)
|
137
149
|
|
150
|
+
def _apply_fft(self, level):
|
151
|
+
d_coeffs = self._d_vertical_coeffs[level]
|
152
|
+
# All the memory is allocated (or re-used) under the hood
|
153
|
+
if self.fft_cls.implem == "vkfft":
|
154
|
+
d_coeffs_t = self._transpose_forward_1[level](
|
155
|
+
d_coeffs
|
156
|
+
) # allocates self._transpose_forward_1[level].processing.dst
|
157
|
+
d_coeffs_t_f = self._fft_plans[level].fft(d_coeffs_t) # allocates self._fft_plans[level].output_fft
|
158
|
+
d_coeffs_f = self._transpose_forward_2[level](
|
159
|
+
d_coeffs_t_f
|
160
|
+
) # allocates self._transpose_forward_2[level].processing.dst
|
161
|
+
else:
|
162
|
+
d_coeffs_f = self._fft_plans[level].fft(d_coeffs)
|
163
|
+
return d_coeffs_f
|
164
|
+
|
165
|
+
def _apply_ifft(self, d_coeffs_f, level):
|
166
|
+
d_coeffs = self._d_vertical_coeffs[level]
|
167
|
+
if self.fft_cls.implem == "vkfft":
|
168
|
+
d_coeffs_t_f = self._transpose_inverse_1[level](d_coeffs_f, dst=self._fft_plans[level].output_fft)
|
169
|
+
d_coeffs_t = self._fft_plans[level].ifft(
|
170
|
+
d_coeffs_t_f, output=self._transpose_forward_1[level].processing.dst
|
171
|
+
)
|
172
|
+
self._transpose_inverse_2[level](d_coeffs_t, dst=d_coeffs)
|
173
|
+
else:
|
174
|
+
self._fft_plans[level].ifft(d_coeffs_f, output=d_coeffs)
|
175
|
+
|
138
176
|
def _destripe_2D(self, d_sino, output):
|
139
177
|
if self.padding is not None:
|
140
178
|
d_sino = self.padder.pad(d_sino)
|
@@ -144,15 +182,15 @@ class CudaMunchDeringer(MunchDeringer):
|
|
144
182
|
self.cudwt.forward()
|
145
183
|
for i in range(self.cudwt.levels):
|
146
184
|
level = i + 1
|
147
|
-
|
148
|
-
d_sino_f = self._d_sino_f[level]
|
149
|
-
Ny, Nx = d_coeffs.shape
|
185
|
+
Ny, Nx = self._d_vertical_coeffs[level].shape
|
150
186
|
# Batched FFT along axis 0
|
151
|
-
|
187
|
+
d_vertical_coeffs_f = self._apply_fft(level)
|
188
|
+
|
152
189
|
# Dampen the wavelets coefficients
|
153
|
-
self._fw_kernel(
|
190
|
+
self._fw_kernel(d_vertical_coeffs_f, Nx, Ny, self.sigma)
|
154
191
|
# IFFT
|
155
|
-
|
192
|
+
self._apply_ifft(d_vertical_coeffs_f, level)
|
193
|
+
|
156
194
|
# Finally, inverse DWT
|
157
195
|
self.cudwt.inverse()
|
158
196
|
d_out = self._d_sino
|
@@ -160,3 +198,125 @@ class CudaMunchDeringer(MunchDeringer):
|
|
160
198
|
d_out = self._d_sino[:, self.padding[0] : -self.padding[1]] # memcpy2D
|
161
199
|
output.set(d_out)
|
162
200
|
return output
|
201
|
+
|
202
|
+
|
203
|
+
def can_use_cuda_deringer():
|
204
|
+
"""
|
205
|
+
Check whether cuda implementation of deringer can be used.
|
206
|
+
Checking for installed modules is not enough, as for example pyvkfft can be installed without cuda devices
|
207
|
+
"""
|
208
|
+
can_do_fft = get_available_fft_implems() != []
|
209
|
+
return can_do_fft and __have_pycudwt__
|
210
|
+
|
211
|
+
|
212
|
+
class CudaVoDeringer(VoDeringer):
|
213
|
+
"""
|
214
|
+
An interface to tomocupy's "remove_all_stripe".
|
215
|
+
"""
|
216
|
+
|
217
|
+
def _check_requirement(self):
|
218
|
+
if not (__have_tomocupy_deringer__):
|
219
|
+
raise ImportError("need cupy")
|
220
|
+
|
221
|
+
def remove_rings_radios(self, radios):
|
222
|
+
return remove_all_stripe_pycuda(radios, **self._remove_all_stripe_kwargs)
|
223
|
+
|
224
|
+
def remove_rings_sinograms(self, sinos):
|
225
|
+
radios = sinos.transpose(axes=(1, 0, 2)) # view, no copy
|
226
|
+
self.remove_rings_radios(radios)
|
227
|
+
return sinos
|
228
|
+
|
229
|
+
def remove_rings_sinogram(self, sino):
|
230
|
+
radios = sino.reshape(sino.shape[0], 1, sino.shape[1]) # no copy
|
231
|
+
self.remove_rings_radios(radios)
|
232
|
+
return sino
|
233
|
+
|
234
|
+
remove_rings = remove_rings_sinograms
|
235
|
+
|
236
|
+
|
237
|
+
class CudaSinoMeanDeringer(SinoMeanDeringer):
|
238
|
+
@docstring(SinoMeanDeringer)
|
239
|
+
def __init__(
|
240
|
+
self,
|
241
|
+
sinos_shape,
|
242
|
+
mode="subtract",
|
243
|
+
filter_cutoff=None,
|
244
|
+
padding_mode="edge",
|
245
|
+
fft_num_threads=None,
|
246
|
+
**cuda_options,
|
247
|
+
):
|
248
|
+
self.processing = CudaProcessing(**(cuda_options or {}))
|
249
|
+
super().__init__(sinos_shape, mode, filter_cutoff, padding_mode, fft_num_threads)
|
250
|
+
self._init_kernels()
|
251
|
+
|
252
|
+
def _init_kernels(self):
|
253
|
+
self.d_sino_profile = self.processing.allocate_array("sino_profile", self.n_x)
|
254
|
+
self._mean_kernel = self.processing.kernel(
|
255
|
+
"vertical_mean",
|
256
|
+
filename=get_cuda_srcfile("normalization.cu"),
|
257
|
+
signature="PPiii",
|
258
|
+
)
|
259
|
+
self._mean_kernel_block = (32, 1, 32)
|
260
|
+
self._mean_kernel_grid = [updiv(a, b) for a, b in zip(self.sinos_shape[::-1], self._mean_kernel_block)]
|
261
|
+
self._mean_kernel_args = [self.d_sino_profile, np.int32(self.n_x), np.int32(self.n_angles), np.int32(self.n_z)]
|
262
|
+
self._mean_kernel_kwargs = {
|
263
|
+
"grid": self._mean_kernel_grid,
|
264
|
+
"block": self._mean_kernel_block,
|
265
|
+
}
|
266
|
+
|
267
|
+
self._op_kernel = self.processing.kernel(
|
268
|
+
"inplace_generic_op_3Dby1D",
|
269
|
+
filename=get_cuda_srcfile("ElementOp.cu"),
|
270
|
+
signature="PPiii",
|
271
|
+
options=["-DGENERIC_OP=%d" % (3 if self.mode == "divide" else 1)],
|
272
|
+
)
|
273
|
+
self._op_kernel_block = (16, 16, 4)
|
274
|
+
self._op_kernel_grid = [updiv(a, b) for a, b in zip(self.sinos_shape[::-1], self._op_kernel_block)]
|
275
|
+
self._op_kernel_args = [self.d_sino_profile, np.int32(self.n_x), np.int32(self.n_angles), np.int32(self.n_z)]
|
276
|
+
self._op_kernel_kwargs = {
|
277
|
+
"grid": self._op_kernel_grid,
|
278
|
+
"block": self._op_kernel_block,
|
279
|
+
}
|
280
|
+
|
281
|
+
def _init_filter(self, filter_cutoff, fft_num_threads, padding_mode):
|
282
|
+
super()._init_filter(filter_cutoff, fft_num_threads, padding_mode)
|
283
|
+
if filter_cutoff is None:
|
284
|
+
return
|
285
|
+
self._d_filter_f = self.processing.to_device("_filter_f", self._filter_f)
|
286
|
+
|
287
|
+
self.padder = CudaPadding(
|
288
|
+
(self.n_x, 1),
|
289
|
+
((self._pad_left, self._pad_right), (0, 0)),
|
290
|
+
mode=self.padding_mode,
|
291
|
+
cuda_options={"ctx": self.processing.ctx},
|
292
|
+
)
|
293
|
+
fft_cls = get_fft_class()
|
294
|
+
self._fft = fft_cls(self._filter_size, np.float32, r2c=True)
|
295
|
+
|
296
|
+
def _apply_filter(self, sino_profile):
|
297
|
+
if self._filter_f is None:
|
298
|
+
return sino_profile
|
299
|
+
|
300
|
+
sino_profile = sino_profile.reshape((-1, 1)) # view
|
301
|
+
sino_profile_p = self.padder.pad(sino_profile).ravel()
|
302
|
+
|
303
|
+
sino_profile_f = self._fft.fft(sino_profile_p)
|
304
|
+
sino_profile_f *= self._d_filter_f
|
305
|
+
self._fft.ifft(sino_profile_f, output=sino_profile_p)
|
306
|
+
|
307
|
+
self.d_sino_profile[:] = sino_profile_p[self._pad_left : -self._pad_right]
|
308
|
+
return self.d_sino_profile
|
309
|
+
|
310
|
+
def remove_rings_sinogram(self, sino, output=None):
|
311
|
+
#
|
312
|
+
if output is not None:
|
313
|
+
raise NotImplementedError
|
314
|
+
#
|
315
|
+
self._mean_kernel(sino, *self._mean_kernel_args, **self._mean_kernel_kwargs)
|
316
|
+
self._apply_filter(self.d_sino_profile)
|
317
|
+
self._op_kernel(sino, *self._op_kernel_args, **self._op_kernel_kwargs)
|
318
|
+
return sino
|
319
|
+
|
320
|
+
def remove_rings_sinograms(self, sinograms):
|
321
|
+
for i in range(sinograms.shape[0]):
|
322
|
+
self.remove_rings_sinogram(sinograms[i])
|
@@ -1,6 +1,4 @@
|
|
1
1
|
import numpy as np
|
2
|
-
import pycuda.gpuarray as garray
|
3
|
-
from ..cuda.kernel import CudaKernel
|
4
2
|
from ..utils import get_cuda_srcfile, updiv, deprecated_class
|
5
3
|
from .sinogram import SinoBuilder, SinoNormalization, SinoMult
|
6
4
|
from .sinogram import _convert_halftomo_right # FIXME Temporary patch
|
@@ -26,7 +24,7 @@ class CudaSinoBuilder(SinoBuilder):
|
|
26
24
|
if not (self.halftomo):
|
27
25
|
return
|
28
26
|
kernel_name = "halftomo_kernel"
|
29
|
-
self.halftomo_kernel =
|
27
|
+
self.halftomo_kernel = self.cuda_processing.kernel(
|
30
28
|
kernel_name,
|
31
29
|
get_cuda_srcfile("halftomo.cu"),
|
32
30
|
signature="PPPiii",
|
@@ -36,15 +34,19 @@ class CudaSinoBuilder(SinoBuilder):
|
|
36
34
|
self._halftomo_gridsize = (updiv(self.extended_sino_width, blk[0]), updiv((self.n_angles + 1) // 2, blk[1]), 1)
|
37
35
|
d = self.n_x - self.extended_sino_width // 2 # will have to be adapted for varying axis pos
|
38
36
|
self.halftomo_weights = np.linspace(0, 1, 2 * abs(d), endpoint=True, dtype="f")
|
39
|
-
self.d_halftomo_weights =
|
37
|
+
self.d_halftomo_weights = self.cuda_processing.to_device("d_halftomo_weights", self.halftomo_weights)
|
40
38
|
# Allocate one single sinogram (kernel needs c-contiguous array).
|
41
39
|
# If odd number of angles: repeat last angle.
|
42
|
-
self.d_sino =
|
40
|
+
self.d_sino = self.cuda_processing.allocate_array(
|
41
|
+
"d_sino", (self.n_angles + (self.n_angles & 1), self.n_x), "f"
|
42
|
+
)
|
43
43
|
self.h_sino = self.d_sino.get()
|
44
44
|
#
|
45
45
|
self.cuda_processing.init_arrays_to_none(["d_output"])
|
46
46
|
if self._halftomo_flip:
|
47
|
-
self.xflip_kernel =
|
47
|
+
self.xflip_kernel = self.cuda_processing.kernel(
|
48
|
+
"reverse2D_x", get_cuda_srcfile("ElementOp.cu"), signature="Pii"
|
49
|
+
)
|
48
50
|
blk = (32, 32, 1)
|
49
51
|
self._xflip_blksize = blk
|
50
52
|
self._xflip_gridsize_1 = (updiv(self.n_x, blk[0]), updiv(self.n_angles, blk[1]), 1)
|
@@ -106,7 +108,7 @@ class CudaSinoBuilder(SinoBuilder):
|
|
106
108
|
|
107
109
|
def _get_sinos_halftomo(self, radios, output=None):
|
108
110
|
if output is None:
|
109
|
-
output =
|
111
|
+
output = self.cuda_processing.allocate_array("output", self.output_shape, "f")
|
110
112
|
elif output.shape != self.output_shape:
|
111
113
|
raise ValueError("Expected output to have shape %s but got %s" % (self.output_shape, output.shape))
|
112
114
|
for i in range(self.n_z):
|
@@ -127,7 +129,7 @@ class CudaSinoMult(SinoMult):
|
|
127
129
|
self._init_kernel()
|
128
130
|
|
129
131
|
def _init_kernel(self):
|
130
|
-
self.halftomo_kernel =
|
132
|
+
self.halftomo_kernel = self.cuda_processing.kernel(
|
131
133
|
"halftomo_prepare_sinogram", filename=get_cuda_srcfile("halftomo.cu"), signature="PPiiii"
|
132
134
|
)
|
133
135
|
self.d_weights = self.cuda_processing.set_array("d_weights", self.weights)
|
@@ -165,9 +167,9 @@ class CudaSinoNormalization(SinoNormalization):
|
|
165
167
|
#
|
166
168
|
|
167
169
|
def _init_cuda_normalization(self):
|
168
|
-
self._d_tmp =
|
170
|
+
self._d_tmp = self.cuda_processing.allocate_array("_d_tmp", self.sinos_shape[-2:], "f")
|
169
171
|
if self.normalization_kind == "chebyshev":
|
170
|
-
self._chebyshev_kernel =
|
172
|
+
self._chebyshev_kernel = self.cuda_processing.kernel(
|
171
173
|
"normalize_chebyshev",
|
172
174
|
filename=get_cuda_srcfile("normalization.cu"),
|
173
175
|
signature="Piii",
|
@@ -183,12 +185,14 @@ class CudaSinoNormalization(SinoNormalization):
|
|
183
185
|
# If normalization_array is 1D, make a 2D array by repeating the line
|
184
186
|
if normalization_array.ndim == 1:
|
185
187
|
normalization_array = np.tile(normalization_array, (self.n_angles, 1))
|
186
|
-
self._d_normalization_array =
|
188
|
+
self._d_normalization_array = self.cuda_processing.to_device(
|
189
|
+
"_d_normalization_array", normalization_array.astype("f")
|
190
|
+
)
|
187
191
|
if self.normalization_kind == "subtraction":
|
188
192
|
generic_op_val = 1
|
189
193
|
elif self.normalization_kind == "division":
|
190
194
|
generic_op_val = 3
|
191
|
-
self._norm_kernel =
|
195
|
+
self._norm_kernel = self.cuda_processing.kernel(
|
192
196
|
"inplace_generic_op_2Dby2D",
|
193
197
|
filename=get_cuda_srcfile("ElementOp.cu"),
|
194
198
|
signature="PPii",
|
@@ -1,26 +1,33 @@
|
|
1
1
|
import numpy as np
|
2
2
|
import pytest
|
3
|
-
from nabu.
|
4
|
-
from nabu.testutils import
|
5
|
-
from nabu.reconstruction.rings import MunchDeringer
|
3
|
+
from nabu.reconstruction.rings_cuda import CudaSinoMeanDeringer
|
4
|
+
from nabu.testutils import compare_arrays, get_data, generate_tests_scenarios, __do_long_tests__
|
5
|
+
from nabu.reconstruction.rings import MunchDeringer, SinoMeanDeringer, VoDeringer, __has_algotom__
|
6
6
|
from nabu.thirdparty.pore3d_deringer_munch import munchetal_filter
|
7
7
|
from nabu.cuda.utils import __has_pycuda__, get_cuda_context
|
8
8
|
|
9
|
-
__have_gpuderinger__ = False
|
10
9
|
if __has_pycuda__:
|
11
10
|
import pycuda.gpuarray as garray
|
12
|
-
from nabu.
|
13
|
-
|
14
|
-
|
15
|
-
|
11
|
+
from nabu.processing.fft_cuda import get_available_fft_implems
|
12
|
+
from nabu.reconstruction.rings_cuda import (
|
13
|
+
CudaMunchDeringer,
|
14
|
+
can_use_cuda_deringer,
|
15
|
+
CudaVoDeringer,
|
16
|
+
__have_tomocupy_deringer__,
|
17
|
+
)
|
16
18
|
|
19
|
+
__has_cuda_deringer__ = can_use_cuda_deringer()
|
20
|
+
else:
|
21
|
+
__has_cuda_deringer__ = False
|
22
|
+
__have_tomocupy_deringer__ = False
|
17
23
|
|
18
24
|
fw_scenarios = generate_tests_scenarios(
|
19
25
|
{
|
20
26
|
"levels": [4],
|
21
27
|
"sigma": [1.0],
|
22
28
|
"wname": ["db15"],
|
23
|
-
"padding": [
|
29
|
+
"padding": [(100, 100)],
|
30
|
+
"fft_implem": ["skcuda"],
|
24
31
|
}
|
25
32
|
)
|
26
33
|
if __do_long_tests__:
|
@@ -30,6 +37,7 @@ if __do_long_tests__:
|
|
30
37
|
"sigma": [1.0, 2.0],
|
31
38
|
"wname": ["db15", "haar", "rbio4.4"],
|
32
39
|
"padding": [None, (100, 100), (50, 71)],
|
40
|
+
"fft_implem": ["skcuda", "vkfft"],
|
33
41
|
}
|
34
42
|
)
|
35
43
|
|
@@ -38,12 +46,14 @@ if __do_long_tests__:
|
|
38
46
|
def bootstrap(request):
|
39
47
|
cls = request.cls
|
40
48
|
cls.sino = get_data("mri_sino500.npz")["data"]
|
49
|
+
cls.sino2 = get_data("sino_bamboo_hercules.npz")["data"]
|
41
50
|
cls.tol = 5e-3
|
42
51
|
cls.rings = {150: 0.5, -150: 0.5}
|
43
|
-
if
|
52
|
+
if __has_pycuda__:
|
44
53
|
cls.ctx = get_cuda_context(cleanup_at_exit=False)
|
54
|
+
cls._available_fft_implems = get_available_fft_implems()
|
45
55
|
yield
|
46
|
-
if
|
56
|
+
if __has_pycuda__:
|
47
57
|
cls.ctx.pop()
|
48
58
|
|
49
59
|
|
@@ -95,17 +105,22 @@ class TestMunchDeringer:
|
|
95
105
|
assert err_max < self.tol, "Max error is too high"
|
96
106
|
|
97
107
|
@pytest.mark.skipif(
|
98
|
-
not (
|
99
|
-
reason="Need pycuda, pycudwt and scikit-cuda for this test",
|
108
|
+
not (__has_cuda_deringer__) or munchetal_filter is None,
|
109
|
+
reason="Need pycuda, pycudwt and (scikit-cuda or pyvkfft) for this test",
|
100
110
|
)
|
101
111
|
@pytest.mark.parametrize("config", fw_scenarios)
|
102
112
|
def test_cuda_munch_deringer(self, config):
|
113
|
+
fft_implem = config["fft_implem"]
|
114
|
+
if fft_implem not in self._available_fft_implems:
|
115
|
+
pytest.skip("FFT implementation %s is not available" % fft_implem)
|
103
116
|
sino = self.add_stripes_to_sino(self.sino, self.rings)
|
104
117
|
deringer = CudaMunchDeringer(
|
105
118
|
config["sigma"],
|
106
119
|
self.sino.shape,
|
107
120
|
levels=config["levels"],
|
108
121
|
wname=config["wname"],
|
122
|
+
padding=config["padding"],
|
123
|
+
fft_backend=fft_implem,
|
109
124
|
cuda_options={"ctx": self.ctx},
|
110
125
|
)
|
111
126
|
d_sino = garray.to_gpu(sino)
|
@@ -115,4 +130,91 @@ class TestMunchDeringer:
|
|
115
130
|
ref = self.get_fourier_wavelets_reference_result(sino, config)
|
116
131
|
|
117
132
|
err_max = np.max(np.abs(res - ref))
|
118
|
-
assert err_max < 1e-1, "Max error is too high"
|
133
|
+
assert err_max < 1e-1, "Max error is too high with configuration %s" % (str(config))
|
134
|
+
|
135
|
+
@pytest.mark.skipif(
|
136
|
+
not (__has_algotom__),
|
137
|
+
reason="Need algotom for this test",
|
138
|
+
)
|
139
|
+
def test_vo_deringer(self):
|
140
|
+
deringer = VoDeringer(self.sino.shape)
|
141
|
+
sino_deringed = deringer.remove_rings_sinogram(self.sino)
|
142
|
+
sinos = np.tile(self.sino, (10, 1, 1))
|
143
|
+
sinos_deringed = deringer.remove_rings_sinograms(sinos)
|
144
|
+
# TODO check result. The generated test sinogram is "too synthetic" for this kind of deringer
|
145
|
+
|
146
|
+
@pytest.mark.skipif(
|
147
|
+
not (__have_tomocupy_deringer__),
|
148
|
+
reason="Need cupy for this test",
|
149
|
+
)
|
150
|
+
def test_cuda_vo_deringer(self):
|
151
|
+
# Beware, this deringer seems to be buggy for "too-small" sinograms
|
152
|
+
# (NaNs on the edges and in some regions). To be investigated
|
153
|
+
|
154
|
+
deringer = CudaVoDeringer(self.sino2.shape)
|
155
|
+
d_sino = garray.to_gpu(self.sino2)
|
156
|
+
deringer.remove_rings_sinogram(d_sino)
|
157
|
+
sino = d_sino.get()
|
158
|
+
|
159
|
+
if __has_algotom__:
|
160
|
+
vo_deringer = VoDeringer(self.sino2.shape)
|
161
|
+
sino_deringed = vo_deringer.remove_rings_sinogram(self.sino2)
|
162
|
+
|
163
|
+
assert (
|
164
|
+
np.max(np.abs(sino - sino_deringed)) < 2e-3
|
165
|
+
), "Cuda implementation of Vo deringer does not yield the same results as base implementation"
|
166
|
+
|
167
|
+
def test_mean_deringer(self):
|
168
|
+
deringer_no_filtering = SinoMeanDeringer(self.sino.shape, mode="subtract")
|
169
|
+
|
170
|
+
sino = self.sino.copy()
|
171
|
+
deringer_no_filtering.remove_rings_sinogram(sino)
|
172
|
+
|
173
|
+
sino = self.sino.copy()
|
174
|
+
deringer_with_filtering = SinoMeanDeringer(self.sino.shape, mode="subtract", filter_cutoff=(0, 30))
|
175
|
+
deringer_with_filtering.remove_rings_sinogram(sino)
|
176
|
+
# TODO check results
|
177
|
+
|
178
|
+
@pytest.mark.skipif(not (__has_pycuda__), reason="Need pycuda for this test")
|
179
|
+
def test_cuda_mean_deringer(self):
|
180
|
+
cuda_deringer = CudaSinoMeanDeringer(
|
181
|
+
self.sino.shape,
|
182
|
+
mode="subtract",
|
183
|
+
filter_cutoff=(
|
184
|
+
0,
|
185
|
+
10,
|
186
|
+
),
|
187
|
+
ctx=self.ctx,
|
188
|
+
)
|
189
|
+
deringer = SinoMeanDeringer(
|
190
|
+
self.sino.shape,
|
191
|
+
mode="subtract",
|
192
|
+
filter_cutoff=(
|
193
|
+
0,
|
194
|
+
10,
|
195
|
+
),
|
196
|
+
)
|
197
|
+
|
198
|
+
d_sino = cuda_deringer.processing.to_device("sino", self.sino)
|
199
|
+
cuda_deringer.remove_rings_sinogram(d_sino)
|
200
|
+
|
201
|
+
sino = self.sino.copy()
|
202
|
+
sino_d = deringer.remove_rings_sinogram(sino)
|
203
|
+
|
204
|
+
dirac = np.zeros(self.sino.shape[-1], "f")
|
205
|
+
dirac[dirac.size // 2] = 1
|
206
|
+
deringer_filter_response = deringer._apply_filter(dirac)
|
207
|
+
|
208
|
+
d_dirac = cuda_deringer.processing.to_device("dirac", dirac)
|
209
|
+
cuda_deringer_filter_response = cuda_deringer._apply_filter(d_dirac)
|
210
|
+
|
211
|
+
is_close, residual = compare_arrays(
|
212
|
+
deringer_filter_response, cuda_deringer_filter_response.get(), 1e-7, return_residual=True
|
213
|
+
)
|
214
|
+
assert is_close, "Cuda deringer does not have the correct filter response: max_error=%.2e" % residual
|
215
|
+
|
216
|
+
# There is a rather large discrepancy between the vertical_mean kernel and numpy.mean(). Not sure who is right
|
217
|
+
is_close, residual = compare_arrays(sino_d, d_sino.get(), 1e-1, return_residual=True)
|
218
|
+
assert is_close, (
|
219
|
+
"Cuda deringer does not yield the same result as base implementation: max_error=%.2e" % residual
|
220
|
+
)
|