PyPI - nabu - Versions diffs - 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl - Mend

nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (183) hide show

doc/conf.py +1 -1
doc/doc_config.py +32 -0
nabu/__init__.py +2 -1
nabu/app/bootstrap_stitching.py +1 -1
nabu/app/cli_configs.py +122 -2
nabu/app/composite_cor.py +27 -2
nabu/app/correct_rot.py +70 -0
nabu/app/create_distortion_map_from_poly.py +42 -18
nabu/app/diag_to_pix.py +358 -0
nabu/app/diag_to_rot.py +449 -0
nabu/app/generate_header.py +4 -3
nabu/app/histogram.py +2 -2
nabu/app/multicor.py +6 -1
nabu/app/parse_reconstruction_log.py +151 -0
nabu/app/prepare_weights_double.py +83 -22
nabu/app/reconstruct.py +5 -1
nabu/app/reconstruct_helical.py +7 -0
nabu/app/reduce_dark_flat.py +6 -3
nabu/app/rotate.py +4 -4
nabu/app/stitching.py +16 -2
nabu/app/tests/test_reduce_dark_flat.py +18 -2
nabu/app/validator.py +4 -4
nabu/cuda/convolution.py +8 -376
nabu/cuda/fft.py +4 -0
nabu/cuda/kernel.py +4 -4
nabu/cuda/medfilt.py +5 -158
nabu/cuda/padding.py +5 -71
nabu/cuda/processing.py +23 -2
nabu/cuda/src/ElementOp.cu +78 -0
nabu/cuda/src/backproj.cu +28 -2
nabu/cuda/src/fourier_wavelets.cu +2 -2
nabu/cuda/src/normalization.cu +23 -0
nabu/cuda/src/padding.cu +2 -2
nabu/cuda/src/transpose.cu +16 -0
nabu/cuda/utils.py +39 -0
nabu/estimation/alignment.py +10 -1
nabu/estimation/cor.py +808 -38
nabu/estimation/cor_sino.py +7 -9
nabu/estimation/tests/test_cor.py +85 -3
nabu/io/reader.py +26 -18
nabu/io/tests/test_cast_volume.py +3 -3
nabu/io/tests/test_detector_distortion.py +3 -3
nabu/io/tiffwriter_zmm.py +2 -2
nabu/io/utils.py +14 -4
nabu/io/writer.py +5 -3
nabu/misc/fftshift.py +6 -0
nabu/misc/histogram.py +5 -285
nabu/misc/histogram_cuda.py +8 -104
nabu/misc/kernel_base.py +3 -121
nabu/misc/padding_base.py +5 -69
nabu/misc/processing_base.py +3 -107
nabu/misc/rotation.py +5 -62
nabu/misc/rotation_cuda.py +5 -65
nabu/misc/transpose.py +6 -0
nabu/misc/unsharp.py +3 -78
nabu/misc/unsharp_cuda.py +5 -52
nabu/misc/unsharp_opencl.py +8 -85
nabu/opencl/fft.py +6 -0
nabu/opencl/kernel.py +21 -6
nabu/opencl/padding.py +5 -72
nabu/opencl/processing.py +27 -5
nabu/opencl/src/backproj.cl +3 -3
nabu/opencl/src/fftshift.cl +65 -12
nabu/opencl/src/padding.cl +2 -2
nabu/opencl/src/roll.cl +96 -0
nabu/opencl/src/transpose.cl +16 -0
nabu/pipeline/config_validators.py +63 -3
nabu/pipeline/dataset_validator.py +2 -2
nabu/pipeline/estimators.py +193 -35
nabu/pipeline/fullfield/chunked.py +34 -17
nabu/pipeline/fullfield/chunked_cuda.py +7 -5
nabu/pipeline/fullfield/computations.py +48 -13
nabu/pipeline/fullfield/nabu_config.py +13 -13
nabu/pipeline/fullfield/processconfig.py +10 -5
nabu/pipeline/fullfield/reconstruction.py +1 -2
nabu/pipeline/helical/fbp.py +5 -0
nabu/pipeline/helical/filtering.py +12 -9
nabu/pipeline/helical/gridded_accumulator.py +179 -33
nabu/pipeline/helical/helical_chunked_regridded.py +262 -151
nabu/pipeline/helical/helical_chunked_regridded_cuda.py +4 -11
nabu/pipeline/helical/helical_reconstruction.py +56 -18
nabu/pipeline/helical/span_strategy.py +1 -1
nabu/pipeline/helical/tests/test_accumulator.py +4 -0
nabu/pipeline/params.py +23 -2
nabu/pipeline/processconfig.py +3 -8
nabu/pipeline/tests/test_chunk_reader.py +78 -0
nabu/pipeline/tests/test_estimators.py +120 -2
nabu/pipeline/utils.py +25 -0
nabu/pipeline/writer.py +2 -0
nabu/preproc/ccd_cuda.py +9 -7
nabu/preproc/ctf.py +21 -26
nabu/preproc/ctf_cuda.py +25 -25
nabu/preproc/double_flatfield.py +14 -2
nabu/preproc/double_flatfield_cuda.py +7 -11
nabu/preproc/flatfield_cuda.py +23 -27
nabu/preproc/phase.py +19 -24
nabu/preproc/phase_cuda.py +21 -21
nabu/preproc/shift_cuda.py +58 -28
nabu/preproc/tests/test_ctf.py +5 -5
nabu/preproc/tests/test_double_flatfield.py +2 -2
nabu/preproc/tests/test_vshift.py +13 -2
nabu/processing/__init__.py +0 -0
nabu/processing/convolution_cuda.py +375 -0
nabu/processing/fft_base.py +163 -0
nabu/processing/fft_cuda.py +256 -0
nabu/processing/fft_opencl.py +54 -0
nabu/processing/fftshift.py +134 -0
nabu/processing/histogram.py +286 -0
nabu/processing/histogram_cuda.py +103 -0
nabu/processing/kernel_base.py +126 -0
nabu/processing/medfilt_cuda.py +159 -0
nabu/processing/muladd.py +29 -0
nabu/processing/muladd_cuda.py +68 -0
nabu/processing/padding_base.py +71 -0
nabu/processing/padding_cuda.py +75 -0
nabu/processing/padding_opencl.py +77 -0
nabu/processing/processing_base.py +123 -0
nabu/processing/roll_opencl.py +64 -0
nabu/processing/rotation.py +63 -0
nabu/processing/rotation_cuda.py +66 -0
nabu/processing/tests/__init__.py +0 -0
nabu/processing/tests/test_fft.py +268 -0
nabu/processing/tests/test_fftshift.py +71 -0
nabu/{misc → processing}/tests/test_histogram.py +2 -4
nabu/{cuda → processing}/tests/test_medfilt.py +1 -1
nabu/processing/tests/test_muladd.py +54 -0
nabu/{cuda → processing}/tests/test_padding.py +119 -75
nabu/processing/tests/test_roll.py +63 -0
nabu/{misc → processing}/tests/test_rotation.py +3 -2
nabu/processing/tests/test_transpose.py +72 -0
nabu/{misc → processing}/tests/test_unsharp.py +41 -8
nabu/processing/transpose.py +126 -0
nabu/processing/unsharp.py +79 -0
nabu/processing/unsharp_cuda.py +53 -0
nabu/processing/unsharp_opencl.py +75 -0
nabu/reconstruction/fbp.py +34 -10
nabu/reconstruction/fbp_base.py +35 -16
nabu/reconstruction/fbp_opencl.py +7 -12
nabu/reconstruction/filtering.py +2 -2
nabu/reconstruction/filtering_cuda.py +13 -14
nabu/reconstruction/filtering_opencl.py +3 -4
nabu/reconstruction/projection.py +2 -0
nabu/reconstruction/rings.py +158 -1
nabu/reconstruction/rings_cuda.py +218 -58
nabu/reconstruction/sinogram_cuda.py +16 -12
nabu/reconstruction/tests/test_deringer.py +116 -14
nabu/reconstruction/tests/test_fbp.py +22 -31
nabu/reconstruction/tests/test_filtering.py +11 -2
nabu/resources/dataset_analyzer.py +89 -26
nabu/resources/nxflatfield.py +2 -2
nabu/resources/tests/test_nxflatfield.py +1 -1
nabu/resources/utils.py +9 -2
nabu/stitching/alignment.py +184 -0
nabu/stitching/config.py +241 -39
nabu/stitching/definitions.py +6 -0
nabu/stitching/frame_composition.py +4 -2
nabu/stitching/overlap.py +99 -3
nabu/stitching/sample_normalization.py +60 -0
nabu/stitching/slurm_utils.py +10 -10
nabu/stitching/tests/test_alignment.py +99 -0
nabu/stitching/tests/test_config.py +16 -1
nabu/stitching/tests/test_overlap.py +68 -2
nabu/stitching/tests/test_sample_normalization.py +49 -0
nabu/stitching/tests/test_slurm_utils.py +5 -5
nabu/stitching/tests/test_utils.py +3 -33
nabu/stitching/tests/test_z_stitching.py +391 -22
nabu/stitching/utils.py +144 -202
nabu/stitching/z_stitching.py +309 -126
nabu/testutils.py +18 -0
nabu/thirdparty/tomocupy_remove_stripe.py +586 -0
nabu/utils.py +32 -6
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/LICENSE +1 -1
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/METADATA +5 -5
nabu-2024.1.0rc3.dist-info/RECORD +296 -0
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/WHEEL +1 -1
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/entry_points.txt +5 -1
nabu/conftest.py +0 -14
nabu/opencl/fftshift.py +0 -92
nabu/opencl/tests/test_fftshift.py +0 -55
nabu/opencl/tests/test_padding.py +0 -84
nabu-2023.2.1.dist-info/RECORD +0 -252
/nabu/cuda/src/{fftshift.cu → dfi_fftshift.cu} +0 -0
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/top_level.txt +0 -0

nabu/reconstruction/rings_cuda.py CHANGED Viewed

@@ -1,12 +1,15 @@
 import numpy as np
-import pycuda.gpuarray as garray
-from ..utils import get_cuda_srcfile
-from ..cuda.processing import CudaProcessing
-from ..cuda.kernel import CudaKernel
-from .rings import MunchDeringer
-from silx.image.tomography import get_next_power
-from ..cuda.padding import CudaPadding
+from ..utils import docstring, get_cuda_srcfile, updiv
+from ..cuda.processing import CudaProcessing, __has_pycuda__
+from ..processing.padding_cuda import CudaPadding
+from ..processing.fft_cuda import get_fft_class, get_available_fft_implems
+from ..processing.transpose import CudaTranspose
+from ..thirdparty.tomocupy_remove_stripe import remove_all_stripe_pycuda, __have_tomocupy_deringer__
+from .rings import MunchDeringer, SinoMeanDeringer, VoDeringer
+if __has_pycuda__:
+    import pycuda.gpuarray as garray
+    from ..cuda.kernel import CudaKernel
 try:
     from pycudwt import Wavelets
@@ -14,21 +17,19 @@ try:
     __have_pycudwt__ = True
 except ImportError:
     __have_pycudwt__ = False
-try:
-    from skcuda.fft import Plan
-    from skcuda.fft import fft as cufft
-    from skcuda.fft import ifft as cuifft
-    __have_skcuda__ = True
-except Exception as exc:
-    # We have to catch this very broad exception, because
-    # skcuda.cublas.cublasError cannot be evaluated without error when no cuda GPU is found
-    __have_skcuda__ = False
 class CudaMunchDeringer(MunchDeringer):
     def __init__(
-        self, sigma, sinos_shape, levels=None, wname="db15", padding=None, padding_mode="edge", cuda_options=None
+        self,
+        sigma,
+        sinos_shape,
+        levels=None,
+        wname="db15",
+        padding=None,
+        padding_mode="edge",
+        fft_backend="skcuda",
+        cuda_options=None,
     ):
         """
         Initialize a "Munch Et Al" sinogram deringer with the Cuda backend.
@@ -55,14 +56,15 @@ class CudaMunchDeringer(MunchDeringer):
         super().__init__(sigma, sinos_shape, levels=levels, wname=wname, padding=padding, padding_mode=padding_mode)
         self._check_can_use_wavelets()
         self.cuda_processing = CudaProcessing(**(cuda_options or {}))
+        self.ctx = self.cuda_processing.ctx
         self._init_pycudwt()
         self._init_padding()
-        self._init_fft()
+        self._init_fft(fft_backend)
         self._setup_fw_kernel()
     def _check_can_use_wavelets(self):
-        if not (__have_pycudwt__ and __have_skcuda__):
-            raise ValueError("Needs pycudwt and scikit-cuda to use this class")
+        if not (__have_pycudwt__):
+            raise ValueError("Needs pycudwt to use this class")
     def _init_padding(self):
         if self.padding is None:
@@ -74,37 +76,49 @@ class CudaMunchDeringer(MunchDeringer):
             cuda_options={"ctx": self.cuda_processing.ctx},
         )
-    def _init_fft(self):
+    def _init_fft(self, fft_backend):
+        self.fft_cls = get_fft_class(backend=fft_backend)
+        # For all k >= 1, we perform a batched (I)FFT along axis 0 on an array
+        # of shape (n_a/2^k, n_x/2^k)  (up to DWT size rounding)
+        if self.fft_cls.implem == "vkfft":
+            self._create_plans_vkfft()
+        else:
+            self._create_plans_skfft()
+    def _create_plans_skfft(self):
         self._fft_plans = {}
         for level, d_vcoeff in self._d_vertical_coeffs.items():
-            n_angles, dwidth = d_vcoeff.shape
-            # Batched vertical 1D FFT - need advanced data layout
-            # http://docs.nvidia.com/cuda/cufft/#advanced-data-layout
-            p_f = Plan(
-                (n_angles,),
-                np.float32,
-                np.complex64,
-                batch=dwidth,
-                inembed=np.int32([0]),
-                istride=dwidth,
-                idist=1,
-                onembed=np.int32([0]),
-                ostride=dwidth,
-                odist=1,
-            )
-            p_i = Plan(
-                (n_angles,),
-                np.complex64,
-                np.float32,
-                batch=dwidth,
-                inembed=np.int32([0]),
-                istride=dwidth,
-                idist=1,
-                onembed=np.int32([0]),
-                ostride=dwidth,
-                odist=1,
-            )
-            self._fft_plans[level] = {"forward": p_f, "inverse": p_i}
+            self._fft_plans[level] = self.fft_cls(d_vcoeff.shape, np.float32, r2c=True, axes=(0,), ctx=self.ctx)
+    def _create_plans_vkfft(self):
+        """
+        VKFFT does not support batched R2C transforms along axis 0 ("slow axis").
+        We can either use C2C (faster, but needs more memory) or transpose the arrays to do R2C along axis=1.
+        Here we transpose the arrays.
+        """
+        self._fft_plans = {}
+        self._transpose_forward_1 = {}
+        self._transpose_forward_2 = {}
+        self._transpose_inverse_1 = {}
+        self._transpose_inverse_2 = {}
+        for level, d_vcoeff in self._d_vertical_coeffs.items():
+            shape = d_vcoeff.shape
+            # Normally, a batched 1D R2C FFT on 2D data of shape (Ny, Nx) along axis 0 returns an array of shape (Ny/2+1, Nx):
+            #
+            #  (Ny, Nx)  --[fft_0]--> (Ny/2+1, Nx)
+            #    f32                       c64
+            #
+            # In this case, we can only do batched 1D transforms along axis 1, so we have to work around it with transposes:
+            #
+            #  (Ny, Nx) --[T]--> (Nx, Ny) --[fft_1]--> (Nx, Ny/2+1) --[T]--> (Ny/2+1, Nx)
+            #    f32                f32                    c64                    c64
+            #
+            # (In both cases IFFT is done the same way from right to left)
+            self._transpose_forward_1[level] = CudaTranspose(shape, np.float32, ctx=self.ctx)
+            self._fft_plans[level] = self.fft_cls(shape[::-1], np.float32, r2c=True, ctx=self.ctx)
+            self._transpose_forward_2[level] = CudaTranspose((shape[1], shape[0] // 2 + 1), np.complex64, ctx=self.ctx)
+            self._transpose_inverse_1[level] = CudaTranspose((shape[0] // 2 + 1, shape[1]), np.complex64, ctx=self.ctx)
+            self._transpose_inverse_2[level] = CudaTranspose(shape[::-1], np.float32, ctx=self.ctx)
     def _init_pycudwt(self):
         if self.levels is None:
@@ -118,7 +132,6 @@ class CudaMunchDeringer(MunchDeringer):
     def _get_vertical_coeffs(self):
         self._d_vertical_coeffs = {}
-        self._d_sino_f = {}
         # Transfer the (0-memset) coefficients in order to get all the shapes
         coeffs = self.cudwt.coeffs
         for i in range(self.cudwt.levels):
@@ -126,7 +139,6 @@ class CudaMunchDeringer(MunchDeringer):
             self._d_vertical_coeffs[i + 1] = garray.empty(
                 shape, np.float32, gpudata=self.cudwt.coeff_int_ptr(3 * i + 2)
             )
-            self._d_sino_f[i + 1] = garray.zeros((shape[0] // 2 + 1, shape[1]), dtype=np.complex64)
     def _setup_fw_kernel(self):
         self._fw_kernel = CudaKernel(
@@ -135,6 +147,32 @@ class CudaMunchDeringer(MunchDeringer):
             signature="Piif",
         )
+    def _apply_fft(self, level):
+        d_coeffs = self._d_vertical_coeffs[level]
+        # All the memory is allocated (or re-used) under the hood
+        if self.fft_cls.implem == "vkfft":
+            d_coeffs_t = self._transpose_forward_1[level](
+                d_coeffs
+            )  # allocates self._transpose_forward_1[level].processing.dst
+            d_coeffs_t_f = self._fft_plans[level].fft(d_coeffs_t)  # allocates self._fft_plans[level].output_fft
+            d_coeffs_f = self._transpose_forward_2[level](
+                d_coeffs_t_f
+            )  # allocates self._transpose_forward_2[level].processing.dst
+        else:
+            d_coeffs_f = self._fft_plans[level].fft(d_coeffs)
+        return d_coeffs_f
+    def _apply_ifft(self, d_coeffs_f, level):
+        d_coeffs = self._d_vertical_coeffs[level]
+        if self.fft_cls.implem == "vkfft":
+            d_coeffs_t_f = self._transpose_inverse_1[level](d_coeffs_f, dst=self._fft_plans[level].output_fft)
+            d_coeffs_t = self._fft_plans[level].ifft(
+                d_coeffs_t_f, output=self._transpose_forward_1[level].processing.dst
+            )
+            self._transpose_inverse_2[level](d_coeffs_t, dst=d_coeffs)
+        else:
+            self._fft_plans[level].ifft(d_coeffs_f, output=d_coeffs)
     def _destripe_2D(self, d_sino, output):
         if self.padding is not None:
             d_sino = self.padder.pad(d_sino)
@@ -144,15 +182,15 @@ class CudaMunchDeringer(MunchDeringer):
         self.cudwt.forward()
         for i in range(self.cudwt.levels):
             level = i + 1
-            d_coeffs = self._d_vertical_coeffs[level]
-            d_sino_f = self._d_sino_f[level]
-            Ny, Nx = d_coeffs.shape
+            Ny, Nx = self._d_vertical_coeffs[level].shape
             # Batched FFT along axis 0
-            cufft(d_coeffs, d_sino_f, self._fft_plans[level]["forward"])
+            d_vertical_coeffs_f = self._apply_fft(level)
             # Dampen the wavelets coefficients
-            self._fw_kernel(d_sino_f, Nx, Ny, self.sigma)
+            self._fw_kernel(d_vertical_coeffs_f, Nx, Ny, self.sigma)
             # IFFT
-            cuifft(d_sino_f, d_coeffs, self._fft_plans[level]["inverse"])
+            self._apply_ifft(d_vertical_coeffs_f, level)
         # Finally, inverse DWT
         self.cudwt.inverse()
         d_out = self._d_sino
@@ -160,3 +198,125 @@ class CudaMunchDeringer(MunchDeringer):
             d_out = self._d_sino[:, self.padding[0] : -self.padding[1]]  # memcpy2D
         output.set(d_out)
         return output
+def can_use_cuda_deringer():
+    """
+    Check whether the cuda implementation of the deringer can be used.
+    Checking for installed modules is not enough, as for example pyvkfft can be installed without cuda devices
+    """
+    can_do_fft = get_available_fft_implems() != []
+    return can_do_fft and __have_pycudwt__
+class CudaVoDeringer(VoDeringer):
+    """
+    An interface to tomocupy's "remove_all_stripe".
+    """
+    def _check_requirement(self):
+        if not (__have_tomocupy_deringer__):
+            raise ImportError("need cupy")
+    def remove_rings_radios(self, radios):
+        return remove_all_stripe_pycuda(radios, **self._remove_all_stripe_kwargs)
+    def remove_rings_sinograms(self, sinos):
+        radios = sinos.transpose(axes=(1, 0, 2))  # view, no copy
+        self.remove_rings_radios(radios)
+        return sinos
+    def remove_rings_sinogram(self, sino):
+        radios = sino.reshape(sino.shape[0], 1, sino.shape[1])  # no copy
+        self.remove_rings_radios(radios)
+        return sino
+    remove_rings = remove_rings_sinograms
+class CudaSinoMeanDeringer(SinoMeanDeringer):
+    @docstring(SinoMeanDeringer)
+    def __init__(
+        self,
+        sinos_shape,
+        mode="subtract",
+        filter_cutoff=None,
+        padding_mode="edge",
+        fft_num_threads=None,
+        **cuda_options,
+    ):
+        self.processing = CudaProcessing(**(cuda_options or {}))
+        super().__init__(sinos_shape, mode, filter_cutoff, padding_mode, fft_num_threads)
+        self._init_kernels()
+    def _init_kernels(self):
+        self.d_sino_profile = self.processing.allocate_array("sino_profile", self.n_x)
+        self._mean_kernel = self.processing.kernel(
+            "vertical_mean",
+            filename=get_cuda_srcfile("normalization.cu"),
+            signature="PPiii",
+        )
+        self._mean_kernel_block = (32, 1, 32)
+        self._mean_kernel_grid = [updiv(a, b) for a, b in zip(self.sinos_shape[::-1], self._mean_kernel_block)]
+        self._mean_kernel_args = [self.d_sino_profile, np.int32(self.n_x), np.int32(self.n_angles), np.int32(self.n_z)]
+        self._mean_kernel_kwargs = {
+            "grid": self._mean_kernel_grid,
+            "block": self._mean_kernel_block,
+        }
+        self._op_kernel = self.processing.kernel(
+            "inplace_generic_op_3Dby1D",
+            filename=get_cuda_srcfile("ElementOp.cu"),
+            signature="PPiii",
+            options=["-DGENERIC_OP=%d" % (3 if self.mode == "divide" else 1)],
+        )
+        self._op_kernel_block = (16, 16, 4)
+        self._op_kernel_grid = [updiv(a, b) for a, b in zip(self.sinos_shape[::-1], self._op_kernel_block)]
+        self._op_kernel_args = [self.d_sino_profile, np.int32(self.n_x), np.int32(self.n_angles), np.int32(self.n_z)]
+        self._op_kernel_kwargs = {
+            "grid": self._op_kernel_grid,
+            "block": self._op_kernel_block,
+        }
+    def _init_filter(self, filter_cutoff, fft_num_threads, padding_mode):
+        super()._init_filter(filter_cutoff, fft_num_threads, padding_mode)
+        if filter_cutoff is None:
+            return
+        self._d_filter_f = self.processing.to_device("_filter_f", self._filter_f)
+        self.padder = CudaPadding(
+            (self.n_x, 1),
+            ((self._pad_left, self._pad_right), (0, 0)),
+            mode=self.padding_mode,
+            cuda_options={"ctx": self.processing.ctx},
+        )
+        fft_cls = get_fft_class()
+        self._fft = fft_cls(self._filter_size, np.float32, r2c=True)
+    def _apply_filter(self, sino_profile):
+        if self._filter_f is None:
+            return sino_profile
+        sino_profile = sino_profile.reshape((-1, 1))  # view
+        sino_profile_p = self.padder.pad(sino_profile).ravel()
+        sino_profile_f = self._fft.fft(sino_profile_p)
+        sino_profile_f *= self._d_filter_f
+        self._fft.ifft(sino_profile_f, output=sino_profile_p)
+        self.d_sino_profile[:] = sino_profile_p[self._pad_left : -self._pad_right]
+        return self.d_sino_profile
+    def remove_rings_sinogram(self, sino, output=None):
+        #
+        if output is not None:
+            raise NotImplementedError
+        #
+        self._mean_kernel(sino, *self._mean_kernel_args, **self._mean_kernel_kwargs)
+        self._apply_filter(self.d_sino_profile)
+        self._op_kernel(sino, *self._op_kernel_args, **self._op_kernel_kwargs)
+        return sino
+    def remove_rings_sinograms(self, sinograms):
+        for i in range(sinograms.shape[0]):
+            self.remove_rings_sinogram(sinograms[i])

nabu/reconstruction/sinogram_cuda.py CHANGED Viewed

@@ -1,6 +1,4 @@
 import numpy as np
-import pycuda.gpuarray as garray
-from ..cuda.kernel import CudaKernel
 from ..utils import get_cuda_srcfile, updiv, deprecated_class
 from .sinogram import SinoBuilder, SinoNormalization, SinoMult
 from .sinogram import _convert_halftomo_right  # FIXME Temporary patch
@@ -26,7 +24,7 @@ class CudaSinoBuilder(SinoBuilder):
         if not (self.halftomo):
             return
         kernel_name = "halftomo_kernel"
-        self.halftomo_kernel = CudaKernel(
+        self.halftomo_kernel = self.cuda_processing.kernel(
             kernel_name,
             get_cuda_srcfile("halftomo.cu"),
             signature="PPPiii",
@@ -36,15 +34,19 @@ class CudaSinoBuilder(SinoBuilder):
         self._halftomo_gridsize = (updiv(self.extended_sino_width, blk[0]), updiv((self.n_angles + 1) // 2, blk[1]), 1)
         d = self.n_x - self.extended_sino_width // 2  # will have to be adapted for varying axis pos
         self.halftomo_weights = np.linspace(0, 1, 2 * abs(d), endpoint=True, dtype="f")
-        self.d_halftomo_weights = garray.to_gpu(self.halftomo_weights)
+        self.d_halftomo_weights = self.cuda_processing.to_device("d_halftomo_weights", self.halftomo_weights)
         # Allocate one single sinogram (kernel needs c-contiguous array).
         # If odd number of angles: repeat last angle.
-        self.d_sino = garray.zeros((self.n_angles + (self.n_angles & 1), self.n_x), "f")
+        self.d_sino = self.cuda_processing.allocate_array(
+            "d_sino", (self.n_angles + (self.n_angles & 1), self.n_x), "f"
+        )
         self.h_sino = self.d_sino.get()
         #
         self.cuda_processing.init_arrays_to_none(["d_output"])
         if self._halftomo_flip:
-            self.xflip_kernel = CudaKernel("reverse2D_x", get_cuda_srcfile("ElementOp.cu"), signature="Pii")
+            self.xflip_kernel = self.cuda_processing.kernel(
+                "reverse2D_x", get_cuda_srcfile("ElementOp.cu"), signature="Pii"
+            )
             blk = (32, 32, 1)
             self._xflip_blksize = blk
             self._xflip_gridsize_1 = (updiv(self.n_x, blk[0]), updiv(self.n_angles, blk[1]), 1)
@@ -106,7 +108,7 @@ class CudaSinoBuilder(SinoBuilder):
     def _get_sinos_halftomo(self, radios, output=None):
         if output is None:
-            output = garray.zeros(self.output_shape, "f")
+            output = self.cuda_processing.allocate_array("output", self.output_shape, "f")
         elif output.shape != self.output_shape:
             raise ValueError("Expected output to have shape %s but got %s" % (self.output_shape, output.shape))
         for i in range(self.n_z):
@@ -127,7 +129,7 @@ class CudaSinoMult(SinoMult):
         self._init_kernel()
     def _init_kernel(self):
-        self.halftomo_kernel = CudaKernel(
+        self.halftomo_kernel = self.cuda_processing.kernel(
             "halftomo_prepare_sinogram", filename=get_cuda_srcfile("halftomo.cu"), signature="PPiiii"
         )
         self.d_weights = self.cuda_processing.set_array("d_weights", self.weights)
@@ -165,9 +167,9 @@ class CudaSinoNormalization(SinoNormalization):
     #
     def _init_cuda_normalization(self):
-        self._d_tmp = garray.zeros(self.sinos_shape[-2:], "f")
+        self._d_tmp = self.cuda_processing.allocate_array("_d_tmp", self.sinos_shape[-2:], "f")
         if self.normalization_kind == "chebyshev":
-            self._chebyshev_kernel = CudaKernel(
+            self._chebyshev_kernel = self.cuda_processing.kernel(
                 "normalize_chebyshev",
                 filename=get_cuda_srcfile("normalization.cu"),
                 signature="Piii",
@@ -183,12 +185,14 @@ class CudaSinoNormalization(SinoNormalization):
             # If normalization_array is 1D, make a 2D array by repeating the line
             if normalization_array.ndim == 1:
                 normalization_array = np.tile(normalization_array, (self.n_angles, 1))
-            self._d_normalization_array = garray.to_gpu(normalization_array.astype("f"))
+            self._d_normalization_array = self.cuda_processing.to_device(
+                "_d_normalization_array", normalization_array.astype("f")
+            )
             if self.normalization_kind == "subtraction":
                 generic_op_val = 1
             elif self.normalization_kind == "division":
                 generic_op_val = 3
-            self._norm_kernel = CudaKernel(
+            self._norm_kernel = self.cuda_processing.kernel(
                 "inplace_generic_op_2Dby2D",
                 filename=get_cuda_srcfile("ElementOp.cu"),
                 signature="PPii",

nabu/reconstruction/tests/test_deringer.py CHANGED Viewed

@@ -1,26 +1,33 @@
 import numpy as np
 import pytest
-from nabu.utils import clip_circle
-from nabu.testutils import get_data, compare_arrays, generate_tests_scenarios, __do_long_tests__
-from nabu.reconstruction.rings import MunchDeringer
+from nabu.reconstruction.rings_cuda import CudaSinoMeanDeringer
+from nabu.testutils import compare_arrays, get_data, generate_tests_scenarios, __do_long_tests__
+from nabu.reconstruction.rings import MunchDeringer, SinoMeanDeringer, VoDeringer, __has_algotom__
 from nabu.thirdparty.pore3d_deringer_munch import munchetal_filter
 from nabu.cuda.utils import __has_pycuda__, get_cuda_context
-__have_gpuderinger__ = False
 if __has_pycuda__:
     import pycuda.gpuarray as garray
-    from nabu.reconstruction.rings_cuda import CudaMunchDeringer, __have_pycudwt__, __have_skcuda__
-    if __have_pycudwt__ and __have_skcuda__:
-        __have_gpuderinger__ = True
+    from nabu.processing.fft_cuda import get_available_fft_implems
+    from nabu.reconstruction.rings_cuda import (
+        CudaMunchDeringer,
+        can_use_cuda_deringer,
+        CudaVoDeringer,
+        __have_tomocupy_deringer__,
+    )
+    __has_cuda_deringer__ = can_use_cuda_deringer()
+else:
+    __has_cuda_deringer__ = False
+    __have_tomocupy_deringer__ = False
 fw_scenarios = generate_tests_scenarios(
     {
         "levels": [4],
         "sigma": [1.0],
         "wname": ["db15"],
-        "padding": [None],
+        "padding": [(100, 100)],
+        "fft_implem": ["skcuda"],
     }
 )
 if __do_long_tests__:
@@ -30,6 +37,7 @@ if __do_long_tests__:
             "sigma": [1.0, 2.0],
             "wname": ["db15", "haar", "rbio4.4"],
             "padding": [None, (100, 100), (50, 71)],
+            "fft_implem": ["skcuda", "vkfft"],
         }
     )
@@ -38,12 +46,14 @@ if __do_long_tests__:
 def bootstrap(request):
     cls = request.cls
     cls.sino = get_data("mri_sino500.npz")["data"]
+    cls.sino2 = get_data("sino_bamboo_hercules.npz")["data"]
     cls.tol = 5e-3
     cls.rings = {150: 0.5, -150: 0.5}
-    if __have_gpuderinger__:
+    if __has_pycuda__:
         cls.ctx = get_cuda_context(cleanup_at_exit=False)
+        cls._available_fft_implems = get_available_fft_implems()
     yield
-    if __have_gpuderinger__:
+    if __has_pycuda__:
         cls.ctx.pop()
@@ -95,17 +105,22 @@ class TestMunchDeringer:
         assert err_max < self.tol, "Max error is too high"
     @pytest.mark.skipif(
-        not (__have_gpuderinger__) or munchetal_filter is None,
-        reason="Need pycuda, pycudwt and scikit-cuda for this test",
+        not (__has_cuda_deringer__) or munchetal_filter is None,
+        reason="Need pycuda, pycudwt and (scikit-cuda or pyvkfft) for this test",
     )
     @pytest.mark.parametrize("config", fw_scenarios)
     def test_cuda_munch_deringer(self, config):
+        fft_implem = config["fft_implem"]
+        if fft_implem not in self._available_fft_implems:
+            pytest.skip("FFT implementation %s is not available" % fft_implem)
         sino = self.add_stripes_to_sino(self.sino, self.rings)
         deringer = CudaMunchDeringer(
             config["sigma"],
             self.sino.shape,
             levels=config["levels"],
             wname=config["wname"],
+            padding=config["padding"],
+            fft_backend=fft_implem,
             cuda_options={"ctx": self.ctx},
         )
         d_sino = garray.to_gpu(sino)
@@ -115,4 +130,91 @@ class TestMunchDeringer:
         ref = self.get_fourier_wavelets_reference_result(sino, config)
         err_max = np.max(np.abs(res - ref))
-        assert err_max < 1e-1, "Max error is too high"
+        assert err_max < 1e-1, "Max error is too high with configuration %s" % (str(config))
+    @pytest.mark.skipif(
+        not (__has_algotom__),
+        reason="Need algotom for this test",
+    )
+    def test_vo_deringer(self):
+        deringer = VoDeringer(self.sino.shape)
+        sino_deringed = deringer.remove_rings_sinogram(self.sino)
+        sinos = np.tile(self.sino, (10, 1, 1))
+        sinos_deringed = deringer.remove_rings_sinograms(sinos)
+        # TODO check result. The generated test sinogram is "too synthetic" for this kind of deringer
+    @pytest.mark.skipif(
+        not (__have_tomocupy_deringer__),
+        reason="Need cupy for this test",
+    )
+    def test_cuda_vo_deringer(self):
+        # Beware, this deringer seems to be buggy for "too-small" sinograms
+        # (NaNs on the edges and in some regions). To be investigated
+        deringer = CudaVoDeringer(self.sino2.shape)
+        d_sino = garray.to_gpu(self.sino2)
+        deringer.remove_rings_sinogram(d_sino)
+        sino = d_sino.get()
+        if __has_algotom__:
+            vo_deringer = VoDeringer(self.sino2.shape)
+            sino_deringed = vo_deringer.remove_rings_sinogram(self.sino2)
+            assert (
+                np.max(np.abs(sino - sino_deringed)) < 2e-3
+            ), "Cuda implementation of Vo deringer does not yield the same results as base implementation"
+    def test_mean_deringer(self):
+        deringer_no_filtering = SinoMeanDeringer(self.sino.shape, mode="subtract")
+        sino = self.sino.copy()
+        deringer_no_filtering.remove_rings_sinogram(sino)
+        sino = self.sino.copy()
+        deringer_with_filtering = SinoMeanDeringer(self.sino.shape, mode="subtract", filter_cutoff=(0, 30))
+        deringer_with_filtering.remove_rings_sinogram(sino)
+        # TODO check results
+    @pytest.mark.skipif(not (__has_pycuda__), reason="Need pycuda for this test")
+    def test_cuda_mean_deringer(self):
+        cuda_deringer = CudaSinoMeanDeringer(
+            self.sino.shape,
+            mode="subtract",
+            filter_cutoff=(
+                0,
+                10,
+            ),
+            ctx=self.ctx,
+        )
+        deringer = SinoMeanDeringer(
+            self.sino.shape,
+            mode="subtract",
+            filter_cutoff=(
+                0,
+                10,
+            ),
+        )
+        d_sino = cuda_deringer.processing.to_device("sino", self.sino)
+        cuda_deringer.remove_rings_sinogram(d_sino)
+        sino = self.sino.copy()
+        sino_d = deringer.remove_rings_sinogram(sino)
+        dirac = np.zeros(self.sino.shape[-1], "f")
+        dirac[dirac.size // 2] = 1
+        deringer_filter_response = deringer._apply_filter(dirac)
+        d_dirac = cuda_deringer.processing.to_device("dirac", dirac)
+        cuda_deringer_filter_response = cuda_deringer._apply_filter(d_dirac)
+        is_close, residual = compare_arrays(
+            deringer_filter_response, cuda_deringer_filter_response.get(), 1e-7, return_residual=True
+        )
+        assert is_close, "Cuda deringer does not have the correct filter response: max_error=%.2e" % residual
+        # There is a rather large discrepancy between the vertical_mean kernel and numpy.mean(). Not sure who is right
+        is_close, residual = compare_arrays(sino_d, d_sino.get(), 1e-1, return_residual=True)
+        assert is_close, (
+            "Cuda deringer does not yield the same result as base implementation: max_error=%.2e" % residual
+        )

nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl

nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl