PyPI - nabu - Versions diffs - 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl - Mend

nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (183)

doc/conf.py +1 -1
doc/doc_config.py +32 -0
nabu/__init__.py +2 -1
nabu/app/bootstrap_stitching.py +1 -1
nabu/app/cli_configs.py +122 -2
nabu/app/composite_cor.py +27 -2
nabu/app/correct_rot.py +70 -0
nabu/app/create_distortion_map_from_poly.py +42 -18
nabu/app/diag_to_pix.py +358 -0
nabu/app/diag_to_rot.py +449 -0
nabu/app/generate_header.py +4 -3
nabu/app/histogram.py +2 -2
nabu/app/multicor.py +6 -1
nabu/app/parse_reconstruction_log.py +151 -0
nabu/app/prepare_weights_double.py +83 -22
nabu/app/reconstruct.py +5 -1
nabu/app/reconstruct_helical.py +7 -0
nabu/app/reduce_dark_flat.py +6 -3
nabu/app/rotate.py +4 -4
nabu/app/stitching.py +16 -2
nabu/app/tests/test_reduce_dark_flat.py +18 -2
nabu/app/validator.py +4 -4
nabu/cuda/convolution.py +8 -376
nabu/cuda/fft.py +4 -0
nabu/cuda/kernel.py +4 -4
nabu/cuda/medfilt.py +5 -158
nabu/cuda/padding.py +5 -71
nabu/cuda/processing.py +23 -2
nabu/cuda/src/ElementOp.cu +78 -0
nabu/cuda/src/backproj.cu +28 -2
nabu/cuda/src/fourier_wavelets.cu +2 -2
nabu/cuda/src/normalization.cu +23 -0
nabu/cuda/src/padding.cu +2 -2
nabu/cuda/src/transpose.cu +16 -0
nabu/cuda/utils.py +39 -0
nabu/estimation/alignment.py +10 -1
nabu/estimation/cor.py +808 -38
nabu/estimation/cor_sino.py +7 -9
nabu/estimation/tests/test_cor.py +85 -3
nabu/io/reader.py +26 -18
nabu/io/tests/test_cast_volume.py +3 -3
nabu/io/tests/test_detector_distortion.py +3 -3
nabu/io/tiffwriter_zmm.py +2 -2
nabu/io/utils.py +14 -4
nabu/io/writer.py +5 -3
nabu/misc/fftshift.py +6 -0
nabu/misc/histogram.py +5 -285
nabu/misc/histogram_cuda.py +8 -104
nabu/misc/kernel_base.py +3 -121
nabu/misc/padding_base.py +5 -69
nabu/misc/processing_base.py +3 -107
nabu/misc/rotation.py +5 -62
nabu/misc/rotation_cuda.py +5 -65
nabu/misc/transpose.py +6 -0
nabu/misc/unsharp.py +3 -78
nabu/misc/unsharp_cuda.py +5 -52
nabu/misc/unsharp_opencl.py +8 -85
nabu/opencl/fft.py +6 -0
nabu/opencl/kernel.py +21 -6
nabu/opencl/padding.py +5 -72
nabu/opencl/processing.py +27 -5
nabu/opencl/src/backproj.cl +3 -3
nabu/opencl/src/fftshift.cl +65 -12
nabu/opencl/src/padding.cl +2 -2
nabu/opencl/src/roll.cl +96 -0
nabu/opencl/src/transpose.cl +16 -0
nabu/pipeline/config_validators.py +63 -3
nabu/pipeline/dataset_validator.py +2 -2
nabu/pipeline/estimators.py +193 -35
nabu/pipeline/fullfield/chunked.py +34 -17
nabu/pipeline/fullfield/chunked_cuda.py +7 -5
nabu/pipeline/fullfield/computations.py +48 -13
nabu/pipeline/fullfield/nabu_config.py +13 -13
nabu/pipeline/fullfield/processconfig.py +10 -5
nabu/pipeline/fullfield/reconstruction.py +1 -2
nabu/pipeline/helical/fbp.py +5 -0
nabu/pipeline/helical/filtering.py +12 -9
nabu/pipeline/helical/gridded_accumulator.py +179 -33
nabu/pipeline/helical/helical_chunked_regridded.py +262 -151
nabu/pipeline/helical/helical_chunked_regridded_cuda.py +4 -11
nabu/pipeline/helical/helical_reconstruction.py +56 -18
nabu/pipeline/helical/span_strategy.py +1 -1
nabu/pipeline/helical/tests/test_accumulator.py +4 -0
nabu/pipeline/params.py +23 -2
nabu/pipeline/processconfig.py +3 -8
nabu/pipeline/tests/test_chunk_reader.py +78 -0
nabu/pipeline/tests/test_estimators.py +120 -2
nabu/pipeline/utils.py +25 -0
nabu/pipeline/writer.py +2 -0
nabu/preproc/ccd_cuda.py +9 -7
nabu/preproc/ctf.py +21 -26
nabu/preproc/ctf_cuda.py +25 -25
nabu/preproc/double_flatfield.py +14 -2
nabu/preproc/double_flatfield_cuda.py +7 -11
nabu/preproc/flatfield_cuda.py +23 -27
nabu/preproc/phase.py +19 -24
nabu/preproc/phase_cuda.py +21 -21
nabu/preproc/shift_cuda.py +58 -28
nabu/preproc/tests/test_ctf.py +5 -5
nabu/preproc/tests/test_double_flatfield.py +2 -2
nabu/preproc/tests/test_vshift.py +13 -2
nabu/processing/__init__.py +0 -0
nabu/processing/convolution_cuda.py +375 -0
nabu/processing/fft_base.py +163 -0
nabu/processing/fft_cuda.py +256 -0
nabu/processing/fft_opencl.py +54 -0
nabu/processing/fftshift.py +134 -0
nabu/processing/histogram.py +286 -0
nabu/processing/histogram_cuda.py +103 -0
nabu/processing/kernel_base.py +126 -0
nabu/processing/medfilt_cuda.py +159 -0
nabu/processing/muladd.py +29 -0
nabu/processing/muladd_cuda.py +68 -0
nabu/processing/padding_base.py +71 -0
nabu/processing/padding_cuda.py +75 -0
nabu/processing/padding_opencl.py +77 -0
nabu/processing/processing_base.py +123 -0
nabu/processing/roll_opencl.py +64 -0
nabu/processing/rotation.py +63 -0
nabu/processing/rotation_cuda.py +66 -0
nabu/processing/tests/__init__.py +0 -0
nabu/processing/tests/test_fft.py +268 -0
nabu/processing/tests/test_fftshift.py +71 -0
nabu/{misc → processing}/tests/test_histogram.py +2 -4
nabu/{cuda → processing}/tests/test_medfilt.py +1 -1
nabu/processing/tests/test_muladd.py +54 -0
nabu/{cuda → processing}/tests/test_padding.py +119 -75
nabu/processing/tests/test_roll.py +63 -0
nabu/{misc → processing}/tests/test_rotation.py +3 -2
nabu/processing/tests/test_transpose.py +72 -0
nabu/{misc → processing}/tests/test_unsharp.py +41 -8
nabu/processing/transpose.py +126 -0
nabu/processing/unsharp.py +79 -0
nabu/processing/unsharp_cuda.py +53 -0
nabu/processing/unsharp_opencl.py +75 -0
nabu/reconstruction/fbp.py +34 -10
nabu/reconstruction/fbp_base.py +35 -16
nabu/reconstruction/fbp_opencl.py +7 -12
nabu/reconstruction/filtering.py +2 -2
nabu/reconstruction/filtering_cuda.py +13 -14
nabu/reconstruction/filtering_opencl.py +3 -4
nabu/reconstruction/projection.py +2 -0
nabu/reconstruction/rings.py +158 -1
nabu/reconstruction/rings_cuda.py +218 -58
nabu/reconstruction/sinogram_cuda.py +16 -12
nabu/reconstruction/tests/test_deringer.py +116 -14
nabu/reconstruction/tests/test_fbp.py +22 -31
nabu/reconstruction/tests/test_filtering.py +11 -2
nabu/resources/dataset_analyzer.py +89 -26
nabu/resources/nxflatfield.py +2 -2
nabu/resources/tests/test_nxflatfield.py +1 -1
nabu/resources/utils.py +9 -2
nabu/stitching/alignment.py +184 -0
nabu/stitching/config.py +241 -39
nabu/stitching/definitions.py +6 -0
nabu/stitching/frame_composition.py +4 -2
nabu/stitching/overlap.py +99 -3
nabu/stitching/sample_normalization.py +60 -0
nabu/stitching/slurm_utils.py +10 -10
nabu/stitching/tests/test_alignment.py +99 -0
nabu/stitching/tests/test_config.py +16 -1
nabu/stitching/tests/test_overlap.py +68 -2
nabu/stitching/tests/test_sample_normalization.py +49 -0
nabu/stitching/tests/test_slurm_utils.py +5 -5
nabu/stitching/tests/test_utils.py +3 -33
nabu/stitching/tests/test_z_stitching.py +391 -22
nabu/stitching/utils.py +144 -202
nabu/stitching/z_stitching.py +309 -126
nabu/testutils.py +18 -0
nabu/thirdparty/tomocupy_remove_stripe.py +586 -0
nabu/utils.py +32 -6
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/LICENSE +1 -1
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/METADATA +5 -5
nabu-2024.1.0rc3.dist-info/RECORD +296 -0
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/WHEEL +1 -1
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/entry_points.txt +5 -1
nabu/conftest.py +0 -14
nabu/opencl/fftshift.py +0 -92
nabu/opencl/tests/test_fftshift.py +0 -55
nabu/opencl/tests/test_padding.py +0 -84
nabu-2023.2.1.dist-info/RECORD +0 -252
nabu/cuda/src/{fftshift.cu → dfi_fftshift.cu} +0 -0
{nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/top_level.txt +0 -0

nabu/preproc/ctf.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import math
 import numpy as np
+from scipy.fft import rfft2, irfft2, fft2, ifft2
 from ..resources.logger import LoggerOrPrint
 from ..misc import fourier_filters
 from ..misc.padding import pad_interpolate, recut
-from ..utils import get_num_threads
+from ..utils import get_num_threads, deprecation_warning
 class GeoPars:
@@ -111,6 +112,7 @@ class CTFPhaseRetrieval:
         lim2=0.2,
         use_rfft=False,
         fftw_num_threads=None,
+        fft_num_threads=None,
         logger=None,
     ):
         """
@@ -138,10 +140,11 @@ class CTFPhaseRetrieval:
         use_rfft: bool, optional
             Whether to use real-to-complex (R2C) FFT instead of usual complex-to-complex (C2C).
         fftw_num_threads: bool or None or int, optional
-            If False is passed: don't use FFTW.
-            If None is passed: use all available threads.
-            If a number is provided: number of threads to use for FFTW.
-            You can pass a negative number to use N - fftw_num_threads cores.
+            DEPRECATED - please use fft_num_threads instead.
+        fft_num_threads: bool or None or int, optional
+            Number of threads to use for FFT.
+            If a number is provided: number of threads to use for FFT.
+            You can pass a negative number to use N - fft_num_threads cores.
         logger: optional
             a logger object
         """
@@ -152,12 +155,18 @@ class CTFPhaseRetrieval:
         self._calc_shape(shape, padded_shape, padding_mode)
         self.delta_beta = delta_beta
+        # COMPAT.
+        if fftw_num_threads is not None:
+            deprecation_warning("'fftw_num_threads' is replaced with 'fft_num_threads'", func_name="ctf_fftw")
+            fft_num_threads = fftw_num_threads
+        # ---
         self.lim = None
         self.lim1 = lim1
         self.lim2 = lim2
         self.normalize_by_mean = normalize_by_mean
         self.translation_vh = translation_vh
-        self._setup_fft(use_rfft, fftw_num_threads)
+        self._setup_fft(use_rfft, fft_num_threads)
         self._get_ctf_filter()
     def _calc_shape(self, shape, padded_shape, padding_mode):
@@ -175,25 +184,11 @@ class CTFPhaseRetrieval:
         self.shape_padded = tuple(padded_shape)
         self.padding_mode = padding_mode
-    def _setup_fft(self, use_rfft, fftw_num_threads):
+    def _setup_fft(self, use_rfft, fft_num_threads):
         self.use_rfft = use_rfft
-        self._fft_func = np.fft.rfft2 if use_rfft else np.fft.fft2
-        self._ifft_func = np.fft.irfft2 if use_rfft else np.fft.ifft2
-        self.use_fftw = False
-        if fftw_num_threads is False:
-            return
-        fftw_num_threads = get_num_threads(fftw_num_threads)
-        if self.use_rfft and (fftw_num_threads > 0):
-            # importing silx.math.fft creates opencl contexts all over the place
-            # because of the silx.opencl.ocl singleton.
-            # So, import silx as late as possible
-            from silx.math.fft.fftw import FFTW, __have_fftw__
-            if __have_fftw__:
-                self.use_fftw = True
-                self.fftw = FFTW(shape=self.shape_padded, dtype="f", num_threads=fftw_num_threads)
-                self._fft_func = self.fftw.fft
-                self._ifft_func = self.fftw.ifft
+        self._fft_func = rfft2 if use_rfft else fft2
+        self._ifft_func = irfft2 if use_rfft else ifft2
+        self.fft_num_threads = get_num_threads(fft_num_threads)
     def _get_ctf_filter(self):
         """
@@ -320,7 +315,7 @@ class CTFPhaseRetrieval:
         self._ctf_filter_denom = (2 * self.unreg_filter_denom * self.unreg_filter_denom + self.lim).astype(np.complex64)
     def _apply_filter(self, img):
-        img_f = self._fft_func(img)
+        img_f = self._fft_func(img, workers=self.fft_num_threads)
         img_f *= self.unreg_filter_denom
         unreg_filter_denom_0_mean = self.unreg_filter_denom[0, 0]
@@ -331,7 +326,7 @@ class CTFPhaseRetrieval:
         ## formula 8, with regularisation to stay at a safe distance from the poles
         img_f /= self._ctf_filter_denom
-        ph = self._ifft_func(img_f).real
+        ph = self._ifft_func(img_f, workers=self.fft_num_threads).real
         return ph
     def retrieve_phase(self, img, output=None):

nabu/preproc/ctf_cuda.py CHANGED Viewed

@@ -1,12 +1,15 @@
 import numpy as np
-from pycuda import gpuarray as garray
-from ..utils import calc_padding_lengths, updiv, get_cuda_srcfile
+from ..utils import calc_padding_lengths, updiv, get_cuda_srcfile, docstring
 from ..cuda.processing import CudaProcessing
-from ..cuda.kernel import CudaKernel
-from ..cuda.padding import CudaPadding
+from ..cuda.utils import __has_pycuda__
+from ..processing.padding_cuda import CudaPadding
+from ..processing.fft_cuda import get_fft_class
 from .phase_cuda import CudaPaganinPhaseRetrieval
 from .ctf import CTFPhaseRetrieval
+if __has_pycuda__:
+    from pycuda import gpuarray as garray
 # TODO:
 #  - better padding scheme (for now 2*shape)
@@ -17,6 +20,7 @@ class CudaCTFPhaseRetrieval(CTFPhaseRetrieval):
     Cuda back-end of CTFPhaseRetrieval
     """
+    @docstring(CTFPhaseRetrieval)
     def __init__(
         self,
         shape,
@@ -29,9 +33,11 @@ class CudaCTFPhaseRetrieval(CTFPhaseRetrieval):
         lim1=1.0e-5,
         lim2=0.2,
         use_rfft=True,
-        fftw_num_threads=None,
+        fftw_num_threads=None,  # COMPAT.
+        fft_num_threads=None,
         logger=None,
         cuda_options=None,
+        fft_backend="skcuda",
     ):
         """
         Initialize a CudaCTFPhaseRetrieval.
@@ -62,30 +68,26 @@ class CudaCTFPhaseRetrieval(CTFPhaseRetrieval):
             lim2=lim2,
             logger=logger,
             use_rfft=True,
-            fftw_num_threads=False,
+            fft_num_threads=False,
         )
         self._init_ctf_filter()
         self._init_cuda_padding()
-        self._init_fft()
+        self._init_fft(fft_backend)
         self._init_mult_kernel()
     def _init_ctf_filter(self):
         self._mean_scale_factor = self.unreg_filter_denom[0, 0] * np.prod(self.shape_padded)
-        self._d_filter_num = garray.to_gpu(self.unreg_filter_denom).astype("f")
-        self._d_filter_denom = garray.to_gpu(
-            (1.0 / (2 * self.unreg_filter_denom * self.unreg_filter_denom + self.lim)).astype("f")
+        self._d_filter_num = self.cuda_processing.to_device("_d_filter_num", self.unreg_filter_denom).astype("f")
+        self._d_filter_denom = self.cuda_processing.to_device(
+            "_d_filter_denom", (1.0 / (2 * self.unreg_filter_denom * self.unreg_filter_denom + self.lim)).astype("f")
         )
     def _init_cuda_padding(self):
         pad_width = calc_padding_lengths(self.shape, self.shape_padded)
         # Custom coordinate transform to get directly FFT layout
-        R, C = np.indices(self.shape, dtype=np.int32)
-        coords_R = np.roll(
-            np.pad(R, pad_width, mode=self.padding_mode), (-pad_width[0][0], -pad_width[1][0]), axis=(0, 1)
-        )
-        coords_C = np.roll(
-            np.pad(C, pad_width, mode=self.padding_mode), (-pad_width[0][0], -pad_width[1][0]), axis=(0, 1)
-        )
+        R, C = np.indices(self.shape, dtype=np.int32, sparse=True)
+        coords_R = np.roll(np.pad(R.ravel(), pad_width[0], mode=self.padding_mode), -pad_width[0][0])
+        coords_C = np.roll(np.pad(C.ravel(), pad_width[1], mode=self.padding_mode), -pad_width[1][0])
         self.cuda_padding = CudaPadding(
             self.shape,
             (coords_R, coords_C),
@@ -93,16 +95,14 @@ class CudaCTFPhaseRetrieval(CTFPhaseRetrieval):
             # propagate cuda options ?
         )
-    def _init_fft(self):
-        # Import has to be done here, otherwise scikit-cuda creates a cuda/cublas context at import
-        from silx.math.fft.cufft import CUFFT
-        self.cufft = CUFFT(template=np.zeros(self.shape_padded, dtype="f"))
-        self.d_radio_padded = self.cufft.data_in
-        self.d_radio_f = self.cufft.data_out
+    def _init_fft(self, fft_backend):
+        fft_cls = get_fft_class(backend=fft_backend)
+        self.cufft = fft_cls(shape=self.shape_padded, dtype=np.float32, r2c=True)
+        self.d_radio_padded = self.cuda_processing.allocate_array("d_radio_padded", self.shape_padded, "f")
+        self.d_radio_f = self.cuda_processing.allocate_array("d_radio_f", self.cufft.shape_out, np.complex64)
     def _init_mult_kernel(self):
-        self.cpxmult_kernel = CudaKernel(
+        self.cpxmult_kernel = self.cuda_processing.kernel(
             "CTF_kernel",
             filename=get_cuda_srcfile("ElementOp.cu"),
             signature="PPPfii",

nabu/preproc/double_flatfield.py CHANGED Viewed

@@ -5,6 +5,7 @@ from silx.io.url import DataUrl
 from ..utils import check_supported, check_shape, get_2D_3D_shape
 from ..io.reader import Readers
 from ..io.writer import Writers
+from .ccd import Log
 class DoubleFlatField:
@@ -22,6 +23,8 @@ class DoubleFlatField:
         average_is_on_log=False,
         sigma_filter=None,
         filter_mode="reflect",
+        log_clip_min=None,
+        log_clip_max=None,
     ):
         """
         Init double flat field by summing a series of urls and considering the same subregion of them.
@@ -55,6 +58,8 @@ class DoubleFlatField:
         self.radios_shape = get_2D_3D_shape(shape)
         self.n_angles = self.radios_shape[0]
         self.shape = self.radios_shape[1:]
+        self._log_clip_min = log_clip_min
+        self._log_clip_max = log_clip_max
         self._init_filedump(result_url, sub_region, detector_corrector)
         self._init_processing(input_is_mlog, output_is_mlog, average_is_on_log, sigma_filter, filter_mode)
         self._computed = False
@@ -112,17 +117,19 @@ class DoubleFlatField:
             self.sigma_filter = None
         self.filter_mode = filter_mode
         proc = lambda x, o: np.copyto(o, x)
+        self._mlog = Log((1,) + self.shape, clip_min=self._log_clip_min, clip_max=self._log_clip_max)
         if self.input_is_mlog:
             if not self.average_is_on_log:
                 proc = lambda x, o: np.exp(-x, out=o)
         else:
             if self.average_is_on_log:
-                proc = lambda x, o: -np.log(x, out=o)
+                proc = self._proc_mlog
         postproc = lambda x: x
         if self.output_is_mlog:
             if not self.average_is_on_log:
-                postproc = lambda x: -np.log(x)
+                postproc = self._proc_mlog
         else:
             if self.average_is_on_log:
                 postproc = lambda x: np.exp(-x)
@@ -130,6 +137,11 @@ class DoubleFlatField:
         self.proc = proc
         self.postproc = postproc
+    def _proc_mlog(self, x, o):
+        o[:] = x[:]
+        self._mlog.take_logarithm(o)
+        return o
     def compute_double_flatfield(self, radios, recompute=False):
         """
         Read the radios and generate the "double flat field" by averaging

nabu/preproc/double_flatfield_cuda.py CHANGED Viewed

@@ -2,7 +2,8 @@ from .double_flatfield import DoubleFlatField
 from ..utils import check_shape
 from ..cuda.utils import __has_pycuda__
 from ..cuda.processing import CudaProcessing
-from ..misc.unsharp_cuda import CudaUnsharpMask
+from ..processing.unsharp_cuda import CudaUnsharpMask
+from .ccd_cuda import CudaLog
 if __has_pycuda__:
     import pycuda.gpuarray as garray
@@ -21,6 +22,8 @@ class CudaDoubleFlatField(DoubleFlatField):
         average_is_on_log=False,
         sigma_filter=None,
         filter_mode="reflect",
+        log_clip_min=None,
+        log_clip_max=None,
         cuda_options=None,
     ):
         """
@@ -37,6 +40,8 @@ class CudaDoubleFlatField(DoubleFlatField):
             average_is_on_log=average_is_on_log,
             sigma_filter=sigma_filter,
             filter_mode=filter_mode,
+            log_clip_min=log_clip_min,
+            log_clip_max=log_clip_max,
         )
         self._init_gaussian_filter()
@@ -57,16 +62,6 @@ class CudaDoubleFlatField(DoubleFlatField):
         cumath.exp(o, out=o)
         return o
-    @staticmethod
-    def _proc_mlog(x, o, min_clip=None):
-        if min_clip is not None:
-            garray.maximum(x, min_clip, out=o)
-            cumath.log(o, out=o)
-        else:
-            cumath.log(x, out=o)
-        o *= -1
-        return o
     def _init_processing(self, input_is_mlog, output_is_mlog, average_is_on_log, sigma_filter, filter_mode):
         self.input_is_mlog = input_is_mlog
         self.output_is_mlog = output_is_mlog
@@ -77,6 +72,7 @@ class CudaDoubleFlatField(DoubleFlatField):
         self.filter_mode = filter_mode
         # proc = lambda x,o: np.copyto(o, x)
         proc = self._proc_copy
+        self._mlog = CudaLog((1,) + self.shape, clip_min=self._log_clip_min, clip_max=self._log_clip_max)
         if self.input_is_mlog:
             if not self.average_is_on_log:
                 # proc = lambda x,o: np.exp(-x, out=o)

nabu/preproc/flatfield_cuda.py CHANGED Viewed

@@ -1,25 +1,25 @@
-from typing import Union
 import numpy as np
-import pycuda.gpuarray as garray
+from nabu.cuda.processing import CudaProcessing
 from ..preproc.flatfield import FlatFieldArrays
-from ..cuda.kernel import CudaKernel
 from ..utils import get_cuda_srcfile
 from ..io.reader import load_images_from_dataurl_dict
+from ..cuda.utils import __has_pycuda__
 class CudaFlatFieldArrays(FlatFieldArrays):
     def __init__(
         self,
-        radios_shape: tuple,
-        flats: dict,
-        darks: dict,
+        radios_shape,
+        flats,
+        darks,
         radios_indices=None,
-        interpolation: str = "linear",
+        interpolation="linear",
         distortion_correction=None,
         nan_value=1.0,
         radios_srcurrent=None,
         flats_srcurrent=None,
-        cuda_options: Union[dict, None] = None,
+        cuda_options=None,
     ):
         """
         Initialize a flat-field normalization CUDA process.
@@ -41,16 +41,10 @@ class CudaFlatFieldArrays(FlatFieldArrays):
             flats_srcurrent=flats_srcurrent,
             nan_value=nan_value,
         )
-        self._set_cuda_options(cuda_options)
+        self.cuda_processing = CudaProcessing(**(cuda_options or {}))
         self._init_cuda_kernels()
         self._load_flats_and_darks_on_gpu()
-    def _set_cuda_options(self, user_cuda_options):
-        self.cuda_options = {"device_id": None, "ctx": None, "cleanup_at_exit": None}
-        if user_cuda_options is None:
-            user_cuda_options = {}
-        self.cuda_options.update(user_cuda_options)
     def _init_cuda_kernels(self):
         # TODO
         if self.interpolation != "linear":
@@ -63,7 +57,7 @@ class CudaFlatFieldArrays(FlatFieldArrays):
         ]
         if self.nan_value is not None:
             options.append("-DNAN_VALUE=%f" % self.nan_value)
-        self.cuda_kernel = CudaKernel(
+        self.cuda_kernel = self.cuda_processing.kernel(
             "flatfield_normalization", self._cuda_fname, signature="PPPiiiPP", options=options
         )
         self._nx = np.int32(self.shape[1])
@@ -71,17 +65,19 @@ class CudaFlatFieldArrays(FlatFieldArrays):
     def _load_flats_and_darks_on_gpu(self):
         # Flats
-        self.d_flats = garray.zeros((self.n_flats,) + self.shape, np.float32)
+        self.d_flats = self.cuda_processing.allocate_array("d_flats", (self.n_flats,) + self.shape, np.float32)
         for i, flat_idx in enumerate(self._sorted_flat_indices):
             self.d_flats[i].set(np.ascontiguousarray(self.flats[flat_idx], dtype=np.float32))
         # Darks
-        self.d_darks = garray.zeros((self.n_darks,) + self.shape, np.float32)
+        self.d_darks = self.cuda_processing.allocate_array("d_darks", (self.n_darks,) + self.shape, np.float32)
         for i, dark_idx in enumerate(self._sorted_dark_indices):
             self.d_darks[i].set(np.ascontiguousarray(self.darks[dark_idx], dtype=np.float32))
-        self.d_darks_indices = garray.to_gpu(np.array(self._sorted_dark_indices, dtype=np.int32))
+        self.d_darks_indices = self.cuda_processing.to_device(
+            "d_darks_indices", np.array(self._sorted_dark_indices, dtype=np.int32)
+        )
         # Indices
-        self.d_flats_indices = garray.to_gpu(self.flats_idx)
-        self.d_flats_weights = garray.to_gpu(self.flats_weights)
+        self.d_flats_indices = self.cuda_processing.to_device("d_flats_indices", self.flats_idx)
+        self.d_flats_weights = self.cuda_processing.to_device("d_flats_weights", self.flats_weights)
     def normalize_radios(self, radios):
         """
@@ -93,7 +89,7 @@ class CudaFlatFieldArrays(FlatFieldArrays):
         radios_shape: `pycuda.gpuarray.GPUArray`
             Radios chunk.
         """
-        if not (isinstance(radios, garray.GPUArray)):
+        if not (isinstance(radios, self.cuda_processing.array_class)):
             raise ValueError("Expected a pycuda.gpuarray (got %s)" % str(type(radios)))
         if radios.dtype != np.float32:
             raise ValueError("radios must be in float32 dtype (got %s)" % str(radios.dtype))
@@ -121,16 +117,16 @@ CudaFlatField = CudaFlatFieldArrays
 class CudaFlatFieldDataUrls(CudaFlatField):
     def __init__(
         self,
-        radios_shape: tuple,
-        flats: dict,
-        darks: dict,
+        radios_shape,
+        flats,
+        darks,
         radios_indices=None,
-        interpolation: str = "linear",
+        interpolation="linear",
         distortion_correction=None,
         nan_value=1.0,
         radios_srcurrent=None,
         flats_srcurrent=None,
-        cuda_options: Union[dict, None] = None,
+        cuda_options=None,
         **chunk_reader_kwargs,
     ):
         flats_arrays_dict = load_images_from_dataurl_dict(flats, **chunk_reader_kwargs)

nabu/preproc/phase.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from math import pi
 from bisect import bisect
 import numpy as np
+from scipy.fft import rfft2, irfft2, fft2, ifft2
 from ..utils import generate_powers, get_decay, check_supported, get_num_threads, deprecation_warning
 #
@@ -53,6 +54,7 @@ class PaganinPhaseRetrieval:
         use_rfft=True,
         use_R2C=None,
         fftw_num_threads=None,
+        fft_num_threads=None,
     ):
         """
         Paganin Phase Retrieval for an infinitely distant point source.
@@ -113,9 +115,11 @@ class PaganinPhaseRetrieval:
         use_R2C: bool, optional
             DEPRECATED, use use_rfft instead
         fftw_num_threads: bool or None or int, optional
-            Whether to use FFTW for speeding up FFT.
+            DEPRECATED - please use fft_num_threads
+        fft_num_threads: bool or None or int, optional
+            Number of threads for FFT.
             Default is to use all available threads. You can pass a negative number
-            to use N - fftw_num_threads cores.
+            to use N - fft_num_threads cores.
         Important
         ----------
@@ -171,8 +175,11 @@ class PaganinPhaseRetrieval:
         # COMPAT.
         if use_R2C is not None:
             deprecation_warning("'use_R2C' is replaced with 'use_rfft'", func_name="pag_r2c")
-        # -
-        self._get_fft(use_rfft, fftw_num_threads)
+        if fftw_num_threads is not None:
+            deprecation_warning("'fftw_num_threads' is replaced with 'fft_num_threads'", func_name="pag_fftw")
+            fft_num_threads = fftw_num_threads
+        # ---
+        self._get_fft(use_rfft, fft_num_threads)
         self.compute_filter()
     def _init_parameters(self, distance, energy, pixel_size, delta_beta, padding):
@@ -191,28 +198,16 @@ class PaganinPhaseRetrieval:
             "reflect": self._pad_reflect,
         }
-    def _get_fft(self, use_rfft, fftw_num_threads):
+    def _get_fft(self, use_rfft, fft_num_threads):
         self.use_rfft = use_rfft
         self.use_R2C = use_rfft  # Compat.
-        fftw_num_threads = get_num_threads(fftw_num_threads)
+        self.fft_num_threads = get_num_threads(fft_num_threads)
         if self.use_rfft:
-            self.fft_func = np.fft.rfft2
-            self.ifft_func = np.fft.irfft2
+            self.fft_func = rfft2
+            self.ifft_func = irfft2
         else:
-            self.fft_func = np.fft.fft2
-            self.ifft_func = np.fft.ifft2
-        self.use_fftw = False
-        if self.use_rfft and (fftw_num_threads > 0):
-            # importing silx.math.fft creates opencl contexts all over the place
-            # because of the silx.opencl.ocl singleton.
-            # So, import silx as late as possible
-            from silx.math.fft.fftw import FFTW, __have_fftw__
-            if __have_fftw__:
-                self.use_fftw = True
-                self.fftw = FFTW(shape=self.shape_padded, dtype="f", num_threads=fftw_num_threads)
-                self.fft_func = self.fftw.fft
-                self.ifft_func = self.fftw.ifft
+            self.fft_func = fft2
+            self.ifft_func = ifft2
     def _calc_shape(self, shape, margin):
         if np.isscalar(shape):
@@ -378,9 +373,9 @@ class PaganinPhaseRetrieval:
     def apply_filter(self, radio, padding_method=None, output=None):
         self.pad_data(radio, padding_method=padding_method)
-        radio_f = self.fft_func(self.data_padded)
+        radio_f = self.fft_func(self.data_padded, workers=self.fft_num_threads)
         radio_f *= self.paganin_filter
-        radio_filtered = self.ifft_func(radio_f).real
+        radio_filtered = self.ifft_func(radio_f, workers=self.fft_num_threads).real
         s0, s1 = self.shape_inner
         ((U, _), (L, _)) = self.margin
         if output is None:

nabu/preproc/phase_cuda.py CHANGED Viewed

@@ -1,15 +1,15 @@
 import numpy as np
 import pycuda.driver as cuda
-from pycuda import gpuarray as garray
-from ..utils import get_cuda_srcfile, check_supported
-from .phase import PaganinPhaseRetrieval
+from ..utils import get_cuda_srcfile, check_supported, docstring
 from ..cuda.processing import CudaProcessing
-from ..cuda.kernel import CudaKernel
+from ..processing.fft_cuda import get_fft_class
+from .phase import PaganinPhaseRetrieval
 class CudaPaganinPhaseRetrieval(PaganinPhaseRetrieval):
     supported_paddings = ["zeros", "constant", "edge"]
+    @docstring(PaganinPhaseRetrieval)
     def __init__(
         self,
         shape,
@@ -20,7 +20,9 @@ class CudaPaganinPhaseRetrieval(PaganinPhaseRetrieval):
         padding="edge",
         margin=None,
         cuda_options=None,
-        fftw_num_threads=None,
+        fftw_num_threads=None,  # COMPAT.
+        fft_num_threads=None,
+        fft_backend="skcuda",
     ):
         """
         Please refer to the documentation of
@@ -37,10 +39,10 @@ class CudaPaganinPhaseRetrieval(PaganinPhaseRetrieval):
             padding=padding,
             margin=margin,
             use_rfft=True,
-            fftw_num_threads=None,
+            fft_num_threads=False,
         )
         self._init_gpu_arrays()
-        self._init_fft()
+        self._init_fft(fft_backend)
         self._init_padding_kernel()
         self._init_mult_kernel()
@@ -51,25 +53,23 @@ class CudaPaganinPhaseRetrieval(PaganinPhaseRetrieval):
         return padding
     def _init_gpu_arrays(self):
-        self.d_paganin_filter = garray.to_gpu(np.ascontiguousarray(self.paganin_filter, dtype=np.float32))
+        self.d_paganin_filter = self.cuda_processing.to_device(
+            "d_paganin_filter", np.ascontiguousarray(self.paganin_filter, dtype=np.float32)
+        )
     # overwrite parent method, don't initialize any FFT plan
-    def _get_fft(self, use_rfft, fftw_num_threads):
+    def _get_fft(self, use_rfft, fft_num_threads):
         self.use_rfft = use_rfft
-        self.use_fftw = False
-    def _init_fft(self):
-        # Import has to be done here, otherwise scikit-cuda creates a cuda/cublas context at import
-        from silx.math.fft.cufft import CUFFT
-        #
-        self.cufft = CUFFT(template=self.data_padded.astype("f"))
-        self.d_radio_padded = self.cufft.data_in
-        self.d_radio_f = self.cufft.data_out
+    def _init_fft(self, fft_backend):
+        fft_cls = get_fft_class(backend=fft_backend)
+        self.cufft = fft_cls(shape=self.data_padded.shape, dtype=np.float32, r2c=True)
+        self.d_radio_padded = self.cuda_processing.allocate_array("d_radio_padded", self.cufft.shape, "f")
+        self.d_radio_f = self.cuda_processing.allocate_array("d_radio_f", self.cufft.shape_out, np.complex64)
     def _init_padding_kernel(self):
         kern_signature = {"constant": "Piiiiiiiiffff", "edge": "Piiiiiiii"}
-        self.padding_kernel = CudaKernel(
+        self.padding_kernel = self.cuda_processing.kernel(
             "padding_%s" % self.padding,
             filename=get_cuda_srcfile("padding.cu"),
             signature=kern_signature[self.padding],
@@ -92,7 +92,7 @@ class CudaPaganinPhaseRetrieval(PaganinPhaseRetrieval):
             self.padding_kernel_args.extend([0, 0, 0, 0])
     def _init_mult_kernel(self):
-        self.cpxmult_kernel = CudaKernel(
+        self.cpxmult_kernel = self.cuda_processing.kernel(
             "inplace_complexreal_mul_2Dby2D",
             filename=get_cuda_srcfile("ElementOp.cu"),
             signature="PPii",
@@ -109,7 +109,7 @@ class CudaPaganinPhaseRetrieval(PaganinPhaseRetrieval):
         assert data.dtype == np.float32
         # Rectangular memcopy
         # TODO profile, and if needed include this copy in the padding kernel
-        if isinstance(data, np.ndarray) or isinstance(data, garray.GPUArray):
+        if isinstance(data, np.ndarray) or isinstance(data, self.cuda_processing.array_class):
             self.d_radio_padded[: self.shape[0], : self.shape[1]] = data[:, :]
         elif isinstance(data, cuda.DeviceAllocation):
             # TODO manual memcpy2D

nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl

nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl