PyPI - nabu - Versions diffs - 2025.1.0.dev5__py3-none-any.whl → 2025.1.0.dev13__py3-none-any.whl - Mend

nabu 2025.1.0.dev5__py3-none-any.whl → 2025.1.0.dev13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66)

nabu/__init__.py +1 -1
nabu/app/double_flatfield.py +18 -5
nabu/app/multicor.py +25 -10
nabu/app/reconstruct_helical.py +4 -4
nabu/app/stitching.py +7 -2
nabu/cuda/src/backproj.cu +10 -10
nabu/cuda/src/cone.cu +4 -0
nabu/cuda/utils.py +1 -1
nabu/estimation/cor.py +3 -3
nabu/io/cast_volume.py +16 -0
nabu/io/reader.py +3 -2
nabu/opencl/src/backproj.cl +10 -10
nabu/pipeline/estimators.py +6 -6
nabu/pipeline/fullfield/chunked.py +13 -13
nabu/pipeline/fullfield/computations.py +4 -1
nabu/pipeline/fullfield/get_double_flatfield.py +147 -0
nabu/pipeline/fullfield/nabu_config.py +16 -4
nabu/pipeline/fullfield/processconfig.py +25 -4
nabu/pipeline/fullfield/reconstruction.py +9 -4
nabu/pipeline/helical/gridded_accumulator.py +1 -1
nabu/pipeline/helical/helical_reconstruction.py +2 -2
nabu/pipeline/helical/nabu_config.py +1 -1
nabu/pipeline/helical/weight_balancer.py +1 -1
nabu/pipeline/params.py +8 -3
nabu/preproc/shift.py +1 -1
nabu/preproc/tests/test_ctf.py +1 -1
nabu/preproc/tests/test_paganin.py +1 -3
nabu/processing/fft_base.py +6 -2
nabu/processing/fft_cuda.py +17 -167
nabu/processing/fft_opencl.py +19 -2
nabu/processing/padding_cuda.py +0 -1
nabu/processing/processing_base.py +11 -5
nabu/processing/tests/test_fft.py +1 -63
nabu/reconstruction/cone.py +39 -9
nabu/reconstruction/fbp.py +7 -0
nabu/reconstruction/fbp_base.py +8 -0
nabu/reconstruction/filtering.py +59 -25
nabu/reconstruction/filtering_cuda.py +21 -20
nabu/reconstruction/filtering_opencl.py +8 -14
nabu/reconstruction/hbp.py +10 -10
nabu/reconstruction/mlem.py +3 -0
nabu/reconstruction/rings_cuda.py +41 -13
nabu/reconstruction/tests/test_cone.py +35 -0
nabu/reconstruction/tests/test_deringer.py +2 -2
nabu/reconstruction/tests/test_fbp.py +35 -14
nabu/reconstruction/tests/test_filtering.py +14 -5
nabu/reconstruction/tests/test_halftomo.py +1 -1
nabu/reconstruction/tests/test_reconstructor.py +1 -1
nabu/resources/dataset_analyzer.py +34 -2
nabu/resources/tests/test_extract.py +4 -2
nabu/stitching/config.py +6 -1
nabu/stitching/stitcher/dumper/__init__.py +1 -0
nabu/stitching/stitcher/dumper/postprocessing.py +105 -1
nabu/stitching/stitcher/post_processing.py +14 -4
nabu/stitching/stitcher/pre_processing.py +1 -1
nabu/stitching/stitcher/single_axis.py +8 -7
nabu/stitching/stitcher/z_stitcher.py +8 -4
nabu/stitching/utils/utils.py +2 -2
nabu/testutils.py +2 -2
nabu/utils.py +9 -2
{nabu-2025.1.0.dev5.dist-info → nabu-2025.1.0.dev13.dist-info}/METADATA +9 -28
{nabu-2025.1.0.dev5.dist-info → nabu-2025.1.0.dev13.dist-info}/RECORD +66 -65
{nabu-2025.1.0.dev5.dist-info → nabu-2025.1.0.dev13.dist-info}/WHEEL +1 -1
{nabu-2025.1.0.dev5.dist-info → nabu-2025.1.0.dev13.dist-info}/entry_points.txt +0 -0
{nabu-2025.1.0.dev5.dist-info → nabu-2025.1.0.dev13.dist-info/licenses}/LICENSE +0 -0
{nabu-2025.1.0.dev5.dist-info → nabu-2025.1.0.dev13.dist-info}/top_level.txt +0 -0

nabu/pipeline/fullfield/processconfig.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import posixpath
 import numpy as np
+from .get_double_flatfield import get_double_flatfield
 from silx.io import get_data
 from silx.io.url import DataUrl
 from ...utils import copy_dict_items, compare_dicts
@@ -32,6 +33,7 @@ class ProcessConfig(ProcessConfigBase):
     (2) update_dataset_info_with_user_config
       - Update flats/darks
+      - Double-flat-field
       - CoR  (value or estimation method) # no estimation yet
       - rotation angles
       - translations files
@@ -89,12 +91,29 @@ class ProcessConfig(ProcessConfigBase):
         self.subsampling_factor = subsampling_factor or 1
         self.subsampling_start = subsampling_start or 0
+        self._get_double_flatfield()
         self._update_dataset_with_user_overwrites()
         self._get_rotation_axis_position()
         self._update_rotation_angles()
         self._get_translation_file("reconstruction", "translation_movements_file", "translations")
         self._get_user_sino_normalization()
+    def _get_double_flatfield(self):
+        self._dff_file = None
+        dff_mode = self.nabu_config["preproc"]["double_flatfield"]
+        if not (dff_mode):
+            return
+        self._dff_file = get_double_flatfield(
+            self.dataset_info,
+            dff_mode,
+            output_dir=self.nabu_config["output"]["location"],
+            darks_flats_dir=self.nabu_config["dataset"]["darks_flats_dir"],
+            dff_options={
+                "dff_sigma": self.nabu_config["preproc"]["dff_sigma"],
+                "do_flatfield": (self.nabu_config["preproc"]["flatfield"] is not False),
+            },
+        )
     def _update_dataset_with_user_overwrites(self):
         user_overwrites = self.nabu_config["dataset"]["overwrite_metadata"].strip()
         if user_overwrites in ("", None):
@@ -451,11 +470,11 @@ class ProcessConfig(ProcessConfigBase):
         #
         # Double flat field
         #
-        if nabu_config["preproc"]["double_flatfield_enabled"]:
+        if nabu_config["preproc"]["double_flatfield"]:
             tasks.append("double_flatfield")
             options["double_flatfield"] = {
                 "sigma": nabu_config["preproc"]["dff_sigma"],
-                "processes_file": nabu_config["preproc"]["processes_file"],
+                "processes_file": self._dff_file or nabu_config["preproc"]["processes_file"],
                 "log_min_clip": nabu_config["preproc"]["log_min_clip"],
                 "log_max_clip": nabu_config["preproc"]["log_max_clip"],
             }
@@ -558,6 +577,7 @@ class ProcessConfig(ProcessConfigBase):
                 self.rec_params,
                 [
                     "method",
+                    "iterations",
                     "implementation",
                     "fbp_filter_type",
                     "fbp_filter_cutoff",
@@ -575,6 +595,7 @@ class ProcessConfig(ProcessConfigBase):
                     "sample_detector_dist",
                     "hbp_legs",
                     "hbp_reduction_steps",
+                    "crop_filtered_data",
                 ],
             )
             rec_options = options["reconstruction"]
@@ -593,8 +614,6 @@ class ProcessConfig(ProcessConfigBase):
                 voxel_size,
             )  # pix size is in microns in dataset_info
-            rec_options["iterations"] = nabu_config["reconstruction"]["iterations"]
             # x/y/z position information
             def get_mean_pos(position_array):
                 if position_array is None:
@@ -616,6 +635,8 @@ class ProcessConfig(ProcessConfigBase):
                 rec_options["position"] = mean_positions_xyz
             if rec_options["method"] == "cone" and rec_options["sample_detector_dist"] is None:
                 rec_options["sample_detector_dist"] = self.dataset_info.distance  # was checked to be not None earlier
+            if rec_options["method"].lower() == "mlem" and rec_options["implementation"] in [None, ""]:
+                rec_options["implementation"] = "corrct"
             # New key
             rec_options["cor_estimated_auto"] = isinstance(nabu_config["reconstruction"]["rotation_axis_position"], str)

nabu/pipeline/fullfield/reconstruction.py CHANGED Viewed

@@ -120,7 +120,7 @@ class FullFieldReconstructor:
         vm = virtual_memory()
         self.resources["mem_avail_GB"] = vm.available / 1e9
         # Account for other memory constraints. There might be a better way
-        slurm_mem_constraint_MB = int(environ.get("SLURM_MEM_PER_NODE", 0))
+        slurm_mem_constraint_MB = int(environ.get("SLURM_MEM_PER_NODE", 0))  # noqa: PLW1508
         if slurm_mem_constraint_MB > 0:
             self.resources["mem_avail_GB"] = slurm_mem_constraint_MB / 1e3
         #
@@ -131,8 +131,12 @@ class FullFieldReconstructor:
         self.resources["gpus"] = avail_gpus
         if len(avail_gpus) == 0:
             return
-        # pick first GPU by default. TODO: handle user's nabu_config["resources"]["gpu_id"]
-        self.resources["gpu_id"] = self._gpu_id = first_generator_item(avail_gpus.keys())
+        user_gpus = self.process_config.nabu_config.get("resources", {}).get("gpu_id", [])
+        if len(user_gpus) == 0:
+            user_gpus = [0]
+        # For now nabu does not support multi-GPU reconstruction. Take the first one.
+        user_gpu_idx = user_gpus[0]
+        self.resources["gpu_id"] = self._gpu_id = list(avail_gpus.keys())[user_gpu_idx]
     def _get_backend(self, backend, cuda_options):
         self._pipeline_cls = ChunkedPipeline
@@ -145,6 +149,7 @@ class FullFieldReconstructor:
                 backend = "numpy"
             else:
                 self.gpu_mem = self.resources["gpus"][self._gpu_id]["memory_GB"] * self.gpu_mem_fraction
+                self.cuda_options = {"device_id": self._gpu_id}
         if backend == "cuda":
             if not (__has_pycuda__):
                 raise RuntimeError("pycuda not avilable")
@@ -307,7 +312,7 @@ class FullFieldReconstructor:
         sigma = opts["unsharp_sigma"]
         # nabu uses cutoff = 4
         cutoff = 4
-        gaussian_kernel_size = int(ceil(2 * cutoff * sigma + 1))
+        gaussian_kernel_size = ceil(2 * cutoff * sigma + 1)
         self.logger.debug("Unsharp mask margin: %d pixels" % gaussian_kernel_size)
         return (gaussian_kernel_size, gaussian_kernel_size)

nabu/pipeline/helical/gridded_accumulator.py CHANGED Viewed

@@ -532,7 +532,7 @@ def get_reconstruction_space(span_info, min_scanwise_z, end_scanwise_z, phase_ma
     # regridded dataset, estimating a meaningul angular step representative
     # of the raw data
     my_angle_step = abs(np.diff(span_info.projection_angles_deg).mean())
-    n_gridded_angles = int(round(360.0 / my_angle_step))
+    n_gridded_angles = round(360.0 / my_angle_step)
     radios_h = phase_margin_pix + (my_z_end - my_z_min) + phase_margin_pix

nabu/pipeline/helical/helical_reconstruction.py CHANGED Viewed

@@ -168,8 +168,8 @@ class HelicalReconstructorRegridded:
             # the meaming of z_min and z_max is: position in slices units from the
             # first available slice and in the direction of the scan
-            self.z_min = int(round(z_start * (0 - z_fract_min) + z_max * z_fract_min))
-            self.z_max = int(round(z_start * (0 - z_fract_max) + z_max * z_fract_max)) + 1
+            self.z_min = round(z_start * (0 - z_fract_min) + z_max * z_fract_min)
+            self.z_max = round(z_start * (0 - z_fract_max) + z_max * z_fract_max) + 1
     def _compute_translations_margin(self):
         return 0, 0

nabu/pipeline/helical/nabu_config.py CHANGED Viewed

@@ -43,7 +43,7 @@ nabu_config["preproc"]["processes_file"] = {
     "validator": optional_file_location_validator,
     "type": "required",
 }
-nabu_config["preproc"]["double_flatfield_enabled"]["default"] = 1
+nabu_config["preproc"]["double_flatfield"]["default"] = 1
 nabu_config["reconstruction"].update(

nabu/pipeline/helical/weight_balancer.py CHANGED Viewed

@@ -83,7 +83,7 @@ def shift(arr, shift, fill_value=0.0):
     """
     result = np.zeros_like(arr)
-    num1 = int(math.floor(shift))
+    num1 = math.floor(shift)
     num2 = num1 + 1
     partition = shift - num1

nabu/pipeline/params.py CHANGED Viewed

@@ -25,12 +25,17 @@ unsharp_methods = {
     "": None,
 }
+# see PaddingBase.supported_modes
 padding_modes = {
-    "edges": "edge",
-    "edge": "edge",
-    "mirror": "mirror",
     "zeros": "zeros",
     "zero": "zeros",
+    "constant": "zeros",
+    "edges": "edge",
+    "edge": "edge",
+    "mirror": "reflect",
+    "reflect": "reflect",
+    "symmetric": "symmetric",
+    "wrap": "wrap",
 }
 reconstruction_methods = {

nabu/preproc/shift.py CHANGED Viewed

@@ -42,7 +42,7 @@ class VerticalShift:
     def _init_interp_coefficients(self):
         self.interp_infos = []
         for s in self.shifts:
-            s0 = int(floor(s))
+            s0 = floor(s)
             f = s - s0
             self.interp_infos.append([s0, f])

nabu/preproc/tests/test_ctf.py CHANGED Viewed

@@ -223,7 +223,7 @@ class TestCtf:
             # phase_fft = ctf_fft.retrieve_phase(img)
             self.check_result(phase_r2c, self.ref_plain, "Something wrong with CtfFilter-FFT")
-    @pytest.mark.skipif(not (__has_pycuda__ and __has_cufft__), reason="pycuda and (scikit-cuda or vkfft)")
+    @pytest.mark.skipif(not (__has_pycuda__ and __has_cufft__), reason="pycuda and (cupy? or vkfft)")
     def test_cuda_ctf(self):
         data = nabu_get_data("brain_phantom.npz")["data"]
         delta_beta = 50.0

nabu/preproc/tests/test_paganin.py CHANGED Viewed

@@ -77,9 +77,7 @@ class TestPaganin:
         errmax = np.max(np.abs(res - res_tomopy) / np.max(res_tomopy))
         assert errmax < self.rtol_pag, "Max error is too high"
-    @pytest.mark.skipif(
-        not (__has_pycuda__ and __has_cufft__), reason="Need pycuda and (scikit-cuda or vkfft) for this test"
-    )
+    @pytest.mark.skipif(not (__has_pycuda__ and __has_cufft__), reason="Need pycuda and (cupy? or vkfft) for this test")
     @pytest.mark.parametrize("config", scenarios)
     def test_gpu_paganin(self, config):
         paganin, data, pag_kwargs = self.get_paganin_instance_and_data(config, self.data)

nabu/processing/fft_base.py CHANGED Viewed

@@ -93,6 +93,10 @@ class _BaseFFT:
         pass
+def raise_base_class_error(slf, *args, **kwargs):
+    raise ValueError
 class _BaseVKFFT(_BaseFFT):
     """
     FFT using VKFFT backend
@@ -101,7 +105,7 @@ class _BaseVKFFT(_BaseFFT):
     implem = "vkfft"
     backend = "none"
     ProcessingCls = BaseClassError
-    vkffs_cls = BaseClassError
+    get_fft_obj = raise_base_class_error
     def _configure_batched_transform(self):
         if self.axes is not None and len(self.shape) == len(self.axes):
@@ -128,7 +132,7 @@ class _BaseVKFFT(_BaseFFT):
         self._vkfft_ndim = None
     def _compute_fft_plans(self):
-        self._vkfft_plan = self.vkffs_cls(
+        self._vkfft_plan = self.get_fft_obj(
             self.shape,
             self.dtype,
             ndim=self._vkfft_ndim,

nabu/processing/fft_cuda.py CHANGED Viewed

@@ -1,149 +1,33 @@
 import os
 import warnings
+from functools import lru_cache
 from multiprocessing import get_context
 from multiprocessing.pool import Pool
-import numpy as np
-from ..utils import check_supported
-from .fft_base import _BaseFFT, _BaseVKFFT
+from ..utils import BaseClassError, check_supported, no_decorator
+from .fft_base import _BaseVKFFT
 try:
-    from pyvkfft.cuda import VkFFTApp as vk_cufft
+    from pyvkfft.cuda import VkFFTApp as CudaVkFFTApp
     __has_vkfft__ = True
 except (ImportError, OSError):
     __has_vkfft__ = False
-    vk_cufft = None
+    CudaVkFFTApp = BaseClassError
 from ..cuda.processing import CudaProcessing
-Plan = None
-cu_fft = None
-cu_ifft = None
-__has_skcuda__ = None
+n_cached_ffts = int(os.getenv("NABU_FFT_CACHE", "0"))
-def init_skcuda():
-    # This needs to be done here, because scikit-cuda creates a Cuda context at import,
-    # which can mess things up in some cases.
-    # Ugly solution to an ugly problem.
-    # ruff: noqa: PLW0603
-    global __has_skcuda__, Plan, cu_fft, cu_ifft
-    try:
-        from skcuda.fft import Plan
-        from skcuda.fft import fft as cu_fft
-        from skcuda.fft import ifft as cu_ifft
-        __has_skcuda__ = True
-    except ImportError:
-        __has_skcuda__ = False
-class SKCUFFT(_BaseFFT):
-    implem = "skcuda"
-    backend = "cuda"
-    ProcessingCls = CudaProcessing
-    def _configure_batched_transform(self):
-        if __has_skcuda__ is None:
-            init_skcuda()
-        if not (__has_skcuda__):
-            raise ImportError("Please install pycuda and scikit-cuda to use the CUDA back-end")
-        self.cufft_batch_size = 1
-        self.cufft_shape = self.shape
-        self._cufft_plan_kwargs = {}
-        if (self.axes is not None) and (len(self.axes) < len(self.shape)):
-            # In the easiest case, the transform is computed along the fastest dimensions:
-            #  - 1D transforms of lines of 2D data
-            #  - 2D transforms of images of 3D data (stacked along slow dim)
-            #  - 1D transforms of 3D data along fastest dim
-            # Otherwise, we have to configure cuda "advanced memory layout".
-            data_ndims = len(self.shape)
+maybe_cached = lru_cache(maxsize=n_cached_ffts) if n_cached_ffts > 0 else no_decorator
-            if data_ndims == 2:
-                n_y, n_x = self.shape
-                along_fast_dim = self.axes[0] == 1
-                self.cufft_shape = n_x if along_fast_dim else n_y
-                self.cufft_batch_size = n_y if along_fast_dim else n_x
-                if not (along_fast_dim):
-                    # Batched vertical 1D FFT on 2D data need advanced data layout
-                    # http://docs.nvidia.com/cuda/cufft/#advanced-data-layout
-                    self._cufft_plan_kwargs = {
-                        "inembed": np.int32([0]),
-                        "istride": n_x,
-                        "idist": 1,
-                        "onembed": np.int32([0]),
-                        "ostride": n_x,
-                        "odist": 1,
-                    }
-            if data_ndims == 3:
-                # TODO/FIXME - the following work for C2C but not R2C ?!
-                # fast_axes = [(1, 2), (2, 1), (2,)]
-                fast_axes = [(2,)]
-                if self.axes not in fast_axes:
-                    raise NotImplementedError(
-                        "With the CUDA backend, batched transform on 3D data is only supported along fastest dimensions"
-                    )
-                self.cufft_batch_size = self.shape[0]
-                self.cufft_shape = self.shape[1:]
-                if len(self.axes) == 1:
-                    # 1D transform on 3D data: here only supported along fast dim, so batch_size is Nx*Ny
-                    self.cufft_batch_size = np.prod(self.shape[:2])
-                    self.cufft_shape = (self.shape[-1],)
-                if len(self.cufft_shape) == 1:
-                    self.cufft_shape = self.cufft_shape[0]
+@maybe_cached
+def _get_vkfft_cuda(*args, **kwargs):
+    return CudaVkFFTApp(*args, **kwargs)
-    def _configure_normalization(self, normalize):
-        self.normalize = normalize
-        if self.normalize == "ortho":
-            # TODO
-            raise NotImplementedError("Normalization mode 'ortho' is not implemented with CUDA backend yet.")
-        self.cufft_scale_inverse = self.normalize == "rescale"
-    def _compute_fft_plans(self):
-        self.plan_forward = Plan(  # pylint: disable = E1102
-            self.cufft_shape,
-            self.dtype,
-            self.dtype_out,
-            batch=self.cufft_batch_size,
-            stream=self.processing.stream,
-            **self._cufft_plan_kwargs,
-            # cufft extensible plan API is only supported after 0.5.1
-            # (commit 65288d28ca0b93e1234133f8d460dc6becb65121)
-            # but there is still no official 0.5.2
-            # ~ auto_allocate=True # cufft extensible plan API
-        )
-        self.plan_inverse = Plan(  # pylint: disable = E1102
-            self.cufft_shape,  # not shape_out
-            self.dtype_out,
-            self.dtype,
-            batch=self.cufft_batch_size,
-            stream=self.processing.stream,
-            **self._cufft_plan_kwargs,
-            # cufft extensible plan API is only supported after 0.5.1
-            # (commit 65288d28ca0b93e1234133f8d460dc6becb65121)
-            # but there is still no official 0.5.2
-            # ~ auto_allocate=True
-        )
-    def fft(self, array, output=None):
-        if output is None:
-            output = self.output_fft = self.processing.allocate_array(
-                "output_fft", self.shape_out, dtype=self.dtype_out
-            )
-        cu_fft(array, output, self.plan_forward, scale=False)  # pylint: disable = E1102
-        return output
-    def ifft(self, array, output=None):
-        if output is None:
-            output = self.output_ifft = self.processing.allocate_array("output_ifft", self.shape, dtype=self.dtype)
-        cu_ifft(  # pylint: disable = E1102
-            array,
-            output,
-            self.plan_inverse,
-            scale=self.cufft_scale_inverse,
-        )
-        return output
+def get_vkfft_cuda(slf, *args, **kwargs):
+    return _get_vkfft_cuda(*args, **kwargs)
 class VKCUFFT(_BaseVKFFT):
@@ -154,7 +38,7 @@ class VKCUFFT(_BaseVKFFT):
     implem = "vkfft"
     backend = "cuda"
     ProcessingCls = CudaProcessing
-    vkffs_cls = vk_cufft
+    get_fft_obj = get_vkfft_cuda
     def _init_backend(self, backend_options):
         super()._init_backend(backend_options)
@@ -175,6 +59,7 @@ def _has_vkfft(x):
     return avail
+@lru_cache(maxsize=2)
 def has_vkfft(safe=True):
     """
     Determine whether pyvkfft is available.
@@ -196,43 +81,9 @@ def has_vkfft(safe=True):
     return v
-def _has_skfft(x):
-    # should be run from within a Process
-    try:
-        from nabu.processing.fft_cuda import SKCUFFT
-        _ = SKCUFFT((16,), "f")
-        avail = True
-    except (ImportError, RuntimeError, OSError, NameError):
-        avail = False
-    return avail
-def has_skcuda(safe=True):
-    """
-    Determine whether scikit-cuda/CUFFT is available.
-    Currently, scikit-cuda will create a Cuda context for Cublas, which can mess up the current execution.
-    Do it in a separate thread.
-    """
-    if not safe:
-        return _has_skfft(None)
-    try:
-        ctx = get_context("spawn")
-        with Pool(1, context=ctx) as p:
-            v = p.map(_has_skfft, [1])[0]
-    except AssertionError:
-        # Can get AssertionError: daemonic processes are not allowed to have children
-        # if the calling code is already a subprocess
-        return _has_skfft(None)
-    return v
+@lru_cache(maxsize=2)
 def get_fft_class(backend="vkfft"):
     backends = {
-        "scikit-cuda": SKCUFFT,
-        "skcuda": SKCUFFT,
-        "cufft": SKCUFFT,
-        "scikit": SKCUFFT,
         "vkfft": VKCUFFT,
         "pyvkfft": VKCUFFT,
     }
@@ -248,7 +99,7 @@ def get_fft_class(backend="vkfft"):
     avail_fft_implems = get_available_fft_implems()
     if len(avail_fft_implems) == 0:
-        raise RuntimeError("Could not any Cuda FFT implementation. Please install either scikit-cuda or pyvkfft")
+        raise RuntimeError("Could not any Cuda FFT implementation. Please install pyvkfft")
     if backend not in avail_fft_implems:
         warnings.warn("Could not get FFT backend '%s'" % backend, RuntimeWarning)
         backend = avail_fft_implems[0]
@@ -256,10 +107,9 @@ def get_fft_class(backend="vkfft"):
     return get_fft_cls(backend)
+@lru_cache(maxsize=1)
 def get_available_fft_implems():
     avail_implems = []
     if has_vkfft(safe=True):
         avail_implems.append("vkfft")
-    if has_skcuda(safe=True):
-        avail_implems.append("skcuda")
     return avail_implems

nabu/processing/fft_opencl.py CHANGED Viewed

@@ -1,15 +1,32 @@
+from functools import lru_cache
+import os
 from multiprocessing import get_context
 from multiprocessing.pool import Pool
+from ..utils import BaseClassError, no_decorator
 from .fft_base import _BaseVKFFT
 from ..opencl.processing import OpenCLProcessing
 try:
-    from pyvkfft.opencl import VkFFTApp as vk_clfft
+    from pyvkfft.opencl import VkFFTApp as OpenCLVkFFTApp
     __has_vkfft__ = True
 except (ImportError, OSError):
     __has_vkfft__ = False
     vk_clfft = None
+    OpenCLVkFFTApp = BaseClassError
+n_cached_ffts = int(os.getenv("NABU_FFT_CACHE", "0"))
+maybe_cached = lru_cache(maxsize=n_cached_ffts) if n_cached_ffts > 0 else no_decorator
+@maybe_cached
+def _get_vkfft_opencl(*args, **kwargs):
+    return OpenCLVkFFTApp(*args, **kwargs)
+def get_vkfft_opencl(slf, *args, **kwargs):
+    return _get_vkfft_opencl(*args, **kwargs)
 class VKCLFFT(_BaseVKFFT):
@@ -20,7 +37,7 @@ class VKCLFFT(_BaseVKFFT):
     implem = "vkfft"
     backend = "opencl"
     ProcessingCls = OpenCLProcessing
-    vkffs_cls = vk_clfft
+    get_fft_obj = get_vkfft_opencl
     def _init_backend(self, backend_options):
         super()._init_backend(backend_options)

nabu/processing/padding_cuda.py CHANGED Viewed

@@ -11,7 +11,6 @@ class CudaPadding(PaddingBase):
     backend = "cuda"
-    # TODO docstring from base class
     def __init__(self, shape, pad_width, mode="constant", cuda_options=None, **kwargs):
         super().__init__(shape, pad_width, mode=mode, **kwargs)
         self.cuda_processing = self.processing = CudaProcessing(**(cuda_options or {}))

nabu/processing/processing_base.py CHANGED Viewed

@@ -99,6 +99,15 @@ class ProcessingBase:
     _recover_arrays_references = recover_arrays_references
     _allocate_array = allocate_array
     _set_array = set_array
+    # --
+    def is_contiguous(self, arr):
+        if isinstance(arr, self.array_class):
+            return arr.flags.c_contiguous
+        elif isinstance(arr, np.ndarray):
+            return arr.flags["C_CONTIGUOUS"]
+        else:
+            raise TypeError
     def check_array(self, arr, expected_shape, expected_dtype="f", check_contiguous=True):
         """
@@ -108,11 +117,8 @@ class ProcessingBase:
             raise ValueError("Expected shape %s but got %s" % (str(expected_shape), str(arr.shape)))
         if arr.dtype != np.dtype(expected_dtype):
             raise ValueError("Expected data type %s but got %s" % (str(expected_dtype), str(arr.dtype)))
-        if check_contiguous:
-            if isinstance(arr, np.ndarray) and not (arr.flags["C_CONTIGUOUS"]):
-                raise ValueError("Expected C-contiguous array")
-            if isinstance(arr, self.array_class) and not arr.flags.c_contiguous:
-                raise ValueError("Expected C-contiguous array")
+        if check_contiguous and not (self.is_contiguous(arr)):
+            raise ValueError("Expected C-contiguous array")
     def kernel(self, *args, **kwargs):
         raise ValueError("Base class")

nabu/processing/tests/test_fft.py CHANGED Viewed

@@ -4,14 +4,13 @@ import numpy as np
 from scipy.fft import fftn, ifftn, rfftn, irfftn
 from nabu.testutils import generate_tests_scenarios, get_data, get_array_of_given_shape, __do_long_tests__
 from nabu.cuda.utils import get_cuda_context, __has_pycuda__
-from nabu.processing.fft_cuda import SKCUFFT, VKCUFFT, get_available_fft_implems
+from nabu.processing.fft_cuda import VKCUFFT, get_available_fft_implems
 from nabu.opencl.utils import __has_pyopencl__, get_opencl_context
 from nabu.processing.fft_opencl import VKCLFFT, has_vkfft as has_cl_vkfft
 from nabu.processing.fft_base import is_fast_axes
 available_cuda_fft = get_available_fft_implems()
 __has_vkfft__ = "vkfft" in available_cuda_fft
-__has_skcuda__ = "skcuda" in available_cuda_fft
 scenarios = {
@@ -113,67 +112,6 @@ class TestFFT:
         ref = ref_ifft_func(data, axes=axes)
         return ref
-    @pytest.mark.skipif(
-        not (__has_skcuda__ and __has_pycuda__), reason="Need pycuda and (scikit-cuda or vkfft) for this test"
-    )
-    @pytest.mark.parametrize("config", scenarios)
-    def test_sckcuda(self, config):
-        r2c = config["r2c"]
-        shape = config["shape"]
-        precision = config["precision"]
-        ndim = len(shape)
-        if ndim == 3 and not (__do_long_tests__):
-            pytest.skip("3D FFTs are done only for long tests - use NABU_LONG_TESTS=1")
-        data = self._get_data_array(config)
-        res, cufft = self._do_fft(data, r2c, return_fft_obj=True, backend_cls=SKCUFFT)
-        ref = self._do_reference_fft(data, r2c)
-        tol = self.abs_tol[precision][ndim]
-        self.check_result(res, ref, config, tol, name="skcuda")
-        # Complex-to-complex can also be performed on real data (as in numpy.fft.fft(real_data))
-        if not (r2c):
-            res = self._do_fft(data, False, backend_cls=SKCUFFT)
-            ref = self._do_reference_fft(data, False)
-            self.check_result(res, ref, config, tol, name="skcuda")
-        # IFFT
-        res = cufft.ifft(cufft.output_fft).get()
-        self.check_result(res, data, config, tol, name="skcuda")
-        # Perhaps we should also check against numpy/scipy ifft,
-        # but it does not yield the good shape for R2C on odd-sized data
-    @pytest.mark.skipif(
-        not (__has_skcuda__ and __has_pycuda__), reason="Need pycuda and (scikit-cuda or vkfft) for this test"
-    )
-    @pytest.mark.parametrize("config", scenarios)
-    def test_skcuda_batched(self, config):
-        shape = config["shape"]
-        if len(shape) == 1:
-            return
-        elif len(shape) == 3 and not (__do_long_tests__):
-            pytest.skip("3D FFTs are done only for long tests - use NABU_LONG_TESTS=1")
-        r2c = config["r2c"]
-        tol = self.abs_tol[config["precision"]][len(shape)]
-        data = self._get_data_array(config)
-        if data.ndim == 2:
-            axes_to_test = [(0,), (1,)]
-        elif data.ndim == 3:
-            # axes_to_test = [(1, 2), (2, 1), (2,)] # See fft.py: works for C2C but not R2C ?
-            axes_to_test = [(2,)]
-        for axes in axes_to_test:
-            res, cufft = self._do_fft(data, r2c, axes=axes, return_fft_obj=True, backend_cls=SKCUFFT)
-            ref = self._do_reference_fft(data, r2c, axes=axes)
-            self.check_result(res, ref, config, tol, name="skcuda batched axes=%s" % (str(axes)))
-            # IFFT
-            res = cufft.ifft(cufft.output_fft).get()
-            self.check_result(res, data, config, tol, name="skcuda")
     @pytest.mark.parametrize("config", scenarios)
     def test_vkfft(self, config):
         backend = config["backend"]

nabu 2025.1.0.dev5__py3-none-any.whl → 2025.1.0.dev13__py3-none-any.whl

nabu 2025.1.0.dev5__py3-none-any.whl → 2025.1.0.dev13__py3-none-any.whl