PyPI - httomolibgpu - Versions diffs - 5.1__py3-none-any.whl → 5.3__py3-none-any.whl - Mend

httomolibgpu 5.1py3-none-any.whl → 5.3py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

httomolibgpu/__init__.py CHANGED Viewed

@@ -9,6 +9,7 @@ from httomolibgpu.prep.normalize import dark_flat_field_correction, minus_log
 from httomolibgpu.prep.phase import paganin_filter, paganin_filter_savu_legacy
 from httomolibgpu.prep.stripe import (
     remove_stripe_based_sorting,
+    remove_stripe_fw,
     remove_stripe_ti,
     remove_all_stripe,
     raven_filter,

httomolibgpu/cuda_kernels/remove_stripe_fw.cu ADDED Viewed

@@ -0,0 +1,155 @@
+// Filters the input along x with two WSize-tap filters and writes one output
+// group per filter.
+//
+// Template parameter:
+//   WSize - number of taps per filter; `w` is read at indices [0, 2 * WSize).
+// Parameters (strides and indices are in float elements, since they index
+// `in` and `out` directly):
+//   dim_x, dim_y, dim_z - extents of the thread domain; threads outside return.
+//   in                  - input samples.
+//   in_stride_x         - multiplier applied to the x thread index to locate
+//                         the first input sample of the filter window.
+//   in_stride_y/z       - input strides for the y and z thread indices.
+//   out                 - output; rows are addressed with a stride of dim_x.
+//   out_stride_z        - output stride for the z thread index.
+//   out_stride_group    - output stride between the two filter results.
+//   w                   - filter taps: filter 0 first, then filter 1.
+template<int WSize>
+__global__ void grouped_convolution_x(
+    int dim_x,
+    int dim_y,
+    int dim_z,
+    const float* in,
+    int in_stride_x,
+    int in_stride_y,
+    int in_stride_z,
+    float* out,
+    int out_stride_z,
+    int out_stride_group,
+    const float* w
+)
+{
+    // Global thread coordinates; one thread per output position.
+    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
+    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
+    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
+    // Threads of a partially filled block that fall outside the domain do nothing.
+    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
+    {
+        return;
+    }
+    constexpr int out_groups = 2;
+    for (int i = 0; i < out_groups; ++i)
+    {
+        float acc = 0.F;
+        for (int j = 0; j < WSize; ++j)
+        {
+            // Tap j of filter i.
+            const int w_idx = i * WSize + j;
+            // Window starts at g_thd_x * in_stride_x and advances one element per tap.
+            const int in_idx = (g_thd_x * in_stride_x + j) + g_thd_y * in_stride_y + g_thd_z * in_stride_z;
+            acc += w[w_idx] * in[in_idx];
+        }
+        // Result of filter i goes to output group i.
+        const int out_idx = g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z + i * out_stride_group;
+        out[out_idx] = acc;
+    }
+}
+// Filters two input groups along y, each with its own pair of WSize-tap
+// filters, producing four output groups.
+//
+// Template parameter:
+//   WSize - number of taps per filter; `w` is read at indices [0, 4 * WSize).
+// Parameters (strides and indices are in float elements, since they index
+// `in` and `out` directly):
+//   dim_x, dim_y, dim_z - extents of the thread domain; threads outside return.
+//   in                  - input samples.
+//   in_stride_x         - multiplier applied to the x thread index.
+//   in_stride_y         - input stride between consecutive rows.
+//   in_stride_z         - input stride for the z thread index.
+//   in_stride_group     - input stride between the two input groups.
+//   out                 - output; rows are addressed with a stride of dim_x.
+//   out_stride_z        - output stride for the z thread index.
+//   out_stride_group    - output stride between output groups.
+//   w                   - filter taps, ordered by (input group, filter, tap).
+template<int WSize>
+__global__ void grouped_convolution_y(
+    int dim_x,
+    int dim_y,
+    int dim_z,
+    const float* in,
+    int in_stride_x,
+    int in_stride_y,
+    int in_stride_z,
+    int in_stride_group,
+    float* out,
+    int out_stride_z,
+    int out_stride_group,
+    const float* w
+)
+{
+    // Global thread coordinates; one thread per output position.
+    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
+    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
+    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
+    // Threads of a partially filled block that fall outside the domain do nothing.
+    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
+    {
+        return;
+    }
+    constexpr int in_groups = 2;
+    constexpr int out_groups = 2;
+    // Output row y reads input rows starting at 2 * y.
+    constexpr int item_stride_y = 2;
+    for (int group = 0; group < in_groups; ++group)
+    {
+        for (int i = 0; i < out_groups; ++i)
+        {
+            float acc = 0.F;
+            for (int j = 0; j < WSize; ++j)
+            {
+                // Tap j of filter i belonging to this input group.
+                const int w_idx = (out_groups * group + i) * WSize + j;
+                const int in_idx = g_thd_x * in_stride_x + (item_stride_y * g_thd_y + j) * in_stride_y + group * in_stride_group + g_thd_z * in_stride_z;
+                acc += w[w_idx] * in[in_idx];
+            }
+            // Output group index is 2 * group + i, matching the filter order in `w`.
+            const int out_idx = g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z + (out_groups * group + i) * out_stride_group;
+            out[out_idx] = acc;
+        }
+    }
+}
+// Transposed (upsampling by 2) convolution along x with a WSize-tap filter.
+// Each thread produces one output sample by gathering every input sample
+// in_x for which out_x == 2 * in_x + tap holds for some tap in [0, WSize).
+// The output is written densely with extents (dim_z, dim_y, dim_x).
+template<int WSize>
+__global__ void transposed_convolution_x(
+    int dim_x,
+    int dim_y,
+    int dim_z,
+    const float* in,
+    int in_dim_x,
+    int in_stride_y,
+    int in_stride_z,
+    const float* w,
+    float* out
+)
+{
+    const int ox = threadIdx.x + blockIdx.x * blockDim.x;
+    const int oy = threadIdx.y + blockIdx.y * blockDim.y;
+    const int oz = threadIdx.z + blockIdx.z * blockDim.z;
+    const bool inside = ox < dim_x && oy < dim_y && oz < dim_z;
+    if (!inside)
+    {
+        return;
+    }
+    constexpr int upsample = 2;
+    float sum = 0.F;
+    for (int tap = 0; tap < WSize; ++tap)
+    {
+        const int shifted = ox - tap;
+        // Taps that land before the first input sample, or between two
+        // upsampled samples, contribute nothing.
+        if (shifted < 0 || shifted % upsample != 0)
+        {
+            continue;
+        }
+        const int src_x = shifted / upsample;
+        if (src_x >= in_dim_x)
+        {
+            continue;
+        }
+        sum += in[src_x + oy * in_stride_y + oz * in_stride_z] * w[tap];
+    }
+    out[ox + dim_x * oy + dim_x * dim_y * oz] = sum;
+}
+// Transposed (upsampling by 2) convolution along y with a WSize-tap filter.
+// Each thread produces one output sample by gathering every input row in_y
+// for which out_y == 2 * in_y + tap holds for some tap in [0, WSize).
+// The output is written densely with extents (dim_z, dim_y, dim_x).
+template<int WSize>
+__global__ void transposed_convolution_y(
+    int dim_x,
+    int dim_y,
+    int dim_z,
+    const float* in,
+    int in_dim_y,
+    int in_stride_y,
+    int in_stride_z,
+    const float* w,
+    float* out
+)
+{
+    const int ox = threadIdx.x + blockIdx.x * blockDim.x;
+    const int oy = threadIdx.y + blockIdx.y * blockDim.y;
+    const int oz = threadIdx.z + blockIdx.z * blockDim.z;
+    const bool inside = ox < dim_x && oy < dim_y && oz < dim_z;
+    if (!inside)
+    {
+        return;
+    }
+    constexpr int upsample = 2;
+    float sum = 0.F;
+    for (int tap = 0; tap < WSize; ++tap)
+    {
+        const int shifted = oy - tap;
+        // Taps that land before the first input row, or between two
+        // upsampled rows, contribute nothing.
+        if (shifted < 0 || shifted % upsample != 0)
+        {
+            continue;
+        }
+        const int src_y = shifted / upsample;
+        if (src_y >= in_dim_y)
+        {
+            continue;
+        }
+        sum += in[ox + src_y * in_stride_y + oz * in_stride_z] * w[tap];
+    }
+    out[ox + dim_x * oy + dim_x * dim_y * oz] = sum;
+}

httomolibgpu/cupywrapper.py CHANGED Viewed

@@ -2,6 +2,7 @@ cupy_run = False
 try:
     import cupy as cp
     import nvtx
+    from cupyx.scipy.fft import next_fast_len
     try:
         cp.cuda.Device(0).compute_capability
@@ -15,5 +16,6 @@ except ImportError as e:
     )
     from unittest.mock import Mock
     import numpy as cp
+    from scipy.fft import next_fast_len
     nvtx = Mock()

httomolibgpu/prep/phase.py CHANGED Viewed

@@ -26,6 +26,7 @@ from httomolibgpu.memory_estimator_helpers import _DeviceMemStack
 cp = cupywrapper.cp
 cupy_run = cupywrapper.cupy_run
+next_fast_len = cupywrapper.next_fast_len
 from unittest.mock import Mock
@@ -38,7 +39,7 @@ else:
     fftshift = Mock()
 from numpy import float32
-from typing import Optional, Tuple
+from typing import Literal, Optional, Tuple
 import math
 __all__ = [
@@ -56,6 +57,10 @@ def paganin_filter(
     distance: float = 1.0,
     energy: float = 53.0,
     ratio_delta_beta: float = 250,
+    calculate_padding_value_method: Literal[
+        "next_power_of_2", "next_fast_length", "use_pad_x_y"
+    ] = "next_power_of_2",
+    pad_x_y: Optional[list] = None,
     calc_peak_gpu_mem: bool = False,
 ) -> cp.ndarray:
     """
@@ -74,6 +79,10 @@ def paganin_filter(
         Beam energy in keV.
     ratio_delta_beta : float
         The ratio of delta/beta, where delta is the phase shift and real part of the complex material refractive index and beta is the absorption.
+    calculate_padding_value_method: str
+        Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`.
+    pad_x_y list | None:
+        Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'.
     calc_peak_gpu_mem: bool
         Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.
@@ -93,9 +102,9 @@ def paganin_filter(
         mem_stack.malloc(np.prod(tomo) * np.float32().itemsize)
     dz_orig, dy_orig, dx_orig = tomo.shape if not mem_stack else tomo
-    # Perform padding to the power of 2 as FFT is O(n*log(n)) complexity
-    # TODO: adding other options of padding?
-    padded_tomo, pad_tup = _pad_projections_to_second_power(tomo, mem_stack)
+    padded_tomo, pad_tup = _pad_projections(
+        tomo, calculate_padding_value_method, pad_x_y, mem_stack
+    )
     dz, dy, dx = padded_tomo.shape if not mem_stack else padded_tomo
@@ -219,21 +228,59 @@ def _shift_bit_length(x: int) -> int:
     return 1 << (x - 1).bit_length()
-def _calculate_pad_size(datashape: tuple) -> list:
+def _calculate_pad_size(
+    datashape: tuple,
+    calculate_padding_value_method: Literal[
+        "next_power_of_2", "next_fast_length", "use_pad_x_y"
+    ],
+    pad_x_y: Optional[list],
+) -> list:
     """Calculating the padding size
     Args:
-        datashape (tuple): the shape of the 3D data
+        datashape (tuple):
+            the shape of the 3D data
+        calculate_padding_value_method: str
+            Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`.
+        pad_x_y (int, int) | None:
+            Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'.
     Returns:
         list: the padded dimensions
     """
+    if pad_x_y is not None and calculate_padding_value_method != "use_pad_x_y":
+        raise ValueError(
+            'calculate_padding_value_method must be "use_pad_x_y" when pad_x_y is specified'
+        )
+    elif calculate_padding_value_method == "use_pad_x_y":
+        if pad_x_y is None:
+            raise ValueError(
+                'pad_x_y must be provided when calculate_padding_value_method is "use_pad_x_y"'
+            )
+        elif (
+            not isinstance(pad_x_y, list)
+            or len(pad_x_y) != 2
+            or not isinstance(pad_x_y[0], int)
+            or not isinstance(pad_x_y[1], int)
+        ):
+            raise ValueError("pad_x_y must be a list of two integers")
+    if calculate_padding_value_method == "next_power_of_2":
+        calculate_padded_dim = lambda _, size: _shift_bit_length(size + 1)
+    elif calculate_padding_value_method == "next_fast_length":
+        calculate_padded_dim = lambda _, size: next_fast_len(size)
+    elif calculate_padding_value_method == "use_pad_x_y":
+        calculate_padded_dim = lambda dim, size: size + 2 * pad_x_y[2 - dim]
+    else:
+        raise ValueError(
+            f'Unexpected calculate_padding_value_method: "{calculate_padding_value_method}"'
+        )
     pad_list = []
     for index, element in enumerate(datashape):
         if index == 0:
             pad_width = (0, 0)  # do not pad the slicing dim
         else:
-            diff = _shift_bit_length(element + 1) - element
+            diff = calculate_padded_dim(index, element) - element
             if element % 2 == 0:
                 pad_width_scalar = diff // 2
                 pad_width = (pad_width_scalar, pad_width_scalar)
@@ -248,17 +295,27 @@ def _calculate_pad_size(datashape: tuple) -> list:
     return pad_list
-def _pad_projections_to_second_power(
-    tomo: cp.ndarray, mem_stack: Optional[_DeviceMemStack]
+def _pad_projections(
+    tomo: cp.ndarray,
+    calculate_padding_value_method: Literal[
+        "next_power_of_2", "next_fast_length", "use_pad_x_y"
+    ],
+    pad_x_y: Optional[list],
+    mem_stack: Optional[_DeviceMemStack],
 ) -> Tuple[cp.ndarray, Tuple[int, int]]:
     """
-    Performs padding of each projection to the next power of 2.
+    Performs padding of each projection to a size optimal for FFT.
     If the shape is not even we also care of that before padding.
     Parameters
     ----------
     tomo : cp.ndarray
         3d projection data
+    calculate_padding_value_method: str
+        Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`.
+    pad_x_y: list | None:
+        Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'.
     Returns
     -------
@@ -268,7 +325,9 @@ def _pad_projections_to_second_power(
     """
     full_shape_tomo = cp.shape(tomo) if not mem_stack else tomo
-    pad_list = _calculate_pad_size(full_shape_tomo)
+    pad_list = _calculate_pad_size(
+        full_shape_tomo, calculate_padding_value_method, pad_x_y
+    )
     if mem_stack:
         padded_tomo = [

httomolibgpu/prep/stripe.py CHANGED Viewed

@@ -21,6 +21,7 @@
 """Module for stripes removal"""
 import numpy as np
+import pywt
 from httomolibgpu import cupywrapper
 cp = cupywrapper.cp
@@ -31,6 +32,7 @@ from unittest.mock import Mock
 if cupy_run:
     from cupyx.scipy.ndimage import median_filter, binary_dilation, uniform_filter1d
     from cupyx.scipy.fft import fft2, ifft2, fftshift
+    from cupyx.scipy.fftpack import get_fft_plan
     from httomolibgpu.cuda_kernels import load_cuda_module
 else:
     median_filter = Mock()
@@ -41,10 +43,11 @@ else:
     fftshift = Mock()
-from typing import Union
+from typing import Optional, Tuple, Union
 __all__ = [
     "remove_stripe_based_sorting",
+    "remove_stripe_fw",
     "remove_stripe_ti",
     "remove_all_stripe",
     "raven_filter",
@@ -156,6 +159,604 @@ def remove_stripe_ti(
         return data
+###### Ring removal with wavelet filtering (adapted for CuPy from the pytorch_wavelets package https://pytorch-wavelets.readthedocs.io/)##########
+# These functions are taken from TomoCuPy package
+# *************************************************************************** #
+#                  Copyright © 2022, UChicago Argonne, LLC                    #
+#                           All Rights Reserved                               #
+#                         Software Name: Tomocupy                             #
+#                     By: Argonne National Laboratory                         #
+#                                                                             #
+#                           OPEN SOURCE LICENSE                               #
+#                                                                             #
+# Redistribution and use in source and binary forms, with or without          #
+# modification, are permitted provided that the following conditions are met: #
+#                                                                             #
+# 1. Redistributions of source code must retain the above copyright notice,   #
+#    this list of conditions and the following disclaimer.                    #
+# 2. Redistributions in binary form must reproduce the above copyright        #
+#    notice, this list of conditions and the following disclaimer in the      #
+#    documentation and/or other materials provided with the distribution.     #
+# 3. Neither the name of the copyright holder nor the names of its            #
+#    contributors may be used to endorse or promote products derived          #
+#    from this software without specific prior written permission.            #
+#                                                                             #
+#                                                                             #
+# *************************************************************************** #
+def _reflect(x: np.ndarray, minx: float, maxx: float) -> np.ndarray:
+    """Fold the values of *x* back into the interval [*minx*, *maxx*].
+    Values outside the interval are mirrored at its two ends, so a long
+    linearly increasing input becomes a waveform that ramps up and down
+    between *minx* and *maxx*. When *x* holds integers and the bounds are
+    (integer + 0.5), the extreme samples are repeated at each turning point.
+    The result is cast back to the dtype of *x*.
+    .. codeauthor:: Rich Wareham <rjw57@cantab.net>, Aug 2013
+    .. codeauthor:: Nick Kingsbury, Cambridge University, January 1999.
+    """
+    span = maxx - minx
+    period = 2 * span
+    # Position inside one up-and-down period, shifted so that it is non-negative.
+    phase = np.fmod(x - minx, period)
+    phase = np.where(phase < 0, phase + period, phase)
+    # The second half of the period runs back down towards minx.
+    folded = np.where(phase >= span, period - phase, phase)
+    return np.array(folded + minx, dtype=x.dtype)
+class _DeviceMemStack:
+    """Bookkeeping model of device allocations used to estimate peak memory.
+    Every request is rounded up to a multiple of 512 bytes. ``current`` holds
+    the rounded total currently in use and ``highwater`` the largest value
+    ``current`` has reached.
+    """
+    def __init__(self) -> None:
+        self.highwater = 0
+        self.current = 0
+        self.allocations = []
+    def malloc(self, bytes):
+        """Record an allocation of ``bytes`` bytes and update the peak."""
+        self.allocations.append(bytes)
+        self.current += self._round_up(bytes)
+        if self.current >= self.highwater:
+            self.highwater = self.current
+    def free(self, bytes):
+        """Release one previously recorded allocation of ``bytes`` bytes."""
+        assert bytes in self.allocations
+        self.allocations.remove(bytes)
+        released = self._round_up(bytes)
+        self.current = self.current - released
+        assert self.current >= 0
+    def _round_up(self, size):
+        """Round ``size`` up to the next multiple of the allocation unit."""
+        unit = 512
+        n_units = (size + unit - 1) // unit
+        return n_units * unit
+def _mypad(
+    x: cp.ndarray, pad: Tuple[int, int, int, int], mem_stack: Optional[_DeviceMemStack]
+) -> cp.ndarray:
+    """Pad a 4D array along one of its last two axes using mirrored indices.
+    The padded axis is extended by gathering samples at the indices produced
+    by ``_reflect``. Only one direction may be padded per call.
+    Inputs:
+        x (array): Array to pad, or its 4-element shape when ``mem_stack`` is given
+        pad (tuple): tuple of (left, right, top, bottom) pad sizes
+        mem_stack: when given, no data is touched; the padded allocation is
+            recorded on it and the padded shape is returned instead
+    Returns:
+        The padded array, or the padded shape (list) in estimation mode.
+    Raises:
+        ValueError: if both horizontal and vertical padding are requested.
+    """
+    left, right, top, bottom = pad[0], pad[1], pad[2], pad[3]
+    if left == 0 and right == 0:
+        # Vertical only
+        axis, before, after = -2, top, bottom
+    elif top == 0 and bottom == 0:
+        # Horizontal only
+        axis, before, after = -1, left, right
+    else:
+        # Previously this case fell through and silently returned None.
+        raise ValueError(
+            f"_mypad supports padding along a single direction only, got pad={pad}"
+        )
+    dims = x if mem_stack else x.shape
+    length = dims[axis]
+    xe = _reflect(np.arange(-before, length + after, dtype="int32"), -0.5, length - 0.5)
+    if mem_stack:
+        ret_shape = [dims[0], dims[1], dims[2], dims[3]]
+        ret_shape[axis] = xe.size
+        mem_stack.malloc(np.prod(ret_shape) * np.float32().itemsize)
+        return ret_shape
+    if axis == -2:
+        return x[:, :, xe, :]
+    return x[:, :, :, xe]
+def _next_power_of_two(x: int, max_val: int = 128) -> int:
+    """Return the smallest power of two that is >= min(x, max_val), at least 1."""
+    limit = min(x, max_val)
+    power = 1
+    while power < limit:
+        power += power
+    return power
+def _conv2d(
+    x: cp.ndarray,
+    w: np.ndarray,
+    stride: Tuple[int, int],
+    groups: int,
+    mem_stack: Optional[_DeviceMemStack],
+) -> cp.ndarray:
+    """Strided convolution run by the ``grouped_convolution_x`` /
+    ``grouped_convolution_y`` CUDA kernels (modelled on pytorch.conv2d).
+    Parameters
+    ----------
+    x : 4D array unpacked as (b, ci, hi, wi), or that shape itself when
+        ``mem_stack`` is given.
+    w : 4D filter array unpacked as (co, _, hk, wk).
+    stride : (vertical, horizontal) step used to compute the output size;
+        only ``stride[1]`` is forwarded to the kernels.
+    groups : only compared against 1 here: 1 selects the x kernel, any other
+        value selects the y kernel.
+    mem_stack : when given, nothing is computed; the output allocation is
+        recorded on it and the output shape is returned.
+    Returns
+    -------
+    float32 array of shape [b, co, ho, wo], or that shape (list) in
+    estimation mode.
+    """
+    b, ci, hi, wi = x.shape if not mem_stack else x
+    co, _, hk, wk = w.shape
+    # Output extent of an unpadded strided convolution.
+    ho = int(np.floor(1 + (hi - hk) / stride[0]))
+    wo = int(np.floor(1 + (wi - wk) / stride[1]))
+    out_shape = [b, co, ho, wo]
+    if mem_stack:
+        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
+        return out_shape
+    out = cp.zeros(out_shape, dtype="float32")
+    w = cp.asarray(w)
+    # x becomes 5D (b, 1, ci, hi, wi): strides[0] is the batch stride and
+    # strides[2] the channel stride used below as the group stride.
+    x = cp.expand_dims(x, axis=1)
+    # NOTE(review): this applies the NumPy function to a CuPy array (w was moved
+    # to the device just above); cp.expand_dims looks like the intended call.
+    w = np.expand_dims(w, axis=0)
+    # The kernels are templated on the filter length along the filtered axis.
+    symbol_names = [f"grouped_convolution_x<{wk}>", f"grouped_convolution_y<{hk}>"]
+    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)
+    dim_x = out.shape[-1]
+    dim_y = out.shape[-2]
+    dim_z = out.shape[0]
+    # Byte strides are converted to element strides for the kernels.
+    in_stride_x = stride[1]
+    in_stride_y = x.strides[-2] // x.dtype.itemsize
+    in_stride_z = x.strides[0] // x.dtype.itemsize
+    # NOTE(review): the output strides are divided by x's itemsize rather than
+    # out's; this is only equivalent when x is float32 like out - confirm.
+    out_stride_z = out.strides[0] // x.dtype.itemsize
+    out_stride_group = out.strides[1] // x.dtype.itemsize
+    # One block row per (y, z) pair; blocks span x with a power-of-two width
+    # capped by _next_power_of_two's default maximum.
+    block_x = _next_power_of_two(dim_x)
+    block_dim = (block_x, 1, 1)
+    grid_x = (dim_x + block_x - 1) // block_x
+    grid_dim = (grid_x, dim_y, dim_z)
+    if groups == 1:
+        grouped_convolution_kernel_x = module.get_function(symbol_names[0])
+        grouped_convolution_kernel_x(
+            grid_dim,
+            block_dim,
+            (
+                dim_x,
+                dim_y,
+                dim_z,
+                x,
+                in_stride_x,
+                in_stride_y,
+                in_stride_z,
+                out,
+                out_stride_z,
+                out_stride_group,
+                w,
+            ),
+        )
+        return out
+    grouped_convolution_kernel_y = module.get_function(symbol_names[1])
+    in_stride_group = x.strides[2] // x.dtype.itemsize
+    grouped_convolution_kernel_y(
+        grid_dim,
+        block_dim,
+        (
+            dim_x,
+            dim_y,
+            dim_z,
+            x,
+            in_stride_x,
+            in_stride_y,
+            in_stride_z,
+            in_stride_group,
+            out,
+            out_stride_z,
+            out_stride_group,
+            w,
+        ),
+    )
+    # Drop the local reference to the device copy of the filters.
+    del w
+    return out
+def _conv_transpose2d(
+    x: cp.ndarray,
+    w: np.ndarray,
+    stride: Tuple[int, int],
+    pad: Tuple[int, int],
+    groups: int,
+    mem_stack: Optional[_DeviceMemStack],
+) -> cp.ndarray:
+    """Transposed convolution (equivalent pytorch.conv_transpose2d)
+    Parameters
+    ----------
+    x : 4D array unpacked as (b, _, ho, wo), or that shape itself when
+        ``mem_stack`` is given.
+    w : 4D filter array unpacked as (_, ci, hk, wk). The x kernel is used when
+        ``wk > 1``, otherwise the y kernel when ``hk > 1``.
+    stride : (vertical, horizontal) upsampling step used to size the output.
+    pad : (vertical, horizontal) number of samples cropped from both ends of
+        the result.
+    groups : accepted for signature parity; not used by this implementation.
+    mem_stack : when given, nothing is computed; the allocations are recorded
+        on it and the output shape is returned.
+    Returns
+    -------
+    Contiguous float32 array, or the output shape (list) in estimation mode.
+    Raises
+    ------
+    ValueError
+        If the filter has a single tap along both axes, for which no kernel
+        exists. This used to be ``assert False``, which is stripped under
+        ``python -O`` and would then return an all-zero array.
+    """
+    b, _, ho, wo = x.shape if not mem_stack else x
+    _, ci, hk, wk = w.shape
+    hi = (ho - 1) * stride[0] + hk
+    wi = (wo - 1) * stride[1] + wk
+    out_shape = [b, ci, hi, wi]
+    # NOTE: a tuple never equals 0, so cropping is applied for every tuple
+    # `pad` (including (0, 0)); only a literal 0 disables it.
+    crop = pad != 0
+    if mem_stack:
+        # The order of these calls determines the recorded peak; keep it.
+        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
+        mem_stack.malloc(w.size * np.float32().itemsize)
+        if crop:
+            new_out_shape = [
+                out_shape[0],
+                out_shape[1],
+                out_shape[2] - 2 * pad[0],
+                out_shape[3] - 2 * pad[1],
+            ]
+            mem_stack.malloc(np.prod(new_out_shape) * np.float32().itemsize)
+            mem_stack.free(np.prod(out_shape) * np.float32().itemsize)
+            out_shape = new_out_shape
+        mem_stack.free(w.size * np.float32().itemsize)
+        return out_shape
+    if wk <= 1 and hk <= 1:
+        # Fail before allocating or compiling anything on the device.
+        raise ValueError(
+            "_conv_transpose2d needs a filter longer than one tap along x or y, "
+            f"got filter shape {tuple(w.shape)}"
+        )
+    out = cp.zeros(out_shape, dtype="float32")
+    w = cp.asarray(w)
+    # The kernels are templated on the filter length along the filtered axis.
+    symbol_names = [
+        f"transposed_convolution_x<{wk}>",
+        f"transposed_convolution_y<{hk}>",
+    ]
+    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)
+    dim_x = out.shape[-1]
+    dim_y = out.shape[-2]
+    dim_z = out.shape[0]
+    # Byte strides are converted to element strides for the kernels.
+    in_stride_y = x.strides[-2] // x.dtype.itemsize
+    in_stride_z = x.strides[0] // x.dtype.itemsize
+    block_x = _next_power_of_two(dim_x)
+    block_dim = (block_x, 1, 1)
+    grid_x = (dim_x + block_x - 1) // block_x
+    grid_dim = (grid_x, dim_y, dim_z)
+    if wk > 1:
+        kernel = module.get_function(symbol_names[0])
+        in_dim = x.shape[-1]
+    else:
+        kernel = module.get_function(symbol_names[1])
+        in_dim = x.shape[-2]
+    kernel(
+        grid_dim,
+        block_dim,
+        (dim_x, dim_y, dim_z, x, in_dim, in_stride_y, in_stride_z, w, out),
+    )
+    if crop:
+        out = out[:, :, pad[0] : out.shape[2] - pad[0], pad[1] : out.shape[3] - pad[1]]
+    return cp.ascontiguousarray(out)
+def _afb1d(
+    x: cp.ndarray,
+    h0: np.ndarray,
+    h1: np.ndarray,
+    dim: int,
+    mem_stack: Optional[_DeviceMemStack],
+) -> cp.ndarray:
+    """Apply a 1D analysis filter bank to an image along a single axis.
+    Parameters
+    ----------
+    x (array): 4D input whose last two dimensions are spatial (or its shape
+        when ``mem_stack`` is given)
+    h0 (array): 4D lowpass filter of shape (1, 1, h, 1) or (1, 1, 1, w)
+    h1 (array): 4D highpass filter of shape (1, 1, h, 1) or (1, 1, 1, w)
+    dim (int): axis to filter; negative values are wrapped. 2 filters
+        vertically (down the rows), 3 filters horizontally (across the
+        columns).
+    mem_stack: optional memory tracker; when given only shapes are propagated
+        and the padded intermediate is released on it.
+    Returns
+    -------
+    lohi: lowpass and highpass subbands concatenated along the channel
+        dimension
+    """
+    dims = x if mem_stack else x.shape
+    # Wrap a negative axis into the range of a 4D array.
+    axis = dim % 4
+    n_channels = dims[1]
+    length = dims[axis]
+    taps = h0.size
+    filt_shape = [1, 1, 1, 1]
+    filt_shape[axis] = taps
+    # One (lowpass, highpass) pair per channel, stacked along the first axis.
+    bank = [h0.reshape(*filt_shape), h1.reshape(*filt_shape)] * n_channels
+    filters = np.concatenate(bank, axis=0)
+    # Total padding needed so that the strided convolution yields the
+    # coefficient count PyWavelets reports for symmetric extension.
+    coeff_len = pywt.dwt_coeff_len(length, taps, mode="symmetric")
+    total_pad = 2 * (coeff_len - 1) - length + taps
+    before, after = total_pad // 2, (total_pad + 1) // 2
+    if axis == 2:
+        step, pad = (2, 1), (0, 0, before, after)
+    else:
+        step, pad = (1, 2), (before, after, 0, 0)
+    padded = _mypad(x, pad=pad, mem_stack=mem_stack)
+    lohi = _conv2d(padded, filters, stride=step, groups=n_channels, mem_stack=mem_stack)
+    if mem_stack:
+        mem_stack.free(np.prod(padded) * np.float32().itemsize)
+    return lohi
+def _sfb1d(
+    lo: cp.ndarray,
+    hi: cp.ndarray,
+    g0: np.ndarray,
+    g1: np.ndarray,
+    dim: int,
+    mem_stack: Optional[_DeviceMemStack],
+) -> cp.ndarray:
+    """Apply a 1D synthesis filter bank: upsample ``lo`` with ``g0`` and ``hi``
+    with ``g1`` along ``dim`` and return the sum of the two results.
+    With ``mem_stack`` given, the inputs are shapes; the allocations are
+    recorded on it and the output shape is returned.
+    """
+    n_channels = lo[1] if mem_stack else lo.shape[1]
+    axis = dim % 4
+    taps = g0.size
+    filt_shape = [1, 1, 1, 1]
+    filt_shape[axis] = taps
+    vertical = axis == 2
+    step = (2, 1) if vertical else (1, 2)
+    crop = (taps - 2, 0) if vertical else (0, taps - 2)
+    # Replicate each filter once per channel.
+    low_filters = np.concatenate([g0.reshape(*filt_shape)] * n_channels, axis=0)
+    high_filters = np.concatenate([g1.reshape(*filt_shape)] * n_channels, axis=0)
+    y_lo = _conv_transpose2d(
+        lo, low_filters, stride=step, pad=crop, groups=n_channels, mem_stack=mem_stack
+    )
+    y_hi = _conv_transpose2d(
+        hi, high_filters, stride=step, pad=crop, groups=n_channels, mem_stack=mem_stack
+    )
+    if not mem_stack:
+        return y_lo + y_hi
+    itemsize = np.float32().itemsize
+    # The sum is a third buffer that briefly coexists with both operands.
+    mem_stack.malloc(np.prod(y_hi) * itemsize)
+    mem_stack.free(np.prod(y_lo) * itemsize)
+    mem_stack.free(np.prod(y_hi) * itemsize)
+    return y_lo
+class _DWTForward:
+    """Single-level 2D forward discrete wavelet transform of an image.
+    Args:
+        wave (str): Name of the wavelet, passed to ``pywt.Wavelet``.
+    """
+    def __init__(self, wave: str):
+        super().__init__()
+        wavelet = pywt.Wavelet(wave)
+        column, row = (1, 1, -1, 1), (1, 1, 1, -1)
+        def reversed_taps(coeffs, shape):
+            # Decomposition filters are stored reversed, as float32.
+            return np.array(coeffs).astype("float32")[::-1].reshape(shape)
+        self.h0_col = reversed_taps(wavelet.dec_lo, column)
+        self.h1_col = reversed_taps(wavelet.dec_hi, column)
+        self.h0_row = reversed_taps(wavelet.dec_lo, row)
+        self.h1_row = reversed_taps(wavelet.dec_hi, row)
+    def apply(
+        self, x: cp.ndarray, mem_stack: Optional[_DeviceMemStack] = None
+    ) -> Tuple[cp.ndarray, cp.ndarray]:
+        """Run one level of the forward transform.
+        Args:
+            x (array): Input of shape :math:`(N, C_{in}, H_{in}, W_{in})`, or
+                that shape itself when ``mem_stack`` is given.
+            mem_stack: optional memory tracker; when given only shapes are
+                propagated and the allocations are recorded on it.
+        Returns:
+            (yl, yh)
+                lowpass coefficients of shape :math:`(N, C_{in}, H', W')` and
+                the remaining three subbands of shape
+                :math:`(N, C_{in}, 3, H', W')`. In estimation mode the two
+                shapes are returned as lists instead.
+        """
+        # Filter along the rows first, then along the columns.
+        row_pass = _afb1d(x, self.h0_row, self.h1_row, dim=3, mem_stack=mem_stack)
+        bands = _afb1d(row_pass, self.h0_col, self.h1_col, dim=2, mem_stack=mem_stack)
+        if not mem_stack:
+            # Release the intermediate before the contiguous copies are made.
+            del row_pass
+            n, height, width = bands.shape[0], bands.shape[-2], bands.shape[-1]
+            # The channel axis holds four subbands per input channel.
+            bands = bands.reshape(n, -1, 4, height, width)
+            lowpass = cp.ascontiguousarray(bands[:, :, 0])
+            detail = cp.ascontiguousarray(bands[:, :, 1:])
+            return (lowpass, detail)
+        itemsize = np.float32().itemsize
+        n, height, width = bands[0], bands[-2], bands[-1]
+        channels = np.prod(bands) // n // 4 // height // width
+        low_shape = [n, channels, height, width]
+        detail_shape = [n, channels, 4 - 1, height, width]
+        mem_stack.free(np.prod(row_pass) * itemsize)
+        mem_stack.malloc(np.prod(low_shape) * itemsize)
+        mem_stack.malloc(np.prod(detail_shape) * itemsize)
+        mem_stack.free(np.prod(bands) * itemsize)
+        return low_shape, detail_shape
+class _DWTInverse:
+    """Single-level 2D inverse discrete wavelet transform of an image.
+    Args:
+        wave (str): Name of the wavelet, passed to ``pywt.Wavelet``.
+    """
+    def __init__(self, wave: str):
+        super().__init__()
+        wavelet = pywt.Wavelet(wave)
+        column, row = (1, 1, -1, 1), (1, 1, 1, -1)
+        def taps(coeffs, shape):
+            # Reconstruction filters are used in their stored order, as float32.
+            return np.array(coeffs).astype("float32").reshape(shape)
+        self.g0_col = taps(wavelet.rec_lo, column)
+        self.g1_col = taps(wavelet.rec_hi, column)
+        self.g0_row = taps(wavelet.rec_lo, row)
+        self.g1_row = taps(wavelet.rec_hi, row)
+    def apply(
+        self,
+        coeffs: Tuple[cp.ndarray, cp.ndarray],
+        mem_stack: Optional[_DeviceMemStack] = None,
+    ) -> cp.ndarray:
+        """Reconstruct one level from its coefficients.
+        Args:
+            coeffs (yl, yh): lowpass array of shape :math:`(N, C_{in}, H', W')`
+              and subband array of shape :math:`(N, C_{in}, 3, H', W')`, i.e.
+              the format returned by ``_DWTForward.apply``. In estimation mode
+              both are shapes.
+            mem_stack: optional memory tracker; when given only shapes are
+              propagated and the allocations are recorded on it.
+        Returns:
+            Reconstructed array (or its shape in estimation mode).
+        """
+        yl, yh = coeffs
+        if mem_stack:
+            # Each subband has the shape of yh without its subband axis.
+            lh, hl, hh = ([yh[0], yh[1], yh[3], yh[4]] for _ in range(3))
+        else:
+            lh, hl, hh = (yh[:, :, band, :, :] for band in range(3))
+        # Undo the column filtering of each half, then the row filtering.
+        lo = _sfb1d(yl, lh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
+        hi = _sfb1d(hl, hh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
+        yl = _sfb1d(lo, hi, self.g0_row, self.g1_row, dim=3, mem_stack=mem_stack)
+        if mem_stack:
+            itemsize = np.float32().itemsize
+            mem_stack.free(np.prod(lo) * itemsize)
+            mem_stack.free(np.prod(hi) * itemsize)
+        del lo, hi
+        return yl
+def _repair_memory_fragmentation_if_needed(fragmentation_threshold: float = 0.2):
+    """Return cached blocks of the default CuPy memory pool when too much of
+    it is held but unused.
+    The pool is flushed when ``total_bytes / used_bytes - 1`` exceeds
+    ``fragmentation_threshold``. A pool with nothing in use is flushed whenever
+    it still holds memory; that case previously raised ZeroDivisionError.
+    """
+    pool = cp.get_default_memory_pool()
+    total = pool.total_bytes()
+    used = pool.used_bytes()
+    if used == 0:
+        # Everything the pool holds is unused; avoid dividing by zero.
+        if total > 0:
+            pool.free_all_blocks()
+        return
+    if (total / used) - 1 > fragmentation_threshold:
+        pool.free_all_blocks()
+def remove_stripe_fw(
+    data: cp.ndarray,
+    sigma: float = 2,
+    wname: str = "db5",
+    level: Optional[int] = None,
+    calc_peak_gpu_mem: bool = False,
+) -> cp.ndarray:
+    """
+    Remove horizontal stripes from sinogram using the Fourier-Wavelet (FW) based method :cite:`munch2009stripe`. The original source code
+    taken from TomoCupy and NABU packages.
+    Parameters
+    ----------
+    data : ndarray
+        3D tomographic data as a CuPy array. When `calc_peak_gpu_mem` is True this is the shape of the data instead.
+    sigma : float
+        Damping parameter in Fourier space.
+    wname : str
+        Type of the wavelet filter, e.g. 'db5', 'db7', 'haar', 'sym5', 'sym16', 'bior4.4'. The name is passed to PyWavelets.
+    level : int, optional
+        Number of discrete wavelet transform levels. Defaults to ceil(log2(largest data dimension)).
+    calc_peak_gpu_mem: bool
+        Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.
+    Returns
+    -------
+    ndarray
+        Stripe-corrected 3D tomographic data as a CuPy array. When `calc_peak_gpu_mem` is True, the estimated
+        peak GPU memory in bytes (int) is returned instead.
+    """
+    if level is None:
+        if calc_peak_gpu_mem:
+            size = np.max(data)  # data is a tuple in this case
+        else:
+            size = np.max(data.shape)
+        level = int(np.ceil(np.log2(size)))
+    [nproj, nz, ni] = data.shape if not calc_peak_gpu_mem else data
+    # The projection axis is padded by one eighth of its length.
+    nproj_pad = nproj + nproj // 8
+    # Accepts all wave types available to PyWavelets
+    xfm = _DWTForward(wave=wname)
+    ifm = _DWTInverse(wave=wname)
+    # Wavelet decomposition.
+    cc = []
+    # Working layout: (slice, channel=1, padded projections, detector columns).
+    sli_shape = [nz, 1, nproj_pad, ni]
+    if calc_peak_gpu_mem:
+        # Estimation mode: replay the allocations of the real computation on a
+        # memory tracker, passing shapes instead of arrays.
+        mem_stack = _DeviceMemStack()
+        # A data copy is assumed when invoking the function
+        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
+        mem_stack.malloc(np.prod(sli_shape) * np.float32().itemsize)
+        cc = []
+        fcV_bytes = None
+        for k in range(level):
+            new_sli_shape, c = xfm.apply(sli_shape, mem_stack)
+            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
+            sli_shape = new_sli_shape
+            cc.append(c)
+            if fcV_bytes:
+                mem_stack.free(fcV_bytes)
+            fcV_shape = [c[0], c[3], c[4]]
+            fcV_bytes = np.prod(fcV_shape) * np.complex64().itemsize
+            mem_stack.malloc(fcV_bytes)
+            # For the FFT
+            mem_stack.malloc(2 * np.prod(fcV_shape) * np.float32().itemsize)
+            mem_stack.malloc(2 * fcV_bytes)
+            # NOTE(review): this allocates a real device array and builds a real
+            # FFT plan just to read the plan's work-area size, and it requests
+            # the plan without `axes`, whereas the computation below uses
+            # axes=1 - confirm the two plans are meant to match.
+            fft_dummy = cp.empty(fcV_shape, dtype="float32")
+            fft_plan = get_fft_plan(fft_dummy)
+            fft_plan_size = fft_plan.work_area.mem.size
+            del fft_dummy
+            del fft_plan
+            mem_stack.malloc(fft_plan_size)
+            mem_stack.free(2 * np.prod(fcV_shape) * np.float32().itemsize)
+            mem_stack.free(fft_plan_size)
+            mem_stack.free(2 * fcV_bytes)
+            # The rest of the iteration doesn't contribute to the peak
+        # NOTE: The last iteration of fcV is "leaked"
+        for k in range(level)[::-1]:
+            new_sli_shape = [sli_shape[0], sli_shape[1], cc[k][-2], cc[k][-1]]
+            new_sli_shape = ifm.apply((new_sli_shape, cc[k]), mem_stack)
+            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
+            sli_shape = new_sli_shape
+        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
+        for c in cc:
+            mem_stack.free(np.prod(c) * np.float32().itemsize)
+        mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
+        # The recorded peak is returned with a 10% margin.
+        return int(mem_stack.highwater * 1.1)
+    # Zero-filled working array with the data centred along the padded
+    # projection axis (axes 0 and 1 of the input are swapped).
+    sli = cp.zeros(sli_shape, dtype="float32")
+    sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2] = data.swapaxes(0, 1)
+    for k in range(level):
+        sli, c = xfm.apply(sli)
+        cc.append(c)
+        # FFT
+        # Only the subband at index 1 of the detail coefficients is damped.
+        fft_in = cp.ascontiguousarray(cc[k][:, 0, 1])
+        fft_plan = get_fft_plan(fft_in, axes=1)
+        with fft_plan:
+            fcV = cp.fft.fft(fft_in, axis=1)
+        del fft_plan
+        del fft_in
+        _, my, mx = fcV.shape
+        # Damping of ring artifact information.
+        # damp = 1 - exp(-y_hat^2 / (2 * sigma^2)), evaluated via expm1.
+        y_hat = np.fft.ifftshift((np.arange(-my, my, 2) + 1) / 2)
+        damp = -np.expm1(-(y_hat**2) / (2 * sigma**2))
+        fcV *= cp.tile(damp, (mx, 1)).swapaxes(0, 1)
+        # Inverse FFT.
+        ifft_in = cp.ascontiguousarray(fcV)
+        ifft_plan = get_fft_plan(ifft_in, axes=1)
+        with ifft_plan:
+            cc[k][:, 0, 1] = cp.fft.ifft(ifft_in, my, axis=1).real
+        del ifft_plan
+        del ifft_in
+        _repair_memory_fragmentation_if_needed()
+    # Wavelet reconstruction.
+    for k in range(level)[::-1]:
+        # Crop the running lowpass image to the size of this level's subbands
+        # before inverting the level.
+        shape0 = cc[k][0, 0, 1].shape
+        sli = sli[:, :, : shape0[0], : shape0[1]]
+        sli = ifm.apply((sli, cc[k]))
+        _repair_memory_fragmentation_if_needed()
+    # Remove the projection padding and restore the input axis order.
+    data = sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2, :ni]
+    data = data.swapaxes(0, 1)
+    return cp.ascontiguousarray(data)
 ######## Optimized version for Vo-all ring removal in tomopy########
 # This function is taken from TomoCuPy package
 # *************************************************************************** #

{httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: httomolibgpu
-Version: 5.1
+Version: 5.3
 Summary: Commonly used tomography data processing methods at DLS.
 Author-email: Daniil Kazantsev <daniil.kazantsev@diamond.ac.uk>, Yousef Moazzam <yousef.moazzam@diamond.ac.uk>, Naman Gera <naman.gera@diamond.ac.uk>
 License: BSD-3-Clause
@@ -19,6 +19,7 @@ Requires-Dist: scipy
 Requires-Dist: pillow
 Requires-Dist: scikit-image
 Requires-Dist: tomobar
+Requires-Dist: PyWavelets
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"

{httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-httomolibgpu/__init__.py,sha256=Fdj5ipIGgeKqSCYRb5bBVMAZ04ZvZJzuBoGOAqc0zgk,937
-httomolibgpu/cupywrapper.py,sha256=6ITGJ2Jw5I5kVmKEL5LlsnLRniEqqBLsHiAjvLtk0Xk,493
+httomolibgpu/__init__.py,sha256=Dt_TYhjJGPVathlceTYQhoRSyH8n7FGQJlRMUlFZNdc,959
+httomolibgpu/cupywrapper.py,sha256=vHuBN4Wo3YxPnQP0OAJypLfZA6AXyXFgVmaZw_67pvo,579
 httomolibgpu/memory_estimator_helpers.py,sha256=QaJady-z8y9Emw7W-lB608vBTNvVYv3obboQKVj6E9M,705
 httomolibgpu/cuda_kernels/__init__.py,sha256=VQNMaGcVDwiE-C64FfLtubHpLriLG0Y3_QnjHBSHrN0,884
 httomolibgpu/cuda_kernels/calc_metrics.cu,sha256=oV7ZPcwjWafmZjbNsUkBYPvOViJ_nX3zBoOAuPCmIrA,11335
@@ -8,6 +8,7 @@ httomolibgpu/cuda_kernels/generate_mask.cu,sha256=3il3r1J2cnTCd3UXO4GWGfBgGxj4pv
 httomolibgpu/cuda_kernels/median_kernel.cu,sha256=EECLUCoJkT9GQ9Db_FF6fYOG6cDSiAePTRZNxE4VZ68,1692
 httomolibgpu/cuda_kernels/raven_filter.cu,sha256=KX2TM_9tMpvoGCHezDNWYABCnv2cT9mlMo4IhxRUac0,1437
 httomolibgpu/cuda_kernels/remove_nan_inf.cu,sha256=gv0ihkf6A_D_po9x7pmgFsQFhwZ1dB_HYc_0Tu-bpUU,630
+httomolibgpu/cuda_kernels/remove_stripe_fw.cu,sha256=J_vy0RUYYKT-mOzERsn3kjgt4hbE7vHPFRuJYNzs6sM,4504
 httomolibgpu/misc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 httomolibgpu/misc/corr.py,sha256=e1eUsWLSM9SB5xzWTDW0o9pAD_lbrr4DL-QQmyM8v4c,4503
 httomolibgpu/misc/denoise.py,sha256=-D9UPbZyUAcCptBHUUXsmj1NFzd6HrrRjJJh4T5gmhQ,4787
@@ -17,14 +18,14 @@ httomolibgpu/misc/utils.py,sha256=rHRuQUO47SlTanvKDBgiC0im4tXlGLCw5B_zvlLzzbc,47
 httomolibgpu/prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 httomolibgpu/prep/alignment.py,sha256=GVxnyioipmqsHb4s3mPQ8tKGoPIQMPftDrQxUO-HBuE,5491
 httomolibgpu/prep/normalize.py,sha256=hee0H4mE7FrSZgcF1fjLsKT06xjTJymkyAxpe2itQe4,4202
-httomolibgpu/prep/phase.py,sha256=yKJe9gmWuFaUSIuoctV5X1Pb7yEgOmkQ6jxvZkSSwpQ,12128
-httomolibgpu/prep/stripe.py,sha256=8_DV0ON6AWARuziqkmhom56gWTardtqC_z3xG8geg0o,14774
+httomolibgpu/prep/phase.py,sha256=N3Ep_Krn4rqbGOnNhApSbIYM7gVstBtequXTklBDQLk,14907
+httomolibgpu/prep/stripe.py,sha256=OZPimFxe9TOSaEcErORFxd6HCcFcR62-q5XYBvC10FM,36918
 httomolibgpu/recon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 httomolibgpu/recon/_phase_cross_correlation.py,sha256=Ru2oLAPv8XOSSuZer5yNQrxD_8lMAwBSvtkVAVs5TCc,16469
 httomolibgpu/recon/algorithm.py,sha256=ds-_io7kGzo5FiJq8k4--PYtIWak3y9H7yuyg1lymyQ,25121
 httomolibgpu/recon/rotation.py,sha256=GaSwNrlDnlP_iIrTfKUQLiXsShJ5aSDvdKPwofggtwQ,27948
-httomolibgpu-5.1.dist-info/licenses/LICENSE,sha256=bXeLsgelPUUXw8HCIYiVC97Dpjhm2nB54m7TACdH8ng,48032
-httomolibgpu-5.1.dist-info/METADATA,sha256=zSD4pi1w0lyFkgkZrB38m1DuhmGj5ad4uWJENNX_J44,3339
-httomolibgpu-5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-httomolibgpu-5.1.dist-info/top_level.txt,sha256=nV0Ty_YvSPVd1O6MNWuIplD0w1nwk5hT76YgBZ-bzUw,13
-httomolibgpu-5.1.dist-info/RECORD,,
+httomolibgpu-5.3.dist-info/licenses/LICENSE,sha256=bXeLsgelPUUXw8HCIYiVC97Dpjhm2nB54m7TACdH8ng,48032
+httomolibgpu-5.3.dist-info/METADATA,sha256=prUDINLOyJMUnUz3YQCkfhuDJtPyQCoELYedj2ktUD0,3365
+httomolibgpu-5.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+httomolibgpu-5.3.dist-info/top_level.txt,sha256=nV0Ty_YvSPVd1O6MNWuIplD0w1nwk5hT76YgBZ-bzUw,13
+httomolibgpu-5.3.dist-info/RECORD,,

{httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

httomolibgpu-5.1-py3-none-any.whl → httomolibgpu-5.3-py3-none-any.whl

httomolibgpu-5.1-py3-none-any.whl → httomolibgpu-5.3-py3-none-any.whl