PyPI - httomolibgpu - Versions diffs - 5.0__py3-none-any.whl → 5.2__py3-none-any.whl - Mend

httomolibgpu 5.0py3-none-any.whl → 5.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

httomolibgpu/__init__.py CHANGED Viewed

@@ -9,6 +9,7 @@ from httomolibgpu.prep.normalize import dark_flat_field_correction, minus_log
 from httomolibgpu.prep.phase import paganin_filter, paganin_filter_savu_legacy
 from httomolibgpu.prep.stripe import (
     remove_stripe_based_sorting,
+    remove_stripe_fw,
     remove_stripe_ti,
     remove_all_stripe,
     raven_filter,

httomolibgpu/cuda_kernels/remove_stripe_fw.cu ADDED Viewed

@@ -0,0 +1,155 @@
+// Sliding dot product along x that produces two output groups from one input.
+// Launch one thread per output element; the thread at (x, y, z) computes, for i in {0, 1}:
+//   out[x + y * dim_x + z * out_stride_z + i * out_stride_group] =
+//       sum over j < WSize of w[i * WSize + j] * in[x * in_stride_x + j + y * in_stride_y + z * in_stride_z]
+// The filter is applied as stored (no flip inside the kernel). All strides are counted in
+// float elements, not bytes, because they are added directly to indices into float pointers.
+//
+// Template parameter:
+//   WSize            number of filter taps per output group (compile-time constant)
+// Parameters:
+//   dim_x/y/z        extents of the output index space; threads outside it exit early
+//   in               input samples
+//   in_stride_x      input step between consecutive output x positions
+//   in_stride_y/z    input strides applied to the y and z indices
+//   out              output buffer; consecutive rows are dim_x elements apart
+//   out_stride_z     output stride applied to the z index
+//   out_stride_group output stride between the two output groups
+//   w                filter weights; 2 * WSize values are read, group-major
+template<int WSize>
+__global__ void grouped_convolution_x(
+    int dim_x,
+    int dim_y,
+    int dim_z,
+    const float* in,
+    int in_stride_x,
+    int in_stride_y,
+    int in_stride_z,
+    float* out,
+    int out_stride_z,
+    int out_stride_group,
+    const float* w
+)
+{
+    // Global thread coordinates select the output element handled by this thread.
+    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
+    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
+    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
+    // The launch may cover more threads than there are outputs; surplus threads do nothing.
+    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
+    {
+        return;
+    }
+    // Number of filters applied to the same input window; fixed at two in this kernel.
+    constexpr int out_groups = 2;
+    for (int i = 0; i < out_groups; ++i)
+    {
+        float acc = 0.F;
+        for (int j = 0; j < WSize; ++j)
+        {
+            // Filter i occupies w[i * WSize .. i * WSize + WSize - 1].
+            const int w_idx = i * WSize + j;
+            // The taps read consecutive input elements starting at x * in_stride_x.
+            const int in_idx = (g_thd_x * in_stride_x + j) + g_thd_y * in_stride_y + g_thd_z * in_stride_z;
+            acc += w[w_idx] * in[in_idx];
+        }
+        const int out_idx = g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z + i * out_stride_group;
+        out[out_idx] = acc;
+    }
+}
+// Sliding dot product along y over two input groups, producing two outputs per input group
+// (four output groups in total). Launch one thread per output element; the thread at
+// (x, y, z) computes, for group in {0, 1} and i in {0, 1}:
+//   out[x + y * dim_x + z * out_stride_z + (2 * group + i) * out_stride_group] =
+//       sum over j < WSize of w[(2 * group + i) * WSize + j]
+//                             * in[x * in_stride_x + (2 * y + j) * in_stride_y
+//                                  + group * in_stride_group + z * in_stride_z]
+// The input row advances by two per output row (item_stride_y). All strides are counted in
+// float elements, not bytes.
+//
+// Template parameter:
+//   WSize            number of filter taps per output group (compile-time constant)
+// Parameters:
+//   dim_x/y/z        extents of the output index space; threads outside it exit early
+//   in               input samples
+//   in_stride_x      input stride applied to the x index
+//   in_stride_y      input stride between consecutive rows
+//   in_stride_z      input stride applied to the z index
+//   in_stride_group  input stride between the two input groups
+//   out              output buffer; consecutive rows are dim_x elements apart
+//   out_stride_z     output stride applied to the z index
+//   out_stride_group output stride between output groups
+//   w                filter weights; 4 * WSize values are read
+template<int WSize>
+__global__ void grouped_convolution_y(
+    int dim_x,
+    int dim_y,
+    int dim_z,
+    const float* in,
+    int in_stride_x,
+    int in_stride_y,
+    int in_stride_z,
+    int in_stride_group,
+    float* out,
+    int out_stride_z,
+    int out_stride_group,
+    const float* w
+)
+{
+    // Global thread coordinates select the output element handled by this thread.
+    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
+    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
+    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
+    // The launch may cover more threads than there are outputs; surplus threads do nothing.
+    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
+    {
+        return;
+    }
+    constexpr int in_groups = 2;
+    constexpr int out_groups = 2;
+    // Input rows consumed per output row.
+    constexpr int item_stride_y = 2;
+    for (int group = 0; group < in_groups; ++group)
+    {
+        for (int i = 0; i < out_groups; ++i)
+        {
+            float acc = 0.F;
+            for (int j = 0; j < WSize; ++j)
+            {
+                // Filters are stored input-group-major: index (out_groups * group + i).
+                const int w_idx = (out_groups * group + i) * WSize + j;
+                // The taps walk down the rows of the current input group.
+                const int in_idx = g_thd_x * in_stride_x + (item_stride_y * g_thd_y + j) * in_stride_y + group * in_stride_group + g_thd_z * in_stride_z;
+                acc += w[w_idx] * in[in_idx];
+            }
+            const int out_idx = g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z + (out_groups * group + i) * out_stride_group;
+            out[out_idx] = acc;
+        }
+    }
+}
+// Transposed convolution along x with an output step of two.
+// Launch one thread per output element; the thread at (x, y, z) gathers every input sample
+// in_x and tap i that satisfy x == 2 * in_x + i:
+//   out[x + dim_x * y + dim_x * dim_y * z] =
+//       sum of in[in_x + y * in_stride_y + z * in_stride_z] * w[i]
+// The output is written densely packed (row pitch dim_x, slice pitch dim_x * dim_y).
+// Input elements along x are read with unit stride. Strides are in float elements.
+//
+// Template parameter:
+//   WSize          number of filter taps (compile-time constant)
+// Parameters:
+//   dim_x/y/z      extents of the output index space; threads outside it exit early
+//   in             input samples
+//   in_dim_x       number of valid input samples along x
+//   in_stride_y/z  input strides applied to the y and z indices
+//   w              filter weights; WSize values are read
+//   out            output buffer
+template<int WSize>
+__global__ void transposed_convolution_x(
+    int dim_x,
+    int dim_y,
+    int dim_z,
+    const float* in,
+    int in_dim_x,
+    int in_stride_y,
+    int in_stride_z,
+    const float* w,
+    float* out
+)
+{
+    // Global thread coordinates select the output element handled by this thread.
+    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
+    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
+    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
+    // The launch may cover more threads than there are outputs; surplus threads do nothing.
+    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
+    {
+        return;
+    }
+    // Distance in output samples between the contributions of neighbouring input samples.
+    constexpr int item_out_stride = 2;
+    float acc = 0.F;
+    for (int i = 0; i < WSize; ++i)
+    {
+        // Integer division truncates toward zero and % keeps the sign of the dividend, so a
+        // negative (g_thd_x - i) gives either a non-zero remainder or a negative quotient;
+        // the condition below rejects both cases.
+        const int in_x = (g_thd_x - i) / item_out_stride;
+        const int in_x_mod = (g_thd_x - i) % item_out_stride;
+        // Only taps that land exactly on an existing input sample contribute.
+        if (in_x_mod == 0 && in_x >= 0 && in_x < in_dim_x)
+        {
+            const int in_idx = in_x + g_thd_y * in_stride_y + g_thd_z * in_stride_z;
+            acc += in[in_idx] * w[i];
+        }
+    }
+    const int out_idx = g_thd_x + dim_x * g_thd_y + dim_x * dim_y * g_thd_z;
+    out[out_idx] = acc;
+}
+// Transposed convolution along y with an output step of two.
+// Launch one thread per output element; the thread at (x, y, z) gathers every input row
+// in_y and tap i that satisfy y == 2 * in_y + i:
+//   out[x + dim_x * y + dim_x * dim_y * z] =
+//       sum of in[x + in_y * in_stride_y + z * in_stride_z] * w[i]
+// The output is written densely packed (row pitch dim_x, slice pitch dim_x * dim_y).
+// Input elements along x are read with unit stride. Strides are in float elements.
+//
+// Template parameter:
+//   WSize          number of filter taps (compile-time constant)
+// Parameters:
+//   dim_x/y/z      extents of the output index space; threads outside it exit early
+//   in             input samples
+//   in_dim_y       number of valid input rows
+//   in_stride_y/z  input strides applied to the row and z indices
+//   w              filter weights; WSize values are read
+//   out            output buffer
+template<int WSize>
+__global__ void transposed_convolution_y(
+    int dim_x,
+    int dim_y,
+    int dim_z,
+    const float* in,
+    int in_dim_y,
+    int in_stride_y,
+    int in_stride_z,
+    const float* w,
+    float* out
+)
+{
+    // Global thread coordinates select the output element handled by this thread.
+    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
+    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
+    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
+    // The launch may cover more threads than there are outputs; surplus threads do nothing.
+    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
+    {
+        return;
+    }
+    // Distance in output rows between the contributions of neighbouring input rows.
+    constexpr int item_out_stride = 2;
+    float acc = 0.F;
+    for (int i = 0; i < WSize; ++i)
+    {
+        // Integer division truncates toward zero and % keeps the sign of the dividend, so a
+        // negative (g_thd_y - i) gives either a non-zero remainder or a negative quotient;
+        // the condition below rejects both cases.
+        const int in_y = (g_thd_y - i) / item_out_stride;
+        const int in_y_mod = (g_thd_y - i) % item_out_stride;
+        // Only taps that land exactly on an existing input row contribute.
+        if (in_y_mod == 0 && in_y >= 0 && in_y < in_dim_y)
+        {
+            const int in_idx = g_thd_x + in_y * in_stride_y + g_thd_z * in_stride_z;
+            acc += in[in_idx] * w[i];
+        }
+    }
+    const int out_idx = g_thd_x + dim_x * g_thd_y + dim_x * dim_y * g_thd_z;
+    out[out_idx] = acc;
+}

httomolibgpu/memory_estimator_helpers.py ADDED Viewed

@@ -0,0 +1,24 @@
+# Granularity, in bytes, to which every simulated allocation is rounded up.
+# NOTE(review): presumably chosen to match the GPU memory pool's allocation unit - confirm.
+ALLOCATION_UNIT_SIZE = 512
+class _DeviceMemStack:
+    """Track simulated allocations and record the peak number of bytes in use.
+    Callers report each allocation and release they would perform through
+    ``malloc`` and ``free``; nothing is actually reserved. Sizes are rounded up
+    to a multiple of ``ALLOCATION_UNIT_SIZE`` before they are counted, and the
+    largest running total is kept in ``highwater``.
+    """
+    def __init__(self) -> None:
+        # Requested (unrounded) sizes of the allocations that are still live.
+        self.allocations = []
+        # Rounded-up number of bytes currently counted as allocated.
+        self.current = 0
+        # Largest value that ``current`` has reached so far.
+        self.highwater = 0
+    # NOTE: the parameter name ``bytes`` shadows the builtin inside malloc/free.
+    def malloc(self, bytes):
+        """Register an allocation of ``bytes`` bytes and update the peak."""
+        self.allocations.append(bytes)
+        allocated = self._round_up(bytes)
+        self.current += allocated
+        self.highwater = max(self.current, self.highwater)
+    def free(self, bytes):
+        """Release one previously registered allocation of exactly ``bytes`` bytes.
+        Allocations are matched by their requested size, so the first live entry
+        equal to ``bytes`` is the one removed.
+        """
+        # NOTE(review): validation relies on assert, which is skipped under ``python -O``.
+        assert bytes in self.allocations
+        self.allocations.remove(bytes)
+        self.current -= self._round_up(bytes)
+        assert self.current >= 0
+    def _round_up(self, size):
+        """Return ``size`` rounded up to the next multiple of ``ALLOCATION_UNIT_SIZE``."""
+        # Ceiling division gives the number of allocation units needed.
+        size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE
+        return size * ALLOCATION_UNIT_SIZE

httomolibgpu/prep/phase.py CHANGED Viewed

@@ -22,6 +22,7 @@
 import numpy as np
 from httomolibgpu import cupywrapper
+from httomolibgpu.memory_estimator_helpers import _DeviceMemStack
 cp = cupywrapper.cp
 cupy_run = cupywrapper.cupy_run
@@ -30,13 +31,14 @@ from unittest.mock import Mock
 if cupy_run:
     from cupyx.scipy.fft import fft2, ifft2, fftshift
+    from cupyx.scipy.fftpack import get_fft_plan
 else:
     fft2 = Mock()
     ifft2 = Mock()
     fftshift = Mock()
 from numpy import float32
-from typing import Tuple
+from typing import Optional, Tuple
 import math
 __all__ = [
@@ -54,6 +56,7 @@ def paganin_filter(
     distance: float = 1.0,
     energy: float = 53.0,
     ratio_delta_beta: float = 250,
+    calc_peak_gpu_mem: bool = False,
 ) -> cp.ndarray:
     """
     Perform single-material phase retrieval from flats/darks corrected tomographic measurements. For more detailed information, see :ref:`phase_contrast_module`.
@@ -71,30 +74,50 @@ def paganin_filter(
         Beam energy in keV.
     ratio_delta_beta : float
         The ratio of delta/beta, where delta is the phase shift and real part of the complex material refractive index and beta is the absorption.
+    calc_peak_gpu_mem: bool
+        Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.
     Returns
     -------
     cp.ndarray
         The 3D array of Paganin phase-filtered projection images.
     """
+    mem_stack = _DeviceMemStack() if calc_peak_gpu_mem else None
     # Check the input data is valid
-    if tomo.ndim != 3:
+    if not mem_stack and tomo.ndim != 3:
         raise ValueError(
             f"Invalid number of dimensions in data: {tomo.ndim},"
             " please provide a stack of 2D projections."
         )
-    dz_orig, dy_orig, dx_orig = tomo.shape
+    if mem_stack:
+        mem_stack.malloc(np.prod(tomo) * np.float32().itemsize)
+    dz_orig, dy_orig, dx_orig = tomo.shape if not mem_stack else tomo
     # Perform padding to the power of 2 as FFT is O(n*log(n)) complexity
     # TODO: adding other options of padding?
-    padded_tomo, pad_tup = _pad_projections_to_second_power(tomo)
+    padded_tomo, pad_tup = _pad_projections_to_second_power(tomo, mem_stack)
-    dz, dy, dx = padded_tomo.shape
+    dz, dy, dx = padded_tomo.shape if not mem_stack else padded_tomo
     # 3D FFT of tomo data
-    padded_tomo = cp.asarray(padded_tomo, dtype=cp.complex64)
-    fft_tomo = fft2(padded_tomo, axes=(-2, -1), overwrite_x=True)
+    if mem_stack:
+        mem_stack.malloc(np.prod(padded_tomo) * np.complex64().itemsize)
+        mem_stack.free(np.prod(padded_tomo) * np.float32().itemsize)
+        fft_input = cp.empty(padded_tomo, dtype=cp.complex64)
+    else:
+        padded_tomo = cp.asarray(padded_tomo, dtype=cp.complex64)
+        fft_input = padded_tomo
+    fft_plan = get_fft_plan(fft_input, axes=(-2, -1))
+    if mem_stack:
+        mem_stack.malloc(fft_plan.work_area.mem.size)
+        mem_stack.free(fft_plan.work_area.mem.size)
+    else:
+        with fft_plan:
+            fft_tomo = fft2(padded_tomo, axes=(-2, -1), overwrite_x=True)
+        del padded_tomo
+    del fft_input
+    del fft_plan
     # calculate alpha constant
     alpha = _calculate_alpha(energy, distance / 1e-6, ratio_delta_beta)
@@ -103,18 +126,56 @@ def paganin_filter(
     indx = _reciprocal_coord(pixel_size, dy)
     indy = _reciprocal_coord(pixel_size, dx)
-    # Build Lorentzian-type filter
-    phase_filter = fftshift(
-        1.0 / (1.0 + alpha * (cp.add.outer(cp.square(indx), cp.square(indy))))
-    )
+    if mem_stack:
+        mem_stack.malloc(indx.size * indx.dtype.itemsize)  # cp.asarray(indx)
+        mem_stack.malloc(indx.size * indx.dtype.itemsize)  # cp.square
+        mem_stack.free(indx.size * indx.dtype.itemsize)  # cp.asarray(indx)
+        mem_stack.malloc(indy.size * indy.dtype.itemsize)  # cp.asarray(indy)
+        mem_stack.malloc(indy.size * indy.dtype.itemsize)  # cp.square
+        mem_stack.free(indy.size * indy.dtype.itemsize)  # cp.asarray(indy)
+        mem_stack.malloc(indx.size * indy.size * indx.dtype.itemsize)  # cp.add.outer
+        mem_stack.free(indx.size * indx.dtype.itemsize)  # cp.square
+        mem_stack.free(indy.size * indy.dtype.itemsize)  # cp.square
+        mem_stack.malloc(indx.size * indy.size * indx.dtype.itemsize)  # phase_filter
+        mem_stack.free(indx.size * indy.size * indx.dtype.itemsize)  # cp.add.outer
+        mem_stack.free(indx.size * indy.size * indx.dtype.itemsize)  # phase_filter
+    else:
+        # Build Lorentzian-type filter
+        phase_filter = fftshift(
+            1.0
+            / (
+                1.0
+                + alpha
+                * (
+                    cp.add.outer(
+                        cp.square(cp.asarray(indx)), cp.square(cp.asarray(indy))
+                    )
+                )
+            )
+        )
-    phase_filter = phase_filter / phase_filter.max()  # normalisation
+        phase_filter = phase_filter / phase_filter.max()  # normalisation
-    # Filter projections
-    fft_tomo *= phase_filter
+        # Filter projections
+        fft_tomo *= phase_filter
+        del phase_filter
     # Apply filter and take inverse FFT
-    ifft_filtered_tomo = ifft2(fft_tomo, axes=(-2, -1), overwrite_x=True).real
+    ifft_input = (
+        fft_tomo if not mem_stack else cp.empty(padded_tomo, dtype=cp.complex64)
+    )
+    ifft_plan = get_fft_plan(ifft_input, axes=(-2, -1))
+    if mem_stack:
+        mem_stack.malloc(ifft_plan.work_area.mem.size)
+        mem_stack.free(ifft_plan.work_area.mem.size)
+    else:
+        with ifft_plan:
+            ifft_filtered_tomo = ifft2(fft_tomo, axes=(-2, -1), overwrite_x=True).real
+        del fft_tomo
+    del ifft_plan
+    del ifft_input
     # slicing indices for cropping
     slc_indices = (
@@ -123,8 +184,19 @@ def paganin_filter(
         slice(pad_tup[2][0], pad_tup[2][0] + dx_orig, 1),
     )
+    if mem_stack:
+        mem_stack.malloc(np.prod(tomo) * np.float32().itemsize)  # astype(cp.float32)
+        mem_stack.free(
+            np.prod(padded_tomo) * np.complex64().itemsize
+        )  # ifft_filtered_tomo
+        mem_stack.malloc(
+            np.prod(tomo) * np.float32().itemsize
+        )  # return _log_kernel(tomo)
+        return mem_stack.highwater
     # crop the padded filtered data:
     tomo = ifft_filtered_tomo[slc_indices].astype(cp.float32)
+    del ifft_filtered_tomo
     # taking the negative log
     _log_kernel = cp.ElementwiseKernel(
@@ -177,7 +249,7 @@ def _calculate_pad_size(datashape: tuple) -> list:
 def _pad_projections_to_second_power(
-    tomo: cp.ndarray,
+    tomo: cp.ndarray, mem_stack: Optional[_DeviceMemStack]
 ) -> Tuple[cp.ndarray, Tuple[int, int]]:
     """
     Performs padding of each projection to the next power of 2.
@@ -194,11 +266,17 @@ def _pad_projections_to_second_power(
     ndarray: padded 3d projection data
     tuple: a tuple with padding dimensions
     """
-    full_shape_tomo = cp.shape(tomo)
+    full_shape_tomo = cp.shape(tomo) if not mem_stack else tomo
     pad_list = _calculate_pad_size(full_shape_tomo)
-    padded_tomo = cp.pad(tomo, tuple(pad_list), "edge")
+    if mem_stack:
+        padded_tomo = [
+            sh + pad[0] + pad[1] for sh, pad in zip(full_shape_tomo, pad_list)
+        ]
+        mem_stack.malloc(np.prod(padded_tomo) * np.float32().itemsize)
+    else:
+        padded_tomo = cp.pad(tomo, tuple(pad_list), "edge")
     return padded_tomo, tuple(pad_list)
@@ -209,7 +287,7 @@ def _wavelength_micron(energy: float) -> float:
     return 2 * math.pi * PLANCK_CONSTANT * SPEED_OF_LIGHT / energy
-def _reciprocal_coord(pixel_size: float, num_grid: int) -> cp.ndarray:
+def _reciprocal_coord(pixel_size: float, num_grid: int) -> np.ndarray:
     """
     Calculate reciprocal grid coordinates for a given pixel size
     and discretization.
@@ -227,7 +305,7 @@ def _reciprocal_coord(pixel_size: float, num_grid: int) -> cp.ndarray:
         Grid coordinates.
     """
     n = num_grid - 1
-    rc = cp.arange(-n, num_grid, 2, dtype=cp.float32)
+    rc = np.arange(-n, num_grid, 2, dtype=cp.float32)
     rc *= 2 * math.pi / (n * pixel_size)
     return rc
@@ -238,6 +316,7 @@ def paganin_filter_savu_legacy(
     distance: float = 1.0,
     energy: float = 53.0,
     ratio_delta_beta: float = 250,
+    calc_peak_gpu_mem: bool = False,
 ) -> cp.ndarray:
     """
     Perform single-material phase retrieval from flats/darks corrected tomographic measurements. For more detailed information, see :ref:`phase_contrast_module`.
@@ -256,6 +335,8 @@ def paganin_filter_savu_legacy(
         Beam energy in keV.
     ratio_delta_beta : float
         The ratio of delta/beta, where delta is the phase shift and real part of the complex material refractive index and beta is the absorption.
+    calc_peak_gpu_mem: bool
+        Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.
     Returns
     -------
@@ -263,4 +344,11 @@ def paganin_filter_savu_legacy(
         The 3D array of Paganin phase-filtered projection images.
     """
-    return paganin_filter(tomo, pixel_size, distance, energy, ratio_delta_beta / 4)
+    return paganin_filter(
+        tomo,
+        pixel_size,
+        distance,
+        energy,
+        ratio_delta_beta / 4,
+        calc_peak_gpu_mem=calc_peak_gpu_mem,
+    )

httomolibgpu/prep/stripe.py CHANGED Viewed

@@ -21,6 +21,7 @@
 """Module for stripes removal"""
 import numpy as np
+import pywt
 from httomolibgpu import cupywrapper
 cp = cupywrapper.cp
@@ -31,6 +32,7 @@ from unittest.mock import Mock
 if cupy_run:
     from cupyx.scipy.ndimage import median_filter, binary_dilation, uniform_filter1d
     from cupyx.scipy.fft import fft2, ifft2, fftshift
+    from cupyx.scipy.fftpack import get_fft_plan
     from httomolibgpu.cuda_kernels import load_cuda_module
 else:
     median_filter = Mock()
@@ -41,10 +43,11 @@ else:
     fftshift = Mock()
-from typing import Union
+from typing import Optional, Tuple, Union
 __all__ = [
     "remove_stripe_based_sorting",
+    "remove_stripe_fw",
     "remove_stripe_ti",
     "remove_all_stripe",
     "raven_filter",
@@ -156,6 +159,604 @@ def remove_stripe_ti(
         return data
+###### Ring removal with wavelet filtering (adapted for CuPy from the pytorch_wavelets package, https://pytorch-wavelets.readthedocs.io/) ##########
+# These functions are taken from the TomoCuPy package
+# *************************************************************************** #
+#                  Copyright © 2022, UChicago Argonne, LLC                    #
+#                           All Rights Reserved                               #
+#                         Software Name: Tomocupy                             #
+#                     By: Argonne National Laboratory                         #
+#                                                                             #
+#                           OPEN SOURCE LICENSE                               #
+#                                                                             #
+# Redistribution and use in source and binary forms, with or without          #
+# modification, are permitted provided that the following conditions are met: #
+#                                                                             #
+# 1. Redistributions of source code must retain the above copyright notice,   #
+#    this list of conditions and the following disclaimer.                    #
+# 2. Redistributions in binary form must reproduce the above copyright        #
+#    notice, this list of conditions and the following disclaimer in the      #
+#    documentation and/or other materials provided with the distribution.     #
+# 3. Neither the name of the copyright holder nor the names of its            #
+#    contributors may be used to endorse or promote products derived          #
+#    from this software without specific prior written permission.            #
+#                                                                             #
+#                                                                             #
+# *************************************************************************** #
+def _reflect(x: np.ndarray, minx: float, maxx: float) -> np.ndarray:
+    """Reflect the values in matrix *x* about the scalar values *minx* and
+    *maxx*.  Hence a vector *x* containing a long linearly increasing series is
+    converted into a waveform which ramps linearly up and down between *minx*
+    and *maxx*.  If *x* contains integers and *minx* and *maxx* are (integers +
+    0.5), the ramps will have repeated max and min samples.
+    Parameters
+    ----------
+    x : np.ndarray
+        Values to fold into the interval.
+    minx : float
+        Lower reflection boundary.
+    maxx : float
+        Upper reflection boundary.
+    Returns
+    -------
+    np.ndarray
+        Reflected values, converted back to the dtype of *x*.
+    .. codeauthor:: Rich Wareham <rjw57@cantab.net>, Aug 2013
+    .. codeauthor:: Nick Kingsbury, Cambridge University, January 1999.
+    """
+    rng = maxx - minx
+    # One full up-and-down ramp spans twice the interval width.
+    rng_by_2 = 2 * rng
+    # fmod keeps the sign of its first argument, so values below minx come out negative.
+    mod = np.fmod(x - minx, rng_by_2)
+    # Shift negative remainders up by one period so everything lies in [0, 2 * rng).
+    normed_mod = np.where(mod < 0, mod + rng_by_2, mod)
+    # The second half of the period is the descending ramp: mirror it back, then offset by minx.
+    out = np.where(normed_mod >= rng, rng_by_2 - normed_mod, normed_mod) + minx
+    return np.array(out, dtype=x.dtype)
+class _DeviceMemStack:
+    """Track simulated allocations and record the peak number of bytes in use.
+    Callers report each allocation and release they would perform through
+    ``malloc`` and ``free``; nothing is actually reserved. Sizes are rounded up
+    to a multiple of 512 bytes before they are counted, and the largest running
+    total is kept in ``highwater``.
+    """
+    def __init__(self) -> None:
+        # Requested (unrounded) sizes of the allocations that are still live.
+        self.allocations = []
+        # Rounded-up number of bytes currently counted as allocated.
+        self.current = 0
+        # Largest value that ``current`` has reached so far.
+        self.highwater = 0
+    # NOTE: the parameter name ``bytes`` shadows the builtin inside malloc/free.
+    def malloc(self, bytes):
+        """Register an allocation of ``bytes`` bytes and update the peak."""
+        self.allocations.append(bytes)
+        allocated = self._round_up(bytes)
+        self.current += allocated
+        self.highwater = max(self.current, self.highwater)
+    def free(self, bytes):
+        """Release one previously registered allocation of exactly ``bytes`` bytes.
+        Allocations are matched by their requested size, so the first live entry
+        equal to ``bytes`` is the one removed.
+        """
+        # NOTE(review): validation relies on assert, which is skipped under ``python -O``.
+        assert bytes in self.allocations
+        self.allocations.remove(bytes)
+        self.current -= self._round_up(bytes)
+        assert self.current >= 0
+    def _round_up(self, size):
+        """Return ``size`` rounded up to the next multiple of the allocation unit."""
+        # Granularity, in bytes, of a simulated allocation.
+        ALLOCATION_UNIT_SIZE = 512
+        # Ceiling division gives the number of allocation units needed.
+        size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE
+        return size * ALLOCATION_UNIT_SIZE
+def _mypad(
+    x: cp.ndarray, pad: Tuple[int, int, int, int], mem_stack: Optional[_DeviceMemStack]
+) -> cp.ndarray:
+    """Pad a 4D array along one of its last two axes by reflection.
+    The padded index range is folded back into the valid range with
+    ``_reflect`` using half-sample boundaries, and the result is gathered by
+    indexing *x* with those coordinates. Only one direction may be padded per
+    call (either vertical or horizontal).
+    Inputs:
+        x (array): Array to pad. When *mem_stack* is given, *x* is instead the
+            4-element shape of that array.
+        pad (tuple): tuple of (left, right, top, bottom) pad sizes
+        mem_stack: optional memory tracker. When given, nothing is padded: the
+            float32 size of the padded result is registered with ``malloc``
+            and the padded shape is returned as a list.
+    Returns:
+        The padded array, or the padded shape in memory-estimation mode.
+    """
+    # Vertical only
+    if pad[0] == 0 and pad[1] == 0:
+        m1, m2 = pad[2], pad[3]
+        l = x.shape[-2] if not mem_stack else x[-2]
+        # Source row index for every row of the padded result.
+        xe = _reflect(np.arange(-m1, l + m2, dtype="int32"), -0.5, l - 0.5)
+        if mem_stack:
+            ret_shape = [x[0], x[1], xe.size, x[3]]
+            mem_stack.malloc(np.prod(ret_shape) * np.float32().itemsize)
+            return ret_shape
+        return x[:, :, xe, :]
+    # horizontal only
+    elif pad[2] == 0 and pad[3] == 0:
+        m1, m2 = pad[0], pad[1]
+        l = x.shape[-1] if not mem_stack else x[-1]
+        # Source column index for every column of the padded result.
+        xe = _reflect(np.arange(-m1, l + m2, dtype="int32"), -0.5, l - 0.5)
+        if mem_stack:
+            ret_shape = [x[0], x[1], x[2], xe.size]
+            mem_stack.malloc(np.prod(ret_shape) * np.float32().itemsize)
+            return ret_shape
+        return x[:, :, :, xe]
+    # NOTE(review): when both directions have non-zero padding neither branch runs
+    # and the function implicitly returns None - confirm callers never request that.
+def _next_power_of_two(x: int, max_val: int = 128) -> int:
+    """Return a power of two obtained by doubling 1 until it reaches *x* or *max_val*.
+    Doubling stops as soon as the value is no longer below *x* or no longer
+    below *max_val*, so the result is at least 1 and, for a power-of-two
+    *max_val*, never exceeds it.
+    """
+    n = 1
+    while n < x and n < max_val:
+        n *= 2
+    return n
+def _conv2d(
+    x: cp.ndarray,
+    w: np.ndarray,
+    stride: Tuple[int, int],
+    groups: int,
+    mem_stack: Optional[_DeviceMemStack],
+) -> cp.ndarray:
+    """Convolution (equivalent pytorch.conv2d)
+    Runs one of the ``grouped_convolution_x`` / ``grouped_convolution_y`` CUDA
+    kernels from the ``remove_stripe_fw`` module, instantiated for the filter
+    length.
+    Parameters
+    ----------
+    x : 4D input (batch, channels, height, width). When *mem_stack* is given,
+        *x* is instead the shape of that input.
+    w : 4D filter bank (out channels, _, kernel height, kernel width).
+    stride : (vertical, horizontal) step of the filter.
+    groups : ``1`` selects the x kernel; any other value selects the y kernel.
+    mem_stack : optional memory tracker. When given, only the float32 output
+        allocation is registered and the output shape is returned.
+    Returns
+    -------
+    The float32 output array, or its shape (a list) in memory-estimation mode.
+    """
+    b, ci, hi, wi = x.shape if not mem_stack else x
+    co, _, hk, wk = w.shape
+    # Output extent of a convolution without padding.
+    ho = int(np.floor(1 + (hi - hk) / stride[0]))
+    wo = int(np.floor(1 + (wi - wk) / stride[1]))
+    out_shape = [b, co, ho, wo]
+    if mem_stack:
+        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
+        return out_shape
+    out = cp.zeros(out_shape, dtype="float32")
+    w = cp.asarray(w)
+    # Insert a singleton axis so the channel axis of x becomes axis 2 (used as the group stride below).
+    x = cp.expand_dims(x, axis=1)
+    # NOTE(review): np.expand_dims is applied to a CuPy array here - presumably relies on NumPy dispatching to CuPy; confirm.
+    w = np.expand_dims(w, axis=0)
+    # The kernels are templates on the filter length, so the instantiations are requested by name.
+    symbol_names = [f"grouped_convolution_x<{wk}>", f"grouped_convolution_y<{hk}>"]
+    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)
+    dim_x = out.shape[-1]
+    dim_y = out.shape[-2]
+    dim_z = out.shape[0]
+    # The kernels index with element strides, so byte strides are divided by the item size.
+    in_stride_x = stride[1]
+    in_stride_y = x.strides[-2] // x.dtype.itemsize
+    in_stride_z = x.strides[0] // x.dtype.itemsize
+    out_stride_z = out.strides[0] // x.dtype.itemsize
+    out_stride_group = out.strides[1] // x.dtype.itemsize
+    # One thread per output element: blocks run along x, the grid spans y and the batch axis.
+    block_x = _next_power_of_two(dim_x)
+    block_dim = (block_x, 1, 1)
+    grid_x = (dim_x + block_x - 1) // block_x
+    grid_dim = (grid_x, dim_y, dim_z)
+    if groups == 1:
+        grouped_convolution_kernel_x = module.get_function(symbol_names[0])
+        grouped_convolution_kernel_x(
+            grid_dim,
+            block_dim,
+            (
+                dim_x,
+                dim_y,
+                dim_z,
+                x,
+                in_stride_x,
+                in_stride_y,
+                in_stride_z,
+                out,
+                out_stride_z,
+                out_stride_group,
+                w,
+            ),
+        )
+        return out
+    grouped_convolution_kernel_y = module.get_function(symbol_names[1])
+    # Element stride between input channels, which the y kernel treats as input groups.
+    in_stride_group = x.strides[2] // x.dtype.itemsize
+    grouped_convolution_kernel_y(
+        grid_dim,
+        block_dim,
+        (
+            dim_x,
+            dim_y,
+            dim_z,
+            x,
+            in_stride_x,
+            in_stride_y,
+            in_stride_z,
+            in_stride_group,
+            out,
+            out_stride_z,
+            out_stride_group,
+            w,
+        ),
+    )
+    del w
+    return out
+def _conv_transpose2d(
+    x: cp.ndarray,
+    w: np.ndarray,
+    stride: Tuple[int, int],
+    pad: Tuple[int, int],
+    groups: int,
+    mem_stack: Optional[_DeviceMemStack],
+) -> cp.ndarray:
+    """Transposed convolution (equivalent pytorch.conv_transpose2d)
+    Runs one of the ``transposed_convolution_x`` / ``transposed_convolution_y``
+    CUDA kernels from the ``remove_stripe_fw`` module, then crops *pad* samples
+    from both ends of the last two axes.
+    Parameters
+    ----------
+    x : 4D input. When *mem_stack* is given, *x* is instead the shape of that input.
+    w : 4D filter of shape (co, ci, kernel height, kernel width). The kernel
+        width selects the x kernel when it is greater than 1, otherwise the
+        kernel height must be greater than 1 and selects the y kernel.
+    stride : (vertical, horizontal) output step used to compute the output size.
+    pad : (vertical, horizontal) number of samples cropped from each end.
+    groups : not referenced in the body.
+    mem_stack : optional memory tracker. When given, the float32 allocations
+        for the uncropped output, the filter and the cropped copy are
+        registered and the final output shape is returned.
+    Returns
+    -------
+    A contiguous float32 array, or its shape (a list) in memory-estimation mode.
+    """
+    b, co, ho, wo = x.shape if not mem_stack else x
+    # ``co`` is rebound here; the channel count of the output is taken from ``ci``.
+    co, ci, hk, wk = w.shape
+    # Size of the uncropped transposed-convolution result.
+    hi = (ho - 1) * stride[0] + hk
+    wi = (wo - 1) * stride[1] + wk
+    out_shape = [b, ci, hi, wi]
+    if mem_stack:
+        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
+        mem_stack.malloc(w.size * np.float32().itemsize)
+        # NOTE(review): ``pad`` is annotated as a tuple, for which ``pad != 0`` is always true - confirm intent.
+        if pad != 0:
+            new_out_shape = [
+                out_shape[0],
+                out_shape[1],
+                out_shape[2] - 2 * pad[0],
+                out_shape[3] - 2 * pad[1],
+            ]
+            # The cropped copy exists alongside the uncropped buffer before the latter is released.
+            mem_stack.malloc(np.prod(new_out_shape) * np.float32().itemsize)
+            mem_stack.free(np.prod(out_shape) * np.float32().itemsize)
+            out_shape = new_out_shape
+        mem_stack.free(w.size * np.float32().itemsize)
+        return out_shape
+    out = cp.zeros(out_shape, dtype="float32")
+    w = cp.asarray(w)
+    # The kernels are templates on the filter length, so the instantiations are requested by name.
+    symbol_names = [
+        f"transposed_convolution_x<{wk}>",
+        f"transposed_convolution_y<{hk}>",
+    ]
+    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)
+    dim_x = out.shape[-1]
+    dim_y = out.shape[-2]
+    dim_z = out.shape[0]
+    in_dim_x = x.shape[-1]
+    in_dim_y = x.shape[-2]
+    # The kernels index with element strides, so byte strides are divided by the item size.
+    in_stride_y = x.strides[-2] // x.dtype.itemsize
+    in_stride_z = x.strides[0] // x.dtype.itemsize
+    # One thread per output element: blocks run along x, the grid spans y and the batch axis.
+    block_x = _next_power_of_two(dim_x)
+    block_dim = (block_x, 1, 1)
+    grid_x = (dim_x + block_x - 1) // block_x
+    grid_dim = (grid_x, dim_y, dim_z)
+    if wk > 1:
+        transposed_convolution_kernel_x = module.get_function(symbol_names[0])
+        transposed_convolution_kernel_x(
+            grid_dim,
+            block_dim,
+            (dim_x, dim_y, dim_z, x, in_dim_x, in_stride_y, in_stride_z, w, out),
+        )
+    elif hk > 1:
+        transposed_convolution_kernel_y = module.get_function(symbol_names[1])
+        transposed_convolution_kernel_y(
+            grid_dim,
+            block_dim,
+            (dim_x, dim_y, dim_z, x, in_dim_y, in_stride_y, in_stride_z, w, out),
+        )
+    else:
+        # NOTE(review): a 1x1 filter is unsupported; this assert is skipped under ``python -O``.
+        assert False
+    if pad != 0:
+        out = out[:, :, pad[0] : out.shape[2] - pad[0], pad[1] : out.shape[3] - pad[1]]
+    return cp.ascontiguousarray(out)
+def _afb1d(
+    x: cp.ndarray,
+    h0: np.ndarray,
+    h1: np.ndarray,
+    dim: int,
+    mem_stack: Optional[_DeviceMemStack],
+) -> cp.ndarray:
+    """1D analysis filter bank (along one dimension only) of an image
+    The input is padded with ``_mypad`` and then filtered with ``_conv2d``
+    using a step of two along the filtered dimension.
+    Parameters
+    ----------
+    x (array): 4D input with the last two dimensions the spatial input.
+        When *mem_stack* is given, *x* is instead the shape of that input.
+    h0 (array): 4D input for the lowpass filter. Should have shape (1, 1,
+        h, 1) or (1, 1, 1, w)
+    h1 (array): 4D input for the highpass filter. Should have shape (1, 1,
+        h, 1) or (1, 1, 1, w)
+    dim (int) - dimension of filtering. d=2 is for a vertical filter (called
+        column filtering but filters across the rows). d=3 is for a
+        horizontal filter, (called row filtering but filters across the
+        columns).
+    mem_stack: optional memory tracker. When given, only allocation sizes are
+        registered and shapes are passed around instead of arrays.
+    Returns
+    -------
+    lohi: lowpass and highpass subbands concatenated along the channel
+        dimension (or the shape of that result in memory-estimation mode)
+    """
+    C = x.shape[1] if not mem_stack else x[1]
+    # Convert the dim to positive
+    d = dim % 4
+    # Step of two along the filtered dimension, one along the other.
+    s = (2, 1) if d == 2 else (1, 2)
+    N = x.shape[d] if not mem_stack else x[d]
+    L = h0.size
+    shape = [1, 1, 1, 1]
+    shape[d] = L
+    # Stack the (lowpass, highpass) pair once per input channel along axis 0.
+    h = np.concatenate([h0.reshape(*shape), h1.reshape(*shape)] * C, axis=0)
+    # Calculate the pad size
+    outsize = pywt.dwt_coeff_len(N, L, mode="symmetric")
+    p = 2 * (outsize - 1) - N + L
+    # Split the total padding p between the two ends; the trailing end gets the extra sample when p is odd.
+    pad = (0, 0, p // 2, (p + 1) // 2) if d == 2 else (p // 2, (p + 1) // 2, 0, 0)
+    padded_x = _mypad(x, pad=pad, mem_stack=mem_stack)
+    lohi = _conv2d(padded_x, h, stride=s, groups=C, mem_stack=mem_stack)
+    if mem_stack:
+        # The padded intermediate is no longer needed once the convolution output exists.
+        mem_stack.free(np.prod(padded_x) * np.float32().itemsize)
+    del padded_x
+    return lohi
+def _sfb1d(
+    lo: cp.ndarray,
+    hi: cp.ndarray,
+    g0: np.ndarray,
+    g1: np.ndarray,
+    dim: int,
+    mem_stack: Optional[_DeviceMemStack],
+) -> cp.ndarray:
+    """1D synthesis filter bank of an image Array
+    Applies ``_conv_transpose2d`` to *lo* with *g0* and to *hi* with *g1*
+    along dimension *dim* and returns the sum of the two results.
+    Parameters
+    ----------
+    lo, hi : lowpass and highpass inputs. When *mem_stack* is given they are
+        the shapes of those inputs instead.
+    g0, g1 : lowpass and highpass reconstruction filters.
+    dim : dimension of filtering (2 is vertical, otherwise horizontal).
+    mem_stack : optional memory tracker. When given, only allocation sizes are
+        registered and the shape of the result is returned.
+    """
+    C = lo.shape[1] if not mem_stack else lo[1]
+    # Convert the dim to positive
+    d = dim % 4
+    L = g0.size
+    shape = [1, 1, 1, 1]
+    shape[d] = L
+    # Step of two along the filtered dimension, one along the other.
+    s = (2, 1) if d == 2 else (1, 2)
+    # Repeat each filter once per channel along axis 0.
+    g0 = np.concatenate([g0.reshape(*shape)] * C, axis=0)
+    g1 = np.concatenate([g1.reshape(*shape)] * C, axis=0)
+    # Number of samples cropped from each end of the filtered dimension.
+    pad = (L - 2, 0) if d == 2 else (0, L - 2)
+    y_lo = _conv_transpose2d(lo, g0, stride=s, pad=pad, groups=C, mem_stack=mem_stack)
+    y_hi = _conv_transpose2d(hi, g1, stride=s, pad=pad, groups=C, mem_stack=mem_stack)
+    if mem_stack:
+        # Allocation of the sum
+        mem_stack.malloc(np.prod(y_hi) * np.float32().itemsize)
+        # Both operands are released once the sum exists.
+        mem_stack.free(np.prod(y_lo) * np.float32().itemsize)
+        mem_stack.free(np.prod(y_hi) * np.float32().itemsize)
+        # The shape of y_lo is returned as the shape of the sum.
+        return y_lo
+    return y_lo + y_hi
+class _DWTForward:
+    """Performs a 2d DWT Forward decomposition of an image
+    The decomposition filters are read from ``pywt.Wavelet`` once at
+    construction, reversed, and stored as float32 column- and row-shaped
+    4D arrays.
+    Args:
+        wave (str): Which wavelet to use.
+    """
+    def __init__(self, wave: str):
+        super().__init__()
+        wave = pywt.Wavelet(wave)
+        h0_col, h1_col = wave.dec_lo, wave.dec_hi
+        # The same filter pair is used for rows and columns.
+        h0_row, h1_row = h0_col, h1_col
+        # Reverse each filter and shape it as (1, 1, L, 1) for columns or (1, 1, 1, L) for rows.
+        self.h0_col = np.array(h0_col).astype("float32")[::-1].reshape((1, 1, -1, 1))
+        self.h1_col = np.array(h1_col).astype("float32")[::-1].reshape((1, 1, -1, 1))
+        self.h0_row = np.array(h0_row).astype("float32")[::-1].reshape((1, 1, 1, -1))
+        self.h1_row = np.array(h1_row).astype("float32")[::-1].reshape((1, 1, 1, -1))
+    def apply(
+        self, x: cp.ndarray, mem_stack: Optional[_DeviceMemStack] = None
+    ) -> Tuple[cp.ndarray, cp.ndarray]:
+        """Forward pass of the DWT.
+        Args:
+            x (array): Input of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+                When *mem_stack* is given, *x* is instead that shape.
+            mem_stack: optional memory tracker. When given, only allocation
+                sizes are registered and the shapes of (yl, yh) are returned.
+        Returns:
+            (yl, yh)
+                tuple of lowpass (yl) and bandpass (yh) coefficients.
+                yh is a list of scale coefficients. yl has shape
+                :math:`(N, C_{in}, H_{in}', W_{in}')` and yh has shape
+                :math:`list(N, C_{in}, 3, H_{in}'', W_{in}'')`. The new
+                dimension in yh iterates over the LH, HL and HH coefficients.
+        Note:
+            :math:`H_{in}', W_{in}', H_{in}'', W_{in}''` denote the correctly
+            downsampled shapes of the DWT pyramid.
+        """
+        # Do a multilevel transform
+        # Do 1 level of the transform
+        # Filter along the rows (dim=3) first, then along the columns (dim=2).
+        lohi = _afb1d(x, self.h0_row, self.h1_row, dim=3, mem_stack=mem_stack)
+        y = _afb1d(lohi, self.h0_col, self.h1_col, dim=2, mem_stack=mem_stack)
+        if mem_stack:
+            # Shape bookkeeping that mirrors the reshape and the two contiguous copies made below.
+            y_shape = [y[0], np.prod(y) // y[0] // 4 // y[-2] // y[-1], 4, y[-2], y[-1]]
+            x_shape = [y_shape[0], y_shape[1], y_shape[3], y_shape[4]]
+            yh_shape = [y_shape[0], y_shape[1], y_shape[2] - 1, y_shape[3], y_shape[4]]
+            mem_stack.free(np.prod(lohi) * np.float32().itemsize)
+            mem_stack.malloc(np.prod(x_shape) * np.float32().itemsize)
+            mem_stack.malloc(np.prod(yh_shape) * np.float32().itemsize)
+            mem_stack.free(np.prod(y) * np.float32().itemsize)
+            return x_shape, yh_shape
+        del lohi
+        s = y.shape
+        # Group the channel axis into sets of four subbands.
+        y = y.reshape(s[0], -1, 4, s[-2], s[-1])
+        # Subband 0 is the lowpass output; subbands 1..3 are the bandpass coefficients.
+        x = cp.ascontiguousarray(y[:, :, 0])
+        yh = cp.ascontiguousarray(y[:, :, 1:])
+        return (x, yh)
+class _DWTInverse:
+    """Performs a 2d DWT Inverse reconstruction of an image
+    The reconstruction filters are read from ``pywt.Wavelet`` once at
+    construction and stored, without reversal, as float32 column- and
+    row-shaped 4D arrays.
+    Args:
+        wave (str): Which wavelet to use.
+    """
+    def __init__(self, wave: str):
+        super().__init__()
+        wave = pywt.Wavelet(wave)
+        g0_col, g1_col = wave.rec_lo, wave.rec_hi
+        # The same filter pair is used for rows and columns.
+        g0_row, g1_row = g0_col, g1_col
+        # Prepare the filters
+        self.g0_col = np.array(g0_col).astype("float32").reshape((1, 1, -1, 1))
+        self.g1_col = np.array(g1_col).astype("float32").reshape((1, 1, -1, 1))
+        self.g0_row = np.array(g0_row).astype("float32").reshape((1, 1, 1, -1))
+        self.g1_row = np.array(g1_row).astype("float32").reshape((1, 1, 1, -1))
+    def apply(
+        self,
+        coeffs: Tuple[cp.ndarray, cp.ndarray],
+        mem_stack: Optional[_DeviceMemStack] = None,
+    ) -> cp.ndarray:
+        """
+        Args:
+            coeffs (yl, yh): tuple of lowpass and bandpass coefficients, where:
+              yl is a lowpass array of shape :math:`(N, C_{in}, H_{in}',
+              W_{in}')` and yh is a list of bandpass arrays of shape
+              :math:`list(N, C_{in}, 3, H_{in}'', W_{in}'')`. I.e. should match
+              the format returned by DWTForward. When *mem_stack* is given,
+              both entries are shapes instead of arrays.
+            mem_stack: optional memory tracker. When given, only allocation
+              sizes are registered and the shape of the result is returned.
+        Returns:
+            Reconstructed input of shape :math:`(N, C_{in}, H_{in}, W_{in})`
+        Note:
+            :math:`H_{in}', W_{in}', H_{in}'', W_{in}''` denote the correctly
+            downsampled shapes of the DWT pyramid.
+        """
+        yl, yh = coeffs
+        # Split the three bandpass subbands; in estimation mode each is just the 4D subband shape.
+        lh = yh[:, :, 0, :, :] if not mem_stack else [yh[0], yh[1], yh[3], yh[4]]
+        hl = yh[:, :, 1, :, :] if not mem_stack else [yh[0], yh[1], yh[3], yh[4]]
+        hh = yh[:, :, 2, :, :] if not mem_stack else [yh[0], yh[1], yh[3], yh[4]]
+        # Synthesize along the columns (dim=2) for each pair, then along the rows (dim=3).
+        lo = _sfb1d(yl, lh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
+        hi = _sfb1d(hl, hh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
+        yl = _sfb1d(lo, hi, self.g0_row, self.g1_row, dim=3, mem_stack=mem_stack)
+        if mem_stack:
+            # The column-synthesis intermediates are released once the final result exists.
+            mem_stack.free(np.prod(lo) * np.float32().itemsize)
+            mem_stack.free(np.prod(hi) * np.float32().itemsize)
+        del lo
+        del hi
+        return yl
+def _repair_memory_fragmentation_if_needed(fragmentation_threshold: float = 0.2):
+    """Release the cached blocks of CuPy's default memory pool when too much of it is idle.
+    The pool is flushed with ``free_all_blocks()`` once the bytes it holds but
+    does not currently use exceed ``fragmentation_threshold`` times the bytes
+    in use.
+    Args:
+        fragmentation_threshold (float): Allowed ratio of idle to used bytes
+            in the default pool before it is flushed.
+    """
+    pool = cp.get_default_memory_pool()
+    used = pool.used_bytes()
+    idle = pool.total_bytes() - used
+    # Equivalent to ``total / used - 1 > threshold`` but without the division,
+    # so a pool with nothing in use no longer raises ZeroDivisionError; any
+    # idle bytes are simply released in that case.
+    if idle > fragmentation_threshold * used:
+        pool.free_all_blocks()
+def remove_stripe_fw(
+    data: cp.ndarray,
+    sigma: float = 2,
+    wname: str = "db5",
+    level: Optional[int] = None,
+    calc_peak_gpu_mem: bool = False,
+) -> cp.ndarray:
+    """
+    Remove horizontal stripes from sinogram using the Fourier-Wavelet (FW) based method :cite:`munch2009stripe`. The original source code
+    is taken from the TomoCupy and NABU packages.
+    Parameters
+    ----------
+    data : ndarray
+        3D tomographic data as a CuPy array. When ``calc_peak_gpu_mem`` is True, the three-element shape of the data
+        is expected here instead of the array itself.
+    sigma : float
+        Damping parameter in Fourier space.
+    wname : str
+        Type of the wavelet filter: select from 'db5', 'db7', 'haar', 'sym5', 'sym16', 'bior4.4'.
+    level : int, optional
+        Number of discrete wavelet transform levels. If None, ``ceil(log2(n))`` is used, where ``n`` is the largest
+        dimension of the data.
+    calc_peak_gpu_mem : bool
+        Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.
+    Returns
+    -------
+    ndarray
+        Stripe-corrected 3D tomographic data as a CuPy array. When ``calc_peak_gpu_mem`` is True, an int is returned
+        instead: the high-water mark recorded by the memory stack (which is fed byte counts), scaled by 1.1.
+    """
+    if level is None:
+        if calc_peak_gpu_mem:
+            size = np.max(data)  # data is a tuple in this case
+        else:
+            size = np.max(data.shape)
+        level = int(np.ceil(np.log2(size)))
+    [nproj, nz, ni] = data.shape if not calc_peak_gpu_mem else data
+    # The first axis is extended by one eighth of its length; the data is later centred in it.
+    nproj_pad = nproj + nproj // 8
+    # Accepts all wave types available to PyWavelets
+    xfm = _DWTForward(wave=wname)
+    ifm = _DWTInverse(wave=wname)
+    # Wavelet decomposition.
+    cc = []
+    sli_shape = [nz, 1, nproj_pad, ni]
+    if calc_peak_gpu_mem:
+        # Memory-estimation path: shapes (not arrays) are passed through the transforms while
+        # mem_stack records the allocations in the same order as the real computation below.
+        # The only real device allocation is the dummy array used to query the FFT plan size.
+        mem_stack = _DeviceMemStack()
+        # A data copy is assumed when invoking the function
+        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
+        mem_stack.malloc(np.prod(sli_shape) * np.float32().itemsize)
+        cc = []
+        fcV_bytes = None
+        for k in range(level):
+            new_sli_shape, c = xfm.apply(sli_shape, mem_stack)
+            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
+            sli_shape = new_sli_shape
+            cc.append(c)
+            # fcV from the previous level is released before this level's is accounted for.
+            if fcV_bytes:
+                mem_stack.free(fcV_bytes)
+            fcV_shape = [c[0], c[3], c[4]]
+            fcV_bytes = np.prod(fcV_shape) * np.complex64().itemsize
+            mem_stack.malloc(fcV_bytes)
+            # For the FFT
+            mem_stack.malloc(2 * np.prod(fcV_shape) * np.float32().itemsize)
+            mem_stack.malloc(2 * fcV_bytes)
+            # Build a throwaway plan only to read the size of its work area.
+            fft_dummy = cp.empty(fcV_shape, dtype="float32")
+            fft_plan = get_fft_plan(fft_dummy)
+            fft_plan_size = fft_plan.work_area.mem.size
+            del fft_dummy
+            del fft_plan
+            mem_stack.malloc(fft_plan_size)
+            mem_stack.free(2 * np.prod(fcV_shape) * np.float32().itemsize)
+            mem_stack.free(fft_plan_size)
+            mem_stack.free(2 * fcV_bytes)
+            # The rest of the iteration doesn't contribute to the peak
+        # NOTE: The last iteration of fcV is "leaked"
+        for k in range(level)[::-1]:
+            new_sli_shape = [sli_shape[0], sli_shape[1], cc[k][-2], cc[k][-1]]
+            new_sli_shape = ifm.apply((new_sli_shape, cc[k]), mem_stack)
+            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
+            sli_shape = new_sli_shape
+        # Accounts for the contiguous output array returned by the real computation.
+        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
+        for c in cc:
+            mem_stack.free(np.prod(c) * np.float32().itemsize)
+        mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
+        # Report the recorded peak with a 10% margin.
+        return int(mem_stack.highwater * 1.1)
+    sli = cp.zeros(sli_shape, dtype="float32")
+    # Place the data, with its first two axes swapped, in the centre of the zero-padded axis.
+    sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2] = data.swapaxes(0, 1)
+    for k in range(level):
+        sli, c = xfm.apply(sli)
+        cc.append(c)
+        # FFT
+        # Only band index 1 of the detail coefficients (the one _DWTInverse.apply names ``hl``) is filtered.
+        fft_in = cp.ascontiguousarray(cc[k][:, 0, 1])
+        fft_plan = get_fft_plan(fft_in, axes=1)
+        with fft_plan:
+            fcV = cp.fft.fft(fft_in, axis=1)
+        del fft_plan
+        del fft_in
+        _, my, mx = fcV.shape
+        # Damping of ring artifact information.
+        # damp equals 1 - exp(-y_hat**2 / (2 * sigma**2)): zero at zero frequency, approaching one away from it.
+        y_hat = np.fft.ifftshift((np.arange(-my, my, 2) + 1) / 2)
+        damp = -np.expm1(-(y_hat**2) / (2 * sigma**2))
+        fcV *= cp.tile(damp, (mx, 1)).swapaxes(0, 1)
+        # Inverse FFT.
+        ifft_in = cp.ascontiguousarray(fcV)
+        ifft_plan = get_fft_plan(ifft_in, axes=1)
+        with ifft_plan:
+            # The real part of the filtered band is written back in place.
+            cc[k][:, 0, 1] = cp.fft.ifft(ifft_in, my, axis=1).real
+        del ifft_plan
+        del ifft_in
+        _repair_memory_fragmentation_if_needed()
+    # Wavelet reconstruction.
+    for k in range(level)[::-1]:
+        # Crop the running reconstruction to this level's coefficient size before inverting.
+        shape0 = cc[k][0, 0, 1].shape
+        sli = sli[:, :, : shape0[0], : shape0[1]]
+        sli = ifm.apply((sli, cc[k]))
+        _repair_memory_fragmentation_if_needed()
+    # Cut out the padding, restore the original axis order and return a contiguous copy.
+    data = sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2, :ni]
+    data = data.swapaxes(0, 1)
+    return cp.ascontiguousarray(data)
 ######## Optimized version for Vo-all ring removal in tomopy########
 # This function is taken from TomoCuPy package
 # *************************************************************************** #

httomolibgpu/recon/_phase_cross_correlation.py CHANGED Viewed

@@ -36,9 +36,8 @@ import cupy as cp
 import cupyx.scipy.ndimage as ndi
 import numpy as np
-def _upsampled_dft(
-    data, upsampled_region_size, upsample_factor=1, axis_offsets=None
-):
+def _upsampled_dft(data, upsampled_region_size, upsample_factor=1, axis_offsets=None):
     """
     Upsampled DFT by matrix multiplication.
@@ -148,9 +147,7 @@ def _compute_error(cross_correlation_max, src_amp, target_amp):
         )
     with np.errstate(invalid="ignore"):
-        error = 1.0 - cross_correlation_max * cross_correlation_max.conj() / (
-            amp
-        )
+        error = 1.0 - cross_correlation_max * cross_correlation_max.conj() / (amp)
     return cp.sqrt(cp.abs(error))
@@ -192,9 +189,7 @@ def _disambiguate_shift(reference_image, moving_image, shift):
     negative_shift = [shift_i - s for shift_i, s in zip(positive_shift, shape)]
     subpixel = any(s % 1 != 0 for s in shift)
     interp_order = 3 if subpixel else 0
-    shifted = ndi.shift(
-        moving_image, shift, mode="grid-wrap", order=interp_order
-    )
+    shifted = ndi.shift(moving_image, shift, mode="grid-wrap", order=interp_order)
     indices = tuple(round(s) for s in positive_shift)
     splits_per_dim = [(slice(0, i), slice(i, None)) for i in indices]
     max_corr = -1.0
@@ -217,9 +212,7 @@ def _disambiguate_shift(reference_image, moving_image, shift):
         )
         return shift
     real_shift_acc = []
-    for sl, pos_shift, neg_shift in zip(
-        max_slice, positive_shift, negative_shift
-    ):
+    for sl, pos_shift, neg_shift in zip(max_slice, positive_shift, negative_shift):
         real_shift_acc.append(pos_shift if sl.stop is None else neg_shift)
     if not subpixel:
         real_shift = tuple(map(int, real_shift_acc))
@@ -359,16 +352,12 @@ def phase_cross_correlation(
         # Initial shift estimate in upsampled grid
         # shift = cp.around(shift * upsample_factor) / upsample_factor
         upsample_factor = float(upsample_factor)
-        shift = tuple(
-            round(s * upsample_factor) / upsample_factor for s in shift
-        )
+        shift = tuple(round(s * upsample_factor) / upsample_factor for s in shift)
         upsampled_region_size = math.ceil(upsample_factor * 1.5)
         # Center of output array at dftshift + 1
         dftshift = float(upsampled_region_size // 2)
         # Matrix multiply DFT around the current shift estimate
-        sample_region_offset = tuple(
-            dftshift - s * upsample_factor for s in shift
-        )
+        sample_region_offset = tuple(dftshift - s * upsample_factor for s in shift)
         cross_correlation = _upsampled_dft(
             image_product.conj(),
             upsampled_region_size,
@@ -394,9 +383,7 @@ def phase_cross_correlation(
     # If its only one row or column the shift along that dimension has no
     # effect. We set to zero.
-    shift = tuple(
-        s if axis_size != 1 else 0 for s, axis_size in zip(shift, shape)
-    )
+    shift = tuple(s if axis_size != 1 else 0 for s, axis_size in zip(shift, shape))
     if disambiguate:
         if space.lower() != "real":
@@ -406,10 +393,7 @@ def phase_cross_correlation(
     # Redirect user to masked_phase_cross_correlation if NaNs are observed
     if cp.isnan(CCmax) or cp.isnan(src_amp) or cp.isnan(target_amp):
-        raise ValueError(
-            "NaN values found, please remove NaNs from your "
-            "input data"
-        )
+        raise ValueError("NaN values found, please remove NaNs from your " "input data")
     return (
         shift,

{httomolibgpu-5.0.dist-info → httomolibgpu-5.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: httomolibgpu
-Version: 5.0
+Version: 5.2
 Summary: Commonly used tomography data processing methods at DLS.
 Author-email: Daniil Kazantsev <daniil.kazantsev@diamond.ac.uk>, Yousef Moazzam <yousef.moazzam@diamond.ac.uk>, Naman Gera <naman.gera@diamond.ac.uk>
 License: BSD-3-Clause
@@ -19,6 +19,7 @@ Requires-Dist: scipy
 Requires-Dist: pillow
 Requires-Dist: scikit-image
 Requires-Dist: tomobar
+Requires-Dist: PyWavelets
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"

{httomolibgpu-5.0.dist-info → httomolibgpu-5.2.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,6 @@
-httomolibgpu/__init__.py,sha256=Fdj5ipIGgeKqSCYRb5bBVMAZ04ZvZJzuBoGOAqc0zgk,937
+httomolibgpu/__init__.py,sha256=Dt_TYhjJGPVathlceTYQhoRSyH8n7FGQJlRMUlFZNdc,959
 httomolibgpu/cupywrapper.py,sha256=6ITGJ2Jw5I5kVmKEL5LlsnLRniEqqBLsHiAjvLtk0Xk,493
+httomolibgpu/memory_estimator_helpers.py,sha256=QaJady-z8y9Emw7W-lB608vBTNvVYv3obboQKVj6E9M,705
 httomolibgpu/cuda_kernels/__init__.py,sha256=VQNMaGcVDwiE-C64FfLtubHpLriLG0Y3_QnjHBSHrN0,884
 httomolibgpu/cuda_kernels/calc_metrics.cu,sha256=oV7ZPcwjWafmZjbNsUkBYPvOViJ_nX3zBoOAuPCmIrA,11335
 httomolibgpu/cuda_kernels/center_360_shifts.cu,sha256=Ya_8hxjXGtPBsPY3qfGJaugwnYrTFjFFretRcLiUfFQ,1631
@@ -7,6 +8,7 @@ httomolibgpu/cuda_kernels/generate_mask.cu,sha256=3il3r1J2cnTCd3UXO4GWGfBgGxj4pv
 httomolibgpu/cuda_kernels/median_kernel.cu,sha256=EECLUCoJkT9GQ9Db_FF6fYOG6cDSiAePTRZNxE4VZ68,1692
 httomolibgpu/cuda_kernels/raven_filter.cu,sha256=KX2TM_9tMpvoGCHezDNWYABCnv2cT9mlMo4IhxRUac0,1437
 httomolibgpu/cuda_kernels/remove_nan_inf.cu,sha256=gv0ihkf6A_D_po9x7pmgFsQFhwZ1dB_HYc_0Tu-bpUU,630
+httomolibgpu/cuda_kernels/remove_stripe_fw.cu,sha256=J_vy0RUYYKT-mOzERsn3kjgt4hbE7vHPFRuJYNzs6sM,4504
 httomolibgpu/misc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 httomolibgpu/misc/corr.py,sha256=e1eUsWLSM9SB5xzWTDW0o9pAD_lbrr4DL-QQmyM8v4c,4503
 httomolibgpu/misc/denoise.py,sha256=-D9UPbZyUAcCptBHUUXsmj1NFzd6HrrRjJJh4T5gmhQ,4787
@@ -16,14 +18,14 @@ httomolibgpu/misc/utils.py,sha256=rHRuQUO47SlTanvKDBgiC0im4tXlGLCw5B_zvlLzzbc,47
 httomolibgpu/prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 httomolibgpu/prep/alignment.py,sha256=GVxnyioipmqsHb4s3mPQ8tKGoPIQMPftDrQxUO-HBuE,5491
 httomolibgpu/prep/normalize.py,sha256=hee0H4mE7FrSZgcF1fjLsKT06xjTJymkyAxpe2itQe4,4202
-httomolibgpu/prep/phase.py,sha256=eDi4Y2dZ0ZDgblCku1XhHiSuK6rHnmsDFuZdDvlnHMU,8505
-httomolibgpu/prep/stripe.py,sha256=8_DV0ON6AWARuziqkmhom56gWTardtqC_z3xG8geg0o,14774
+httomolibgpu/prep/phase.py,sha256=yKJe9gmWuFaUSIuoctV5X1Pb7yEgOmkQ6jxvZkSSwpQ,12128
+httomolibgpu/prep/stripe.py,sha256=OZPimFxe9TOSaEcErORFxd6HCcFcR62-q5XYBvC10FM,36918
 httomolibgpu/recon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-httomolibgpu/recon/_phase_cross_correlation.py,sha256=h5r1g5lMmS9p32k2SuC8pjji6McpwBZiN95zQd2xvBo,16616
+httomolibgpu/recon/_phase_cross_correlation.py,sha256=Ru2oLAPv8XOSSuZer5yNQrxD_8lMAwBSvtkVAVs5TCc,16469
 httomolibgpu/recon/algorithm.py,sha256=ds-_io7kGzo5FiJq8k4--PYtIWak3y9H7yuyg1lymyQ,25121
 httomolibgpu/recon/rotation.py,sha256=GaSwNrlDnlP_iIrTfKUQLiXsShJ5aSDvdKPwofggtwQ,27948
-httomolibgpu-5.0.dist-info/licenses/LICENSE,sha256=bXeLsgelPUUXw8HCIYiVC97Dpjhm2nB54m7TACdH8ng,48032
-httomolibgpu-5.0.dist-info/METADATA,sha256=0_lrMXVwbSoLpLzIx_i24kCU7VWAMkXFaBaT6rQ0O-c,3339
-httomolibgpu-5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-httomolibgpu-5.0.dist-info/top_level.txt,sha256=nV0Ty_YvSPVd1O6MNWuIplD0w1nwk5hT76YgBZ-bzUw,13
-httomolibgpu-5.0.dist-info/RECORD,,
+httomolibgpu-5.2.dist-info/licenses/LICENSE,sha256=bXeLsgelPUUXw8HCIYiVC97Dpjhm2nB54m7TACdH8ng,48032
+httomolibgpu-5.2.dist-info/METADATA,sha256=K8rMjvvrs2ZvfOiZeEar2tinoGMjH5b7fYfv-xG0F3E,3365
+httomolibgpu-5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+httomolibgpu-5.2.dist-info/top_level.txt,sha256=nV0Ty_YvSPVd1O6MNWuIplD0w1nwk5hT76YgBZ-bzUw,13
+httomolibgpu-5.2.dist-info/RECORD,,

{httomolibgpu-5.0.dist-info → httomolibgpu-5.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{httomolibgpu-5.0.dist-info → httomolibgpu-5.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{httomolibgpu-5.0.dist-info → httomolibgpu-5.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

httomolibgpu 5.0__py3-none-any.whl → 5.2__py3-none-any.whl

httomolibgpu 5.0py3-none-any.whl → 5.2py3-none-any.whl