PyPI - diffct - Versions diffs - 1.2.3__tar.gz → 1.2.5__tar.gz - Mend

diffct 1.2.3 (tar.gz) → 1.2.5 (tar.gz)

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (29) hide show

diffct-1.2.5/.github/workflows/release.yml ADDED Viewed

@@ -0,0 +1,20 @@
+name: Create Release
+on:
+  push:
+    tags:
+      - 'v*'
+jobs:
+  release:
+    name: Create Release
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Create Release
+        uses: softprops/action-gh-release@v2
+        with:
+          generate_release_notes: true

{diffct-1.2.3 → diffct-1.2.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffct
-Version: 1.2.3
+Version: 1.2.5
 Summary: A CUDA-based library for computed tomography (CT) projection and reconstruction with differentiable operators
 Project-URL: Homepage, https://github.com/sypsyp97/diffct
 Author-email: Yipeng Sun <yipeng.sun@fau.de>

{diffct-1.2.3 → diffct-1.2.5}/diffct/differentiable.py RENAMED Viewed

@@ -19,6 +19,8 @@ _TPB_3D             = (8,  8,  8)
 # Trades numerical precision for performance in ray-tracing calculations
 # Safe for CT reconstruction where slight precision loss is acceptable for speed gains
 _FASTMATH_DECORATOR = cuda.jit(cache=True, fastmath=True)
+# Disable fastmath for backward kernels to ensure gradient correctness
+_NON_FASTMATH_DECORATOR = cuda.jit(cache=True, fastmath=False)
 _INF                = _DTYPE(np.inf)
 _EPSILON            = _DTYPE(1e-6)
 # === Device Management Utilities ===
@@ -106,36 +108,9 @@ class TorchCUDABridge:
             raise ValueError("Tensor must be on CUDA device")
         return cuda.as_cuda_array(tensor.detach())
-    @staticmethod
-    def cuda_array_to_tensor(cuda_array, tensor_template):
-        """Convert a Numba CUDA array to a PyTorch tensor.
-        Wrap a Numba CUDA DeviceNDArray as a PyTorch tensor with matching device
-        and dtype from a template tensor, sharing underlying memory.
-        Parameters
-        ----------
-        cuda_array : numba.cuda.cudadrv.devicearray.DeviceNDArray
-            Numba CUDA array to wrap.
-        tensor_template : torch.Tensor
-            Template tensor specifying device and dtype.
-        Returns
-        -------
-        torch.Tensor
-            PyTorch tensor sharing data with the CUDA array on the template's
-            device and dtype.
-        Examples
-        --------
-        >>> arr = cuda.device_array((10,), dtype=np.float32)
-        >>> t = torch.zeros(10, device='cuda')
-        >>> new_t = TorchCUDABridge.cuda_array_to_tensor(arr, t)
-        """
-        return torch.as_tensor(cuda_array, device=tensor_template.device, dtype=tensor_template.dtype)
 # === GPU-aware Trigonometric Table Generation ===
-def _trig_tables(angles, dtype=_DTYPE):
+def _trig_tables(angles, dtype=_DTYPE, device=None):
     """Compute cosine and sine tables for input angles.
     Precompute cosine and sine values and return as torch tensors on the
@@ -163,7 +138,7 @@ def _trig_tables(angles, dtype=_DTYPE):
     device(type='cuda', index=0)
     """
     if isinstance(angles, torch.Tensor):
-        device = angles.device
+        device = angles.device if device is None else device
         cos = torch.cos(angles).to(dtype=dtype)
         sin = torch.sin(angles).to(dtype=dtype)
         return cos.to(device), sin.to(device)
@@ -182,7 +157,10 @@ def _trig_tables(angles, dtype=_DTYPE):
         angles_cpu = torch.tensor(angles, dtype=torch_dtype)
         cos_cpu = torch.cos(angles_cpu)
         sin_cpu = torch.sin(angles_cpu)
-        return cos_cpu, sin_cpu
+        if device is not None:
+            return cos_cpu.to(device), sin_cpu.to(device)
+        else:
+            return cos_cpu, sin_cpu
 # ############################################################################
@@ -191,14 +169,14 @@ def _trig_tables(angles, dtype=_DTYPE):
 def _validate_3d_memory_layout(tensor, expected_order='DHW'):
     """Validate 3D tensor memory layout to prevent coordinate system inconsistencies.
     Parameters
     ----------
     tensor : torch.Tensor
         3D tensor to validate
     expected_order : str, optional
         Expected memory order ('DHW', 'VHW', etc.). Default is 'DHW'.
     Raises
     ------
     ValueError
@@ -206,53 +184,50 @@ def _validate_3d_memory_layout(tensor, expected_order='DHW'):
     """
     if len(tensor.shape) != 3:
         raise ValueError(f"Expected 3D tensor, got {len(tensor.shape)}D")
     # Check if tensor is contiguous to avoid memory duplication
     if not tensor.is_contiguous():
         raise ValueError(
             "Input tensor must be contiguous. Call .contiguous() before passing to "
             "cone beam functions to avoid memory duplication and ensure correct results."
         )
-    # Validate expected memory order based on stride patterns
-    strides = tensor.stride()
-    # Map expected orders to stride patterns
-    order_mapping = {
-        'DHW': (0, 1, 2),  # Depth, Height, Width
-        'VHW': (0, 1, 2),  # Views, Height, Width (for sinograms)
-        'WHD': (2, 1, 0),  # Width, Height, Depth (internal WHD format)
-    }
-    if expected_order not in order_mapping:
-        raise ValueError(f"Unsupported expected_order: {expected_order}")
-    expected_stride_order = order_mapping[expected_order]
-    # Check if actual strides match expected order
-    sorted_strides = sorted(enumerate(strides), key=lambda x: x[1], reverse=True)
-    actual_order = tuple(idx for idx, _ in sorted_strides)
-    if actual_order != expected_stride_order:
-        # Create appropriate error message based on context
-        if expected_order == 'VHW':
-            actual_str = f"({tensor.shape[0]}, {tensor.shape[1]}, {tensor.shape[2]})"
-            expected_str = "(Views, Height, Width)"
-            fix_str = "ensure your sinogram has shape (num_views, det_v, det_u)"
-        elif expected_order == 'DHW':
-            actual_str = f"({tensor.shape[0]}, {tensor.shape[1]}, {tensor.shape[2]})"
-            expected_str = "(Depth, Height, Width)"
-            fix_str = "ensure your volume has shape (D, H, W)"
-        else:
-            actual_str = str(tuple(tensor.shape))
-            expected_str = expected_order
-            fix_str = "check tensor dimensions"
-        raise ValueError(
-            f"Memory layout mismatch: expected {expected_str} order, "
-            f"but tensor has shape {actual_str}. Please {fix_str} and ensure "
-            f"the tensor is contiguous (.contiguous()) before passing to the function."
-        )
+    # Only check memory order for DHW and VHW, not for internal WHD layout
+    if expected_order in ('DHW', 'VHW'):
+        strides = tensor.stride()
+        order_mapping = {
+            'DHW': (0, 1, 2),  # Depth, Height, Width
+            'VHW': (0, 1, 2),  # Views, Height, Width (for sinograms)
+        }
+        if expected_order not in order_mapping:
+            raise ValueError(f"Unsupported expected_order: {expected_order}")
+        expected_stride_order = order_mapping[expected_order]
+        # Check if actual strides match expected order
+        sorted_strides = sorted(enumerate(strides), key=lambda x: x[1], reverse=True)
+        actual_order = tuple(idx for idx, _ in sorted_strides)
+        if actual_order != expected_stride_order:
+            # Create appropriate error message based on context
+            if expected_order == 'VHW':
+                actual_str = f"({tensor.shape[0]}, {tensor.shape[1]}, {tensor.shape[2]})"
+                expected_str = "(Views, Height, Width)"
+                fix_str = "ensure your sinogram has shape (num_views, det_v, det_u)"
+            elif expected_order == 'DHW':
+                actual_str = f"({tensor.shape[0]}, {tensor.shape[1]}, {tensor.shape[2]})"
+                expected_str = "(Depth, Height, Width)"
+                fix_str = "ensure your volume has shape (D, H, W)"
+            else:
+                actual_str = str(tuple(tensor.shape))
+                expected_str = expected_order
+                fix_str = "check tensor dimensions"
+            raise ValueError(
+                f"Memory layout mismatch: expected {expected_str} order, "
+                f"but tensor has shape {actual_str}. Please {fix_str} and ensure "
+                f"the tensor is contiguous (.contiguous()) before passing to the function."
+            )
+    # For 'WHD' (internal layout), skip stride check entirely
 def _grid_2d(n1, n2, tpb=_TPB_2D):
@@ -392,7 +367,7 @@ def _parallel_2d_forward_kernel(
     cos_a = d_cos[iang]  # Precomputed cosine of projection angle
     sin_a = d_sin[iang]  # Precomputed sine of projection angle
     # Normalize all physical distances to voxel units
-    u     = (idet - (n_det - 1) * 0.5) * det_spacing / voxel_spacing  # Detector coordinate in voxel units
+    u     = (idet - n_det * 0.5) * det_spacing / voxel_spacing  # Detector coordinate in voxel units
     # Define ray direction and starting point for parallel beam geometry
     # Ray direction is perpendicular to detector array (cos_a, sin_a)
@@ -471,8 +446,8 @@ def _parallel_2d_forward_kernel(
                 dx, dy = mid_x - ix0, mid_y - iy0  # Fractional parts: distance from base voxel center [0,1]
                 # Clamp indices to stay in-bounds during interpolation
-                ix0 = min(ix0, Nx - 2)
-                iy0 = min(iy0, Ny - 2)
+                ix0 = max(0, min(ix0, Nx - 2))
+                iy0 = max(0, min(iy0, Ny - 2))
                 # === BILINEAR INTERPOLATION WEIGHT CALCULATION ===
                 # Mathematical basis: Bilinear interpolation formula f(x,y) = Σ f(xi,yi) * wi(x,y)
@@ -501,7 +476,7 @@ def _parallel_2d_forward_kernel(
     d_sino[iang, idet] = accum
-@_FASTMATH_DECORATOR
+@_NON_FASTMATH_DECORATOR
 def _parallel_2d_backward_kernel(
     d_sino, n_ang, n_det,
     d_image, Nx, Ny,
@@ -554,7 +529,7 @@ def _parallel_2d_backward_kernel(
     cos_a = d_cos[iang]         # Precomputed cosine of projection angle
     sin_a = d_sin[iang]         # Precomputed sine of projection angle
     # Normalize all physical distances to voxel units
-    u     = (idet - (n_det - 1) * 0.5) * det_spacing / voxel_spacing  # Detector coordinate in voxel units
+    u     = (idet - n_det * 0.5) * det_spacing / voxel_spacing  # Detector coordinate in voxel units
     # Define ray direction and starting point for parallel beam geometry
     dir_x, dir_y = cos_a, sin_a
@@ -599,8 +574,8 @@ def _parallel_2d_backward_kernel(
                 dx, dy = mid_x - ix0, mid_y - iy0
                 # Clamp indices to stay in-bounds during interpolation
-                ix0 = min(ix0, Nx - 2)
-                iy0 = min(iy0, Ny - 2)
+                ix0 = max(0, min(ix0, Nx - 2))
+                iy0 = max(0, min(iy0, Ny - 2))
                 # === ATOMIC BACKPROJECTION WITH BILINEAR WEIGHTS ===
                 # Distribute contribution weighted by segment length and interpolation weights
@@ -686,7 +661,7 @@ def _fan_2d_forward_kernel(
     cos_a = d_cos[iang]  # Precomputed cosine of projection angle
     sin_a = d_sin[iang]  # Precomputed sine of projection angle
     # Normalize all physical distances to voxel units
-    u     = (idet - (n_det - 1) * 0.5) * det_spacing / voxel_spacing  # Detector coordinate in voxel units
+    u     = (idet - n_det * 0.5) * det_spacing / voxel_spacing  # Detector coordinate in voxel units
     sid_v = sid / voxel_spacing  # Source-to-isocenter distance in voxel units
     sdd_v = sdd / voxel_spacing  # Source-to-detector distance in voxel units
@@ -757,8 +732,8 @@ def _fan_2d_forward_kernel(
                 dx, dy = mid_x - ix0, mid_y - iy0
                 # Clamp indices to stay in-bounds during interpolation
-                ix0 = min(ix0, Nx - 2)
-                iy0 = min(iy0, Ny - 2)
+                ix0 = max(0, min(ix0, Nx - 2))
+                iy0 = max(0, min(iy0, Ny - 2))
                 # Bilinear interpolation (identical to parallel beam)
                 val = (
@@ -781,7 +756,7 @@ def _fan_2d_forward_kernel(
     d_sino[iang, idet] = accum
-@_FASTMATH_DECORATOR
+@_NON_FASTMATH_DECORATOR
 def _fan_2d_backward_kernel(
     d_sino, n_ang, n_det,
     d_image, Nx, Ny,
@@ -839,7 +814,7 @@ def _fan_2d_backward_kernel(
     cos_a = d_cos[iang]         # Precomputed cosine of projection angle
     sin_a = d_sin[iang]         # Precomputed sine of projection angle
     # Normalize all physical distances to voxel units
-    u     = (idet - (n_det - 1) * 0.5) * det_spacing / voxel_spacing  # Detector coordinate in voxel units
+    u     = (idet - n_det * 0.5) * det_spacing / voxel_spacing  # Detector coordinate in voxel units
     sid_v = sid / voxel_spacing  # Source-to-isocenter distance in voxel units
     sdd_v = sdd / voxel_spacing  # Source-to-detector distance in voxel units
@@ -901,8 +876,8 @@ def _fan_2d_backward_kernel(
                 dx, dy = mid_x - ix0, mid_y - iy0
                 # Clamp indices to stay in-bounds during interpolation
-                ix0 = min(ix0, Nx - 2)
-                iy0 = min(iy0, Ny - 2)
+                ix0 = max(0, min(ix0, Nx - 2))
+                iy0 = max(0, min(iy0, Ny - 2))
                 # === ATOMIC BACKPROJECTION WITH BILINEAR WEIGHTS ===
                 # Distribute contribution weighted by segment length and interpolation weights
@@ -995,8 +970,8 @@ def _cone_3d_forward_kernel(
     # === 3D CONE BEAM GEOMETRY SETUP ===
     cos_a, sin_a = d_cos[iview], d_sin[iview]  # Projection angle trigonometry
     # Normalize all physical distances to voxel units
-    u     = (iu - (n_u - 1) * 0.5) * du / voxel_spacing  # Detector u-coordinate in voxel units
-    v     = (iv - (n_v - 1) * 0.5) * dv / voxel_spacing  # Detector v-coordinate in voxel units
+    u     = (iu - n_u * 0.5) * du / voxel_spacing  # Detector u-coordinate in voxel units
+    v     = (iv - n_v * 0.5) * dv / voxel_spacing  # Detector v-coordinate in voxel units
     sid_v = sid / voxel_spacing  # Source-to-isocenter distance in voxel units
     sdd_v = sdd / voxel_spacing  # Source-to-detector distance in voxel units
@@ -1091,9 +1066,9 @@ def _cone_3d_forward_kernel(
                 dx, dy, dz = mid_x - ix0, mid_y - iy0, mid_z - iz0  # Fractional parts: distance from base voxel center [0,1]
                 # Clamp indices to stay in-bounds during interpolation
-                ix0 = min(ix0, Nx - 2)
-                iy0 = min(iy0, Ny - 2)
-                iz0 = min(iz0, Nz - 2)
+                ix0 = max(0, min(ix0, Nx - 2))
+                iy0 = max(0, min(iy0, Ny - 2))
+                iz0 = max(0, min(iz0, Nz - 2))
                 # === TRILINEAR INTERPOLATION WEIGHT CALCULATION ===
                 # Mathematical basis: Trilinear interpolation formula f(x,y,z) = Σ f(xi,yi,zi) * wi(x,y,z)
@@ -1131,7 +1106,7 @@ def _cone_3d_forward_kernel(
     d_sino[iview, iu, iv] = accum
-@_FASTMATH_DECORATOR
+@_NON_FASTMATH_DECORATOR
 def _cone_3d_backward_kernel(
     d_sino, n_views, n_u, n_v,
     d_vol, Nx, Ny, Nz,
@@ -1196,8 +1171,8 @@ def _cone_3d_backward_kernel(
     g = d_sino[iview, iu, iv]  # Sinogram value to backproject along this ray
     cos_a, sin_a = d_cos[iview], d_sin[iview]  # Projection angle trigonometry
     # Normalize all physical distances to voxel units
-    u     = (iu - (n_u - 1) * 0.5) * du / voxel_spacing  # Detector u-coordinate in voxel units
-    v     = (iv - (n_v - 1) * 0.5) * dv / voxel_spacing  # Detector v-coordinate in voxel units
+    u     = (iu - n_u * 0.5) * du / voxel_spacing  # Detector u-coordinate in voxel units
+    v     = (iv - n_v * 0.5) * dv / voxel_spacing  # Detector v-coordinate in voxel units
     sid_v = sid / voxel_spacing  # Source-to-isocenter distance in voxel units
     sdd_v = sdd / voxel_spacing  # Source-to-detector distance in voxel units
@@ -1281,9 +1256,9 @@ def _cone_3d_backward_kernel(
                 dx, dy, dz = mid_x - ix0, mid_y - iy0, mid_z - iz0  # Fractional parts for 3D weights
                 # Clamp indices to stay in-bounds during interpolation
-                ix0 = min(ix0, Nx - 2)
-                iy0 = min(iy0, Ny - 2)
-                iz0 = min(iz0, Nz - 2)
+                ix0 = max(0, min(ix0, Nx - 2))
+                iy0 = max(0, min(iy0, Ny - 2))
+                iz0 = max(0, min(iz0, Nz - 2))
                 # === ATOMIC BACKPROJECTION WITH TRILINEAR WEIGHTS ===
                 # Distribute contribution weighted by segment length and interpolation weights
@@ -1408,7 +1383,7 @@ class ParallelProjectorFunction(torch.autograd.Function):
         sinogram = torch.zeros((n_angles, num_detectors), dtype=image.dtype, device=device)
         # Prepare trigonometric tables on the correct device
-        d_cos, d_sin = _trig_tables(angles, dtype=image.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=image.dtype, device=device)
         # Get Numba CUDA array views for kernel
         d_image = TorchCUDABridge.tensor_to_cuda_array(image)
@@ -1441,7 +1416,7 @@ class ParallelProjectorFunction(torch.autograd.Function):
         n_angles = angles.shape[0]
         grad_image = torch.zeros((Ny, Nx), dtype=grad_sinogram.dtype, device=device)
-        d_cos, d_sin = _trig_tables(angles, dtype=grad_sinogram.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=grad_sinogram.dtype, device=device)
         d_grad_sino = TorchCUDABridge.tensor_to_cuda_array(grad_sinogram)
         d_img_grad = TorchCUDABridge.tensor_to_cuda_array(grad_image)
@@ -1541,7 +1516,7 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
         reco = torch.zeros((Ny, Nx), dtype=sinogram.dtype, device=device)
         # Prepare trigonometric tables on the correct device
-        d_cos, d_sin = _trig_tables(angles, dtype=sinogram.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=sinogram.dtype, device=device)
         # Get Numba CUDA array views for kernel
         d_sino = TorchCUDABridge.tensor_to_cuda_array(sinogram)
@@ -1578,7 +1553,7 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
         grad_sino = torch.zeros((n_ang, n_det), dtype=grad_output.dtype, device=device)
         # Prepare trigonometric tables on the correct device
-        d_cos, d_sin = _trig_tables(angles, dtype=grad_output.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=grad_output.dtype, device=device)
         # Get Numba CUDA array views for kernel
         d_grad_out = TorchCUDABridge.tensor_to_cuda_array(grad_output)
@@ -1679,7 +1654,7 @@ class FanProjectorFunction(torch.autograd.Function):
         n_ang = angles.shape[0]
         sinogram = torch.zeros((n_ang, num_detectors), dtype=image.dtype, device=device)
-        d_cos, d_sin = _trig_tables(angles, dtype=image.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=image.dtype, device=device)
         d_image = TorchCUDABridge.tensor_to_cuda_array(image)
         d_sino = TorchCUDABridge.tensor_to_cuda_array(sinogram)
@@ -1713,7 +1688,7 @@ class FanProjectorFunction(torch.autograd.Function):
         n_ang = angles.shape[0]
         grad_img = torch.zeros((Ny, Nx), dtype=grad_sinogram.dtype, device=device)
-        d_cos, d_sin = _trig_tables(angles, dtype=grad_sinogram.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=grad_sinogram.dtype, device=device)
         d_grad_sino = TorchCUDABridge.tensor_to_cuda_array(grad_sinogram)
         d_img_grad = TorchCUDABridge.tensor_to_cuda_array(grad_img)
@@ -1817,7 +1792,7 @@ class FanBackprojectorFunction(torch.autograd.Function):
         Ny, Nx = H, W
         reco = torch.zeros((Ny, Nx), dtype=sinogram.dtype, device=device)
-        d_cos, d_sin = _trig_tables(angles, dtype=sinogram.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=sinogram.dtype, device=device)
         d_sino = TorchCUDABridge.tensor_to_cuda_array(sinogram)
         d_reco = TorchCUDABridge.tensor_to_cuda_array(reco)
@@ -1851,7 +1826,7 @@ class FanBackprojectorFunction(torch.autograd.Function):
         Ny, Nx = grad_output.shape
         grad_sino = torch.zeros((n_ang, n_det), dtype=grad_output.dtype, device=device)
-        d_cos, d_sin = _trig_tables(angles, dtype=grad_output.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=grad_output.dtype, device=device)
         d_grad_out = TorchCUDABridge.tensor_to_cuda_array(grad_output)
         d_sino_grad = TorchCUDABridge.tensor_to_cuda_array(grad_sino)
@@ -1959,7 +1934,7 @@ class ConeProjectorFunction(torch.autograd.Function):
         _validate_3d_memory_layout(volume, expected_order='DHW')
         sino = torch.zeros((n_views, det_u, det_v), dtype=volume.dtype, device=device)
-        d_cos, d_sin = _trig_tables(angles, dtype=volume.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=volume.dtype, device=device)
         volume_perm = volume.permute(2, 1, 0).contiguous()
         d_vol = TorchCUDABridge.tensor_to_cuda_array(volume_perm)
@@ -1997,7 +1972,7 @@ class ConeProjectorFunction(torch.autograd.Function):
         n_views = angles.shape[0]
         grad_vol_perm = torch.zeros((W, H, D), dtype=grad_sinogram.dtype, device=device)
-        d_cos, d_sin = _trig_tables(angles, dtype=grad_sinogram.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=grad_sinogram.dtype, device=device)
         d_grad_sino = TorchCUDABridge.tensor_to_cuda_array(grad_sinogram)
         d_vol_grad = TorchCUDABridge.tensor_to_cuda_array(grad_vol_perm)
@@ -2116,7 +2091,7 @@ class ConeBackprojectorFunction(torch.autograd.Function):
         _validate_3d_memory_layout(sinogram, expected_order='VHW')
         vol_perm = torch.zeros((W, H, D), dtype=sinogram.dtype, device=device)
-        d_cos, d_sin = _trig_tables(angles, dtype=sinogram.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=sinogram.dtype, device=device)
         d_sino = TorchCUDABridge.tensor_to_cuda_array(sinogram)
         d_reco = TorchCUDABridge.tensor_to_cuda_array(vol_perm)
@@ -2153,7 +2128,7 @@ class ConeBackprojectorFunction(torch.autograd.Function):
         n_views = angles.shape[0]
         grad_sino = torch.zeros((n_views, n_u, n_v), dtype=grad_output.dtype, device=device)
-        d_cos, d_sin = _trig_tables(angles, dtype=grad_output.dtype)
+        d_cos, d_sin = _trig_tables(angles, dtype=grad_output.dtype, device=device)
         grad_output_perm = grad_output.permute(2, 1, 0).contiguous()
         d_grad_out = TorchCUDABridge.tensor_to_cuda_array(grad_output_perm)

{diffct-1.2.3 → diffct-1.2.5}/examples/fbp_fan.py RENAMED Viewed

@@ -2,6 +2,7 @@ import math
 import numpy as np
 import torch
 import matplotlib.pyplot as plt
+import torch.nn.functional as F
 from diffct.differentiable import FanProjectorFunction, FanBackprojectorFunction
@@ -76,9 +77,9 @@ def main():
     sino_weighted = sinogram * weights
     sinogram_filt = ramp_filter(sino_weighted)
-    reconstruction = FanBackprojectorFunction.apply(sinogram_filt, angles_torch,
+    reconstruction = F.relu(FanBackprojectorFunction.apply(sinogram_filt, angles_torch,
                                                     detector_spacing, Ny, Nx,
-                                                    sdd, sid, voxel_spacing)
+                                                    sdd, sid, voxel_spacing)) # ReLU to ensure non-negativity
     # --- FBP normalization ---
     # The backprojection is a sum over all angles. To approximate the integral,

{diffct-1.2.3 → diffct-1.2.5}/examples/fbp_parallel.py RENAMED Viewed

@@ -1,6 +1,7 @@
 import numpy as np
 import torch
 import matplotlib.pyplot as plt
+import torch.nn.functional as F
 from diffct.differentiable import ParallelProjectorFunction, ParallelBackprojectorFunction
@@ -64,8 +65,8 @@ def main():
     sinogram_filt = ramp_filter(sinogram)
-    reconstruction = ParallelBackprojectorFunction.apply(sinogram_filt, angles_torch,
-                                                         detector_spacing, Ny, Nx, voxel_spacing)
+    reconstruction = F.relu(ParallelBackprojectorFunction.apply(sinogram_filt, angles_torch,
+                                                         detector_spacing, Ny, Nx, voxel_spacing)) # ReLU to ensure non-negativity
     # --- FBP normalization ---
     # The backprojection is a sum over all angles. To approximate the integral,

{diffct-1.2.3 → diffct-1.2.5}/examples/fdk_cone.py RENAMED Viewed

@@ -2,6 +2,7 @@ import math
 import numpy as np
 import torch
 import matplotlib.pyplot as plt
+import torch.nn.functional as F
 from diffct.differentiable import ConeProjectorFunction, ConeBackprojectorFunction
@@ -110,8 +111,8 @@ def main():
     sino_weighted = sinogram * weights
     sinogram_filt = ramp_filter_3d(sino_weighted).contiguous()
-    reconstruction = ConeBackprojectorFunction.apply(sinogram_filt, angles_torch, Nz, Ny, Nx,
-                                                    du, dv, sdd, sid, voxel_spacing)
+    reconstruction = F.relu(ConeBackprojectorFunction.apply(sinogram_filt, angles_torch, Nz, Ny, Nx,
+                                                    du, dv, sdd, sid, voxel_spacing)) # ReLU to ensure non-negativity
     # --- FDK normalization ---
     # The backprojection is a sum over all angles. To approximate the integral,

{diffct-1.2.3 → diffct-1.2.5}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "diffct"
-version = "1.2.3"
+version = "1.2.5"
 description = "A CUDA-based library for computed tomography (CT) projection and reconstruction with differentiable operators"
 readme = "README.md"
 authors = [