diffct 1.2.6__tar.gz → 1.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffct-1.2.6 → diffct-1.2.7}/PKG-INFO +27 -1
- {diffct-1.2.6 → diffct-1.2.7}/README.md +26 -0
- {diffct-1.2.6 → diffct-1.2.7}/diffct/differentiable.py +219 -175
- {diffct-1.2.6 → diffct-1.2.7}/pyproject.toml +2 -3
- {diffct-1.2.6 → diffct-1.2.7}/.github/workflows/docs.yml +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/.github/workflows/release.yml +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/.gitignore +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/LICENSE +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/diffct/__init__.py +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/Makefile +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/_static/.gitkeep +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/api.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/conf.py +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/examples.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/fbp_fan_example.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/fbp_parallel_example.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/fdk_cone_example.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/getting_started.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/index.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/iterative_reco_cone_example.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/iterative_reco_fan_example.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/docs/source/iterative_reco_parallel_example.rst +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/examples/fbp_fan.py +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/examples/fbp_parallel.py +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/examples/fdk_cone.py +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/examples/iterative_reco_cone.py +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/examples/iterative_reco_fan.py +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/examples/iterative_reco_parallel.py +0 -0
- {diffct-1.2.6 → diffct-1.2.7}/requirements.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diffct
|
|
3
|
-
Version: 1.2.6
|
|
3
|
+
Version: 1.2.7
|
|
4
4
|
Summary: A CUDA-based library for computed tomography (CT) projection and reconstruction with differentiable operators
|
|
5
5
|
Project-URL: Homepage, https://github.com/sypsyp97/diffct
|
|
6
6
|
Author-email: Yipeng Sun <yipeng.sun@fau.de>
|
|
@@ -69,7 +69,33 @@ diffct/
|
|
|
69
69
|
|
|
70
70
|
### Installation
|
|
71
71
|
|
|
72
|
+
**CUDA 12:**
|
|
72
73
|
```bash
|
|
74
|
+
# Create and activate conda environment
|
|
75
|
+
conda create -n diffct python=3.12
|
|
76
|
+
conda activate diffct
|
|
77
|
+
|
|
78
|
+
# Install CUDA Toolkit, PyTorch, and Numba
|
|
79
|
+
conda install nvidia/label/cuda-12.8.1::cuda-toolkit
|
|
80
|
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
|
|
81
|
+
pip install numba-cuda[cu12]
|
|
82
|
+
|
|
83
|
+
# Install diffct
|
|
84
|
+
pip install diffct
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**CUDA 11:**
|
|
88
|
+
```bash
|
|
89
|
+
# Create and activate conda environment
|
|
90
|
+
conda create -n diffct python=3.12
|
|
91
|
+
conda activate diffct
|
|
92
|
+
|
|
93
|
+
# Install CUDA Toolkit, PyTorch, and Numba
|
|
94
|
+
conda install nvidia/label/cuda-11.8.0::cuda-toolkit
|
|
95
|
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
|
96
|
+
pip install numba-cuda[cu11]
|
|
97
|
+
|
|
98
|
+
# Install diffct
|
|
73
99
|
pip install diffct
|
|
74
100
|
```
|
|
75
101
|
|
|
@@ -52,7 +52,33 @@ diffct/
|
|
|
52
52
|
|
|
53
53
|
### Installation
|
|
54
54
|
|
|
55
|
+
**CUDA 12:**
|
|
55
56
|
```bash
|
|
57
|
+
# Create and activate conda environment
|
|
58
|
+
conda create -n diffct python=3.12
|
|
59
|
+
conda activate diffct
|
|
60
|
+
|
|
61
|
+
# Install CUDA Toolkit, PyTorch, and Numba
|
|
62
|
+
conda install nvidia/label/cuda-12.8.1::cuda-toolkit
|
|
63
|
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
|
|
64
|
+
pip install numba-cuda[cu12]
|
|
65
|
+
|
|
66
|
+
# Install diffct
|
|
67
|
+
pip install diffct
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**CUDA 11:**
|
|
71
|
+
```bash
|
|
72
|
+
# Create and activate conda environment
|
|
73
|
+
conda create -n diffct python=3.12
|
|
74
|
+
conda activate diffct
|
|
75
|
+
|
|
76
|
+
# Install CUDA Toolkit, PyTorch, and Numba
|
|
77
|
+
conda install nvidia/label/cuda-11.8.0::cuda-toolkit
|
|
78
|
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
|
79
|
+
pip install numba-cuda[cu11]
|
|
80
|
+
|
|
81
|
+
# Install diffct
|
|
56
82
|
pip install diffct
|
|
57
83
|
```
|
|
58
84
|
|
|
@@ -2,6 +2,7 @@ import math
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
import torch
|
|
4
4
|
from numba import cuda
|
|
5
|
+
from functools import lru_cache
|
|
5
6
|
|
|
6
7
|
# ---------------------------------------------------------------------------
|
|
7
8
|
# Global settings & helpers
|
|
@@ -19,8 +20,7 @@ _TPB_3D = (8, 8, 8)
|
|
|
19
20
|
# Trades numerical precision for performance in ray-tracing calculations
|
|
20
21
|
# Safe for CT reconstruction where slight precision loss is acceptable for speed gains
|
|
21
22
|
_FASTMATH_DECORATOR = cuda.jit(cache=True, fastmath=True)
|
|
22
|
-
|
|
23
|
-
_NON_FASTMATH_DECORATOR = cuda.jit(cache=True, fastmath=False)
|
|
23
|
+
|
|
24
24
|
_INF = _DTYPE(np.inf)
|
|
25
25
|
_EPSILON = _DTYPE(1e-6)
|
|
26
26
|
# === Device Management Utilities ===
|
|
@@ -70,8 +70,8 @@ class DeviceManager:
|
|
|
70
70
|
... )
|
|
71
71
|
tensor([1, 2, 3], device='cuda:0')
|
|
72
72
|
"""
|
|
73
|
-
if hasattr(tensor, "to"):
|
|
74
|
-
return tensor
|
|
73
|
+
if hasattr(tensor, "to") and tensor.device != device:
|
|
74
|
+
return tensor.to(device)
|
|
75
75
|
return tensor
|
|
76
76
|
|
|
77
77
|
# === PyTorch-CUDA Bridge ===
|
|
@@ -110,6 +110,7 @@ class TorchCUDABridge:
|
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
# === GPU-aware Trigonometric Table Generation ===
|
|
113
|
+
@lru_cache(maxsize=2048)
|
|
113
114
|
def _trig_tables(angles, dtype=_DTYPE, device=None):
|
|
114
115
|
"""Compute cosine and sine tables for input angles.
|
|
115
116
|
|
|
@@ -139,9 +140,11 @@ def _trig_tables(angles, dtype=_DTYPE, device=None):
|
|
|
139
140
|
"""
|
|
140
141
|
if isinstance(angles, torch.Tensor):
|
|
141
142
|
device = angles.device if device is None else device
|
|
142
|
-
cos
|
|
143
|
-
|
|
144
|
-
|
|
143
|
+
# Convert angles to the target dtype/device once, then derive cos and sin from that shared tensor
|
|
144
|
+
angles_device = angles.to(dtype=dtype, device=device)
|
|
145
|
+
cos = torch.cos(angles_device)
|
|
146
|
+
sin = torch.sin(angles_device)
|
|
147
|
+
return cos, sin
|
|
145
148
|
else:
|
|
146
149
|
# fallback for non-tensor inputs: compute via PyTorch on CPU for consistency
|
|
147
150
|
# Determine desired torch dtype
|
|
@@ -153,7 +156,7 @@ def _trig_tables(angles, dtype=_DTYPE, device=None):
|
|
|
153
156
|
np.float64: torch.float64,
|
|
154
157
|
}
|
|
155
158
|
torch_dtype = _NP_TO_TORCH.get(dtype, torch.float32)
|
|
156
|
-
# Convert input angles to a CPU torch tensor
|
|
159
|
+
# Convert input angles to a CPU torch tensor once, then compute cos and sin from it
|
|
157
160
|
angles_cpu = torch.tensor(angles, dtype=torch_dtype)
|
|
158
161
|
cos_cpu = torch.cos(angles_cpu)
|
|
159
162
|
sin_cpu = torch.sin(angles_cpu)
|
|
@@ -182,18 +185,23 @@ def _validate_3d_memory_layout(tensor, expected_order='DHW'):
|
|
|
182
185
|
ValueError
|
|
183
186
|
If tensor has unexpected memory layout or is non-contiguous
|
|
184
187
|
"""
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
# Check if tensor is contiguous to avoid memory duplication
|
|
189
|
-
if not tensor.is_contiguous():
|
|
190
|
-
raise ValueError(
|
|
191
|
-
"Input tensor must be contiguous. Call .contiguous() before passing to "
|
|
192
|
-
"cone beam functions to avoid memory duplication and ensure correct results."
|
|
193
|
-
)
|
|
188
|
+
shape = tensor.shape
|
|
189
|
+
if len(shape) != 3:
|
|
190
|
+
raise ValueError(f"Expected 3D tensor, got {len(shape)}D")
|
|
194
191
|
|
|
192
|
+
# Early return for common case - contiguous tensor with expected ordering
|
|
193
|
+
if tensor.is_contiguous() and expected_order in ('DHW', 'VHW'):
|
|
194
|
+
# For DHW and VHW, the expected order matches memory layout when contiguous
|
|
195
|
+
return
|
|
196
|
+
|
|
195
197
|
# Only check memory order for DHW and VHW, not for internal WHD layout
|
|
196
198
|
if expected_order in ('DHW', 'VHW'):
|
|
199
|
+
if not tensor.is_contiguous():
|
|
200
|
+
raise ValueError(
|
|
201
|
+
"Input tensor must be contiguous. Call .contiguous() before passing to "
|
|
202
|
+
"cone beam functions to avoid memory duplication and ensure correct results."
|
|
203
|
+
)
|
|
204
|
+
|
|
197
205
|
strides = tensor.stride()
|
|
198
206
|
order_mapping = {
|
|
199
207
|
'DHW': (0, 1, 2), # Depth, Height, Width
|
|
@@ -210,15 +218,15 @@ def _validate_3d_memory_layout(tensor, expected_order='DHW'):
|
|
|
210
218
|
if actual_order != expected_stride_order:
|
|
211
219
|
# Create appropriate error message based on context
|
|
212
220
|
if expected_order == 'VHW':
|
|
213
|
-
actual_str = f"({
|
|
221
|
+
actual_str = f"({shape[0]}, {shape[1]}, {shape[2]})"
|
|
214
222
|
expected_str = "(Views, Height, Width)"
|
|
215
223
|
fix_str = "ensure your sinogram has shape (num_views, det_v, det_u)"
|
|
216
224
|
elif expected_order == 'DHW':
|
|
217
|
-
actual_str = f"({
|
|
225
|
+
actual_str = f"({shape[0]}, {shape[1]}, {shape[2]})"
|
|
218
226
|
expected_str = "(Depth, Height, Width)"
|
|
219
227
|
fix_str = "ensure your volume has shape (D, H, W)"
|
|
220
228
|
else:
|
|
221
|
-
actual_str = str(tuple(
|
|
229
|
+
actual_str = str(tuple(shape))
|
|
222
230
|
expected_str = expected_order
|
|
223
231
|
fix_str = "check tensor dimensions"
|
|
224
232
|
|
|
@@ -316,7 +324,7 @@ def _parallel_2d_forward_kernel(
|
|
|
316
324
|
):
|
|
317
325
|
"""Compute the 2D parallel beam forward projection.
|
|
318
326
|
|
|
319
|
-
This CUDA kernel implements the Siddon
|
|
327
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
320
328
|
2D parallel beam forward projection.
|
|
321
329
|
|
|
322
330
|
Parameters
|
|
@@ -348,7 +356,7 @@ def _parallel_2d_forward_kernel(
|
|
|
348
356
|
|
|
349
357
|
Notes
|
|
350
358
|
-----
|
|
351
|
-
The Siddon
|
|
359
|
+
The Siddon method with interpolation provides accurate ray-volume intersection by:
|
|
352
360
|
- Calculating ray-volume boundary intersections to define traversal limits.
|
|
353
361
|
- Iterating through voxels along the ray path via parametric equations.
|
|
354
362
|
- Determining bilinear interpolation weights for sub-voxel sampling.
|
|
@@ -407,7 +415,7 @@ def _parallel_2d_forward_kernel(
|
|
|
407
415
|
if t_min >= t_max:
|
|
408
416
|
d_sino[iang, idet] = 0.0; return
|
|
409
417
|
|
|
410
|
-
# === SIDDON
|
|
418
|
+
# === SIDDON METHOD VOXEL TRAVERSAL INITIALIZATION ===
|
|
411
419
|
accum = 0.0 # Accumulated projection value along ray
|
|
412
420
|
t = t_min # Current ray parameter (distance from ray start)
|
|
413
421
|
|
|
@@ -417,12 +425,15 @@ def _parallel_2d_forward_kernel(
|
|
|
417
425
|
|
|
418
426
|
# Determine traversal direction and step sizes for each axis
|
|
419
427
|
step_x, step_y = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1) # Voxel stepping direction
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
428
|
+
# Hoist inverse directions to reduce divisions and branches
|
|
429
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
430
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
431
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF
|
|
432
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF
|
|
433
|
+
|
|
434
|
+
# Calculate parameter values for next voxel boundary crossings using inv_dir_*
|
|
435
|
+
tx = ((ix + (step_x > 0)) - cx - pnt_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
436
|
+
ty = ((iy + (step_y > 0)) - cy - pnt_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
426
437
|
|
|
427
438
|
# === MAIN RAY TRAVERSAL LOOP ===
|
|
428
439
|
# Step through voxels along ray path, accumulating weighted contributions
|
|
@@ -437,9 +448,10 @@ def _parallel_2d_forward_kernel(
|
|
|
437
448
|
# === BILINEAR INTERPOLATION SAMPLING ===
|
|
438
449
|
# Sample volume at ray segment midpoint for accurate integration
|
|
439
450
|
# Mathematical basis: Midpoint rule for numerical integration along ray segments
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
451
|
+
t_mid = t + seg_len * 0.5
|
|
452
|
+
mid_x = pnt_x + t_mid * dir_x + cx # Midpoint x-coordinate in image space
|
|
453
|
+
mid_y = pnt_y + t_mid * dir_y + cy # Midpoint y-coordinate in image space
|
|
454
|
+
|
|
443
455
|
# Convert continuous coordinates to discrete voxel indices and fractional weights
|
|
444
456
|
# Floor operation gives base voxel index, fractional part gives interpolation weights
|
|
445
457
|
ix0, iy0 = int(math.floor(mid_x)), int(math.floor(mid_y)) # Base voxel indices (bottom-left corner)
|
|
@@ -476,7 +488,7 @@ def _parallel_2d_forward_kernel(
|
|
|
476
488
|
|
|
477
489
|
d_sino[iang, idet] = accum
|
|
478
490
|
|
|
479
|
-
@
|
|
491
|
+
@_FASTMATH_DECORATOR
|
|
480
492
|
def _parallel_2d_backward_kernel(
|
|
481
493
|
d_sino, n_ang, n_det,
|
|
482
494
|
d_image, Nx, Ny,
|
|
@@ -484,8 +496,8 @@ def _parallel_2d_backward_kernel(
|
|
|
484
496
|
):
|
|
485
497
|
"""Compute the 2D parallel beam backprojection.
|
|
486
498
|
|
|
487
|
-
This CUDA kernel implements the Siddon-
|
|
488
|
-
beam backprojection.
|
|
499
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
500
|
+
2D parallel beam backprojection.
|
|
489
501
|
|
|
490
502
|
Parameters
|
|
491
503
|
----------
|
|
@@ -549,16 +561,18 @@ def _parallel_2d_backward_kernel(
|
|
|
549
561
|
|
|
550
562
|
if t_min >= t_max: return
|
|
551
563
|
|
|
552
|
-
# === SIDDON
|
|
564
|
+
# === SIDDON METHOD TRAVERSAL INITIALIZATION ===
|
|
553
565
|
t = t_min
|
|
554
566
|
ix = int(math.floor(pnt_x + t * dir_x + cx))
|
|
555
567
|
iy = int(math.floor(pnt_y + t * dir_y + cy))
|
|
556
568
|
|
|
557
569
|
step_x, step_y = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1)
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
570
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
571
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
572
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF
|
|
573
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF
|
|
574
|
+
tx = ((ix + (step_x > 0)) - cx - pnt_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
575
|
+
ty = ((iy + (step_y > 0)) - cy - pnt_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
562
576
|
|
|
563
577
|
# === BACKPROJECTION TRAVERSAL LOOP ===
|
|
564
578
|
# Distribute sinogram value along ray path using bilinear interpolation
|
|
@@ -568,8 +582,9 @@ def _parallel_2d_backward_kernel(
|
|
|
568
582
|
seg_len = t_next - t
|
|
569
583
|
if seg_len > _EPSILON:
|
|
570
584
|
# Sample at ray segment midpoint (same as forward projection)
|
|
571
|
-
|
|
572
|
-
|
|
585
|
+
t_mid = t + seg_len * 0.5
|
|
586
|
+
mid_x = pnt_x + t_mid * dir_x + cx
|
|
587
|
+
mid_y = pnt_y + t_mid * dir_y + cy
|
|
573
588
|
ix0, iy0 = int(math.floor(mid_x)), int(math.floor(mid_y))
|
|
574
589
|
dx, dy = mid_x - ix0, mid_y - iy0
|
|
575
590
|
|
|
@@ -585,10 +600,12 @@ def _parallel_2d_backward_kernel(
|
|
|
585
600
|
# Performance impact: Atomic operations are slower than regular writes but necessary for correctness
|
|
586
601
|
# Memory access pattern: Global memory atomics with potential contention when rays update the same pixel, but unavoidable
|
|
587
602
|
cval = val * seg_len # Contribution value for this ray segment
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
cuda.atomic.add(d_image, (iy0
|
|
591
|
-
cuda.atomic.add(d_image, (iy0
|
|
603
|
+
one_minus_dx = 1.0 - dx
|
|
604
|
+
one_minus_dy = 1.0 - dy
|
|
605
|
+
cuda.atomic.add(d_image, (iy0, ix0), cval * one_minus_dx * one_minus_dy)
|
|
606
|
+
cuda.atomic.add(d_image, (iy0, ix0 + 1), cval * dx * one_minus_dy)
|
|
607
|
+
cuda.atomic.add(d_image, (iy0 + 1, ix0), cval * one_minus_dx * dy)
|
|
608
|
+
cuda.atomic.add(d_image, (iy0 + 1, ix0 + 1), cval * dx * dy)
|
|
592
609
|
|
|
593
610
|
# Advance to next voxel (identical logic to forward projection)
|
|
594
611
|
if tx <= ty:
|
|
@@ -613,8 +630,8 @@ def _fan_2d_forward_kernel(
|
|
|
613
630
|
):
|
|
614
631
|
"""Compute the 2D fan beam forward projection.
|
|
615
632
|
|
|
616
|
-
This CUDA kernel implements the Siddon-
|
|
617
|
-
forward projection.
|
|
633
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
634
|
+
2D fan beam forward projection.
|
|
618
635
|
|
|
619
636
|
Parameters
|
|
620
637
|
----------
|
|
@@ -704,7 +721,7 @@ def _fan_2d_forward_kernel(
|
|
|
704
721
|
if t_min >= t_max: # No valid intersection
|
|
705
722
|
d_sino[iang, idet] = 0.0; return
|
|
706
723
|
|
|
707
|
-
# === SIDDON
|
|
724
|
+
# === SIDDON METHOD TRAVERSAL (same algorithm as parallel beam) ===
|
|
708
725
|
accum = 0.0 # Accumulated projection value
|
|
709
726
|
t = t_min # Current ray parameter
|
|
710
727
|
|
|
@@ -714,10 +731,12 @@ def _fan_2d_forward_kernel(
|
|
|
714
731
|
|
|
715
732
|
# Traversal parameters (identical to parallel beam implementation)
|
|
716
733
|
step_x, step_y = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1)
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
734
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
735
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
736
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF
|
|
737
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF
|
|
738
|
+
tx = ((ix + (step_x > 0)) - cx - src_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
739
|
+
ty = ((iy + (step_y > 0)) - cy - src_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
721
740
|
|
|
722
741
|
# Main traversal loop with bilinear interpolation (identical to parallel beam)
|
|
723
742
|
while t < t_max:
|
|
@@ -726,8 +745,9 @@ def _fan_2d_forward_kernel(
|
|
|
726
745
|
seg_len = t_next - t
|
|
727
746
|
if seg_len > _EPSILON:
|
|
728
747
|
# Sample at midpoint using source as ray origin
|
|
729
|
-
|
|
730
|
-
|
|
748
|
+
t_mid = t + seg_len * 0.5
|
|
749
|
+
mid_x = src_x + t_mid * dir_x + cx
|
|
750
|
+
mid_y = src_y + t_mid * dir_y + cy
|
|
731
751
|
ix0, iy0 = int(math.floor(mid_x)), int(math.floor(mid_y))
|
|
732
752
|
dx, dy = mid_x - ix0, mid_y - iy0
|
|
733
753
|
|
|
@@ -756,7 +776,7 @@ def _fan_2d_forward_kernel(
|
|
|
756
776
|
|
|
757
777
|
d_sino[iang, idet] = accum
|
|
758
778
|
|
|
759
|
-
@
|
|
779
|
+
@_FASTMATH_DECORATOR
|
|
760
780
|
def _fan_2d_backward_kernel(
|
|
761
781
|
d_sino, n_ang, n_det,
|
|
762
782
|
d_image, Nx, Ny,
|
|
@@ -765,8 +785,8 @@ def _fan_2d_backward_kernel(
|
|
|
765
785
|
):
|
|
766
786
|
"""Compute the 2D fan beam backprojection.
|
|
767
787
|
|
|
768
|
-
This CUDA kernel implements the Siddon-
|
|
769
|
-
backprojection.
|
|
788
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
789
|
+
2D fan beam backprojection.
|
|
770
790
|
|
|
771
791
|
Parameters
|
|
772
792
|
----------
|
|
@@ -851,16 +871,18 @@ def _fan_2d_backward_kernel(
|
|
|
851
871
|
|
|
852
872
|
if t_min >= t_max: return
|
|
853
873
|
|
|
854
|
-
# === SIDDON
|
|
874
|
+
# === SIDDON METHOD TRAVERSAL INITIALIZATION ===
|
|
855
875
|
t = t_min
|
|
856
876
|
ix = int(math.floor(src_x + t * dir_x + cx))
|
|
857
877
|
iy = int(math.floor(src_y + t * dir_y + cy))
|
|
858
878
|
|
|
859
879
|
step_x, step_y = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1)
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
880
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
881
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
882
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF
|
|
883
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF
|
|
884
|
+
tx = ((ix + (step_x > 0)) - cx - src_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
885
|
+
ty = ((iy + (step_y > 0)) - cy - src_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
864
886
|
|
|
865
887
|
# === FAN BEAM BACKPROJECTION TRAVERSAL LOOP ===
|
|
866
888
|
# Distribute sinogram value along divergent ray path using bilinear interpolation
|
|
@@ -870,8 +892,9 @@ def _fan_2d_backward_kernel(
|
|
|
870
892
|
seg_len = t_next - t
|
|
871
893
|
if seg_len > _EPSILON:
|
|
872
894
|
# Sample at ray segment midpoint using source as ray origin
|
|
873
|
-
|
|
874
|
-
|
|
895
|
+
t_mid = t + seg_len * 0.5
|
|
896
|
+
mid_x = src_x + t_mid * dir_x + cx
|
|
897
|
+
mid_y = src_y + t_mid * dir_y + cy
|
|
875
898
|
ix0, iy0 = int(math.floor(mid_x)), int(math.floor(mid_y))
|
|
876
899
|
dx, dy = mid_x - ix0, mid_y - iy0
|
|
877
900
|
|
|
@@ -886,10 +909,12 @@ def _fan_2d_backward_kernel(
|
|
|
886
909
|
# Atomic operations prevent race conditions when multiple divergent rays write to same voxel
|
|
887
910
|
# Performance consideration: Fan beam geometry may have more atomic contention than parallel beam
|
|
888
911
|
cval = val * seg_len # Contribution value for this ray segment
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
cuda.atomic.add(d_image, (iy0
|
|
892
|
-
cuda.atomic.add(d_image, (iy0
|
|
912
|
+
one_minus_dx = 1.0 - dx
|
|
913
|
+
one_minus_dy = 1.0 - dy
|
|
914
|
+
cuda.atomic.add(d_image, (iy0, ix0), cval * one_minus_dx * one_minus_dy)
|
|
915
|
+
cuda.atomic.add(d_image, (iy0, ix0 + 1), cval * dx * one_minus_dy)
|
|
916
|
+
cuda.atomic.add(d_image, (iy0 + 1, ix0), cval * one_minus_dx * dy)
|
|
917
|
+
cuda.atomic.add(d_image, (iy0 + 1, ix0 + 1), cval * dx * dy)
|
|
893
918
|
|
|
894
919
|
# === VOXEL BOUNDARY CROSSING LOGIC ===
|
|
895
920
|
# Advance to next voxel based on which boundary is crossed first
|
|
@@ -915,8 +940,8 @@ def _cone_3d_forward_kernel(
|
|
|
915
940
|
):
|
|
916
941
|
"""Compute the 3D cone-beam forward projection.
|
|
917
942
|
|
|
918
|
-
This CUDA kernel implements the Siddon-
|
|
919
|
-
forward projection.
|
|
943
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
944
|
+
3D cone-beam forward projection.
|
|
920
945
|
|
|
921
946
|
Parameters
|
|
922
947
|
----------
|
|
@@ -1025,7 +1050,7 @@ def _cone_3d_forward_kernel(
|
|
|
1025
1050
|
if t_min >= t_max: # No valid 3D intersection
|
|
1026
1051
|
d_sino[iview, iu, iv] = 0.0; return
|
|
1027
1052
|
|
|
1028
|
-
# === 3D SIDDON
|
|
1053
|
+
# === 3D SIDDON METHOD TRAVERSAL INITIALIZATION ===
|
|
1029
1054
|
accum = 0.0 # Accumulated projection value
|
|
1030
1055
|
t = t_min # Current ray parameter
|
|
1031
1056
|
|
|
@@ -1036,14 +1061,17 @@ def _cone_3d_forward_kernel(
|
|
|
1036
1061
|
|
|
1037
1062
|
# 3D traversal parameters (extends 2D algorithm)
|
|
1038
1063
|
step_x, step_y, step_z = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1), (1 if dir_z >= 0 else -1)
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1064
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
1065
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
1066
|
+
inv_dir_z = (1.0 / dir_z) if abs(dir_z) > _EPSILON else 0.0
|
|
1067
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF # Parameter increment per x-voxel
|
|
1068
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF # Parameter increment per y-voxel
|
|
1069
|
+
dt_z = abs(inv_dir_z) if abs(dir_z) > _EPSILON else _INF # Parameter increment per z-voxel
|
|
1070
|
+
|
|
1043
1071
|
# Calculate parameter values for next 3D voxel boundary crossings
|
|
1044
|
-
tx = ((ix + (step_x > 0)) - cx - src_x)
|
|
1045
|
-
ty = ((iy + (step_y > 0)) - cy - src_y)
|
|
1046
|
-
tz = ((iz + (step_z > 0)) - cz - src_z)
|
|
1072
|
+
tx = ((ix + (step_x > 0)) - cx - src_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
1073
|
+
ty = ((iy + (step_y > 0)) - cy - src_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
1074
|
+
tz = ((iz + (step_z > 0)) - cz - src_z) * inv_dir_z if abs(dir_z) > _EPSILON else _INF
|
|
1047
1075
|
|
|
1048
1076
|
# === 3D TRAVERSAL LOOP WITH TRILINEAR INTERPOLATION ===
|
|
1049
1077
|
while t < t_max:
|
|
@@ -1056,34 +1084,35 @@ def _cone_3d_forward_kernel(
|
|
|
1056
1084
|
# === TRILINEAR INTERPOLATION SAMPLING ===
|
|
1057
1085
|
# Sample 3D volume at ray segment midpoint for accurate integration
|
|
1058
1086
|
# Mathematical basis: Midpoint rule for numerical integration along 3D ray segments
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1087
|
+
t_mid = t + seg_len * 0.5
|
|
1088
|
+
mid_x = src_x + t_mid * dir_x + cx # Midpoint x-coordinate in volume space
|
|
1089
|
+
mid_y = src_y + t_mid * dir_y + cy # Midpoint y-coordinate in volume space
|
|
1090
|
+
mid_z = src_z + t_mid * dir_z + cz # Midpoint z-coordinate in volume space
|
|
1091
|
+
|
|
1063
1092
|
# Convert continuous 3D coordinates to discrete voxel indices and fractional weights
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1093
|
+
ix0, iy0, iz0 = int(math.floor(mid_x)), int(math.floor(mid_y)), int(math.floor(mid_z))
|
|
1094
|
+
dx, dy, dz = mid_x - ix0, mid_y - iy0, mid_z - iz0
|
|
1095
|
+
|
|
1068
1096
|
# Clamp indices to stay in-bounds during interpolation
|
|
1069
1097
|
ix0 = max(0, min(ix0, Nx - 2))
|
|
1070
1098
|
iy0 = max(0, min(iy0, Ny - 2))
|
|
1071
1099
|
iz0 = max(0, min(iz0, Nz - 2))
|
|
1072
|
-
|
|
1100
|
+
|
|
1101
|
+
# Precompute complements
|
|
1102
|
+
omdx = 1.0 - dx
|
|
1103
|
+
omdy = 1.0 - dy
|
|
1104
|
+
omdz = 1.0 - dz
|
|
1105
|
+
|
|
1073
1106
|
# === TRILINEAR INTERPOLATION WEIGHT CALCULATION ===
|
|
1074
|
-
# Mathematical basis: Trilinear interpolation formula f(x,y,z) = Σ f(xi,yi,zi) * wi(x,y,z)
|
|
1075
|
-
# where wi(x,y,z) are the trilinear basis functions for each corner voxel of the 3D cube
|
|
1076
|
-
# Weights are products of 1D linear interpolation weights: (1-dx) or dx, (1-dy) or dy, (1-dz) or dz
|
|
1077
|
-
# Each of the 8 cube corners gets a weight proportional to its distance from the sample point
|
|
1078
1107
|
val = (
|
|
1079
|
-
d_vol[ix0, iy0, iz0] *
|
|
1080
|
-
d_vol[ix0 + 1, iy0, iz0] * dx*
|
|
1081
|
-
d_vol[ix0, iy0 + 1, iz0] *
|
|
1082
|
-
d_vol[ix0, iy0, iz0 + 1] *
|
|
1083
|
-
d_vol[ix0 + 1, iy0 + 1, iz0] * dx*dy*
|
|
1084
|
-
d_vol[ix0 + 1, iy0, iz0 + 1] * dx*
|
|
1085
|
-
d_vol[ix0, iy0 + 1, iz0 + 1] *
|
|
1086
|
-
d_vol[ix0 + 1, iy0 + 1, iz0 + 1] * dx*dy*dz
|
|
1108
|
+
d_vol[ix0, iy0, iz0] * omdx*omdy*omdz +
|
|
1109
|
+
d_vol[ix0 + 1, iy0, iz0] * dx *omdy*omdz +
|
|
1110
|
+
d_vol[ix0, iy0 + 1, iz0] * omdx*dy *omdz +
|
|
1111
|
+
d_vol[ix0, iy0, iz0 + 1] * omdx*omdy*dz +
|
|
1112
|
+
d_vol[ix0 + 1, iy0 + 1, iz0] * dx *dy *omdz +
|
|
1113
|
+
d_vol[ix0 + 1, iy0, iz0 + 1] * dx *omdy*dz +
|
|
1114
|
+
d_vol[ix0, iy0 + 1, iz0 + 1] * omdx*dy *dz +
|
|
1115
|
+
d_vol[ix0 + 1, iy0 + 1, iz0 + 1] * dx *dy *dz
|
|
1087
1116
|
)
|
|
1088
1117
|
# Accumulate contribution weighted by 3D ray segment length (discrete line integral approximation)
|
|
1089
1118
|
# This implements the 3D Radon transform: integral of f(x,y,z) along the ray path
|
|
@@ -1106,7 +1135,7 @@ def _cone_3d_forward_kernel(
|
|
|
1106
1135
|
|
|
1107
1136
|
d_sino[iview, iu, iv] = accum
|
|
1108
1137
|
|
|
1109
|
-
@
|
|
1138
|
+
@_FASTMATH_DECORATOR
|
|
1110
1139
|
def _cone_3d_backward_kernel(
|
|
1111
1140
|
d_sino, n_views, n_u, n_v,
|
|
1112
1141
|
d_vol, Nx, Ny, Nz,
|
|
@@ -1115,8 +1144,8 @@ def _cone_3d_backward_kernel(
|
|
|
1115
1144
|
):
|
|
1116
1145
|
"""Compute the 3D cone-beam backprojection.
|
|
1117
1146
|
|
|
1118
|
-
This CUDA kernel implements the Siddon-
|
|
1119
|
-
backprojection.
|
|
1147
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
1148
|
+
3D cone-beam backprojection.
|
|
1120
1149
|
|
|
1121
1150
|
Parameters
|
|
1122
1151
|
----------
|
|
@@ -1219,7 +1248,7 @@ def _cone_3d_backward_kernel(
|
|
|
1219
1248
|
|
|
1220
1249
|
if t_min >= t_max: return
|
|
1221
1250
|
|
|
1222
|
-
# === 3D SIDDON
|
|
1251
|
+
# === 3D SIDDON METHOD TRAVERSAL INITIALIZATION ===
|
|
1223
1252
|
t = t_min
|
|
1224
1253
|
ix = int(math.floor(src_x + t * dir_x + cx)) # Current voxel x-index
|
|
1225
1254
|
iy = int(math.floor(src_y + t * dir_y + cy)) # Current voxel y-index
|
|
@@ -1227,14 +1256,17 @@ def _cone_3d_backward_kernel(
|
|
|
1227
1256
|
|
|
1228
1257
|
# 3D traversal parameters (extends 2D algorithm)
|
|
1229
1258
|
step_x, step_y, step_z = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1), (1 if dir_z >= 0 else -1)
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1259
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
1260
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
1261
|
+
inv_dir_z = (1.0 / dir_z) if abs(dir_z) > _EPSILON else 0.0
|
|
1262
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF # Parameter increment per x-voxel
|
|
1263
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF # Parameter increment per y-voxel
|
|
1264
|
+
dt_z = abs(inv_dir_z) if abs(dir_z) > _EPSILON else _INF # Parameter increment per z-voxel
|
|
1265
|
+
|
|
1234
1266
|
# Calculate parameter values for next 3D voxel boundary crossings
|
|
1235
|
-
tx = ((ix + (step_x > 0)) - cx - src_x)
|
|
1236
|
-
ty = ((iy + (step_y > 0)) - cy - src_y)
|
|
1237
|
-
tz = ((iz + (step_z > 0)) - cz - src_z)
|
|
1267
|
+
tx = ((ix + (step_x > 0)) - cx - src_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
1268
|
+
ty = ((iy + (step_y > 0)) - cy - src_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
1269
|
+
tz = ((iz + (step_z > 0)) - cz - src_z) * inv_dir_z if abs(dir_z) > _EPSILON else _INF
|
|
1238
1270
|
|
|
1239
1271
|
# === 3D CONE BEAM BACKPROJECTION TRAVERSAL LOOP ===
|
|
1240
1272
|
# Distribute sinogram value along divergent 3D ray path using trilinear interpolation
|
|
@@ -1247,35 +1279,35 @@ def _cone_3d_backward_kernel(
|
|
|
1247
1279
|
if seg_len > _EPSILON:
|
|
1248
1280
|
# === TRILINEAR INTERPOLATION SAMPLING ===
|
|
1249
1281
|
# Sample 3D volume at ray segment midpoint using source as ray origin
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1282
|
+
t_mid = t + seg_len * 0.5
|
|
1283
|
+
mid_x = src_x + t_mid * dir_x + cx
|
|
1284
|
+
mid_y = src_y + t_mid * dir_y + cy
|
|
1285
|
+
mid_z = src_z + t_mid * dir_z + cz
|
|
1286
|
+
|
|
1254
1287
|
# Convert continuous 3D coordinates to voxel indices and interpolation weights
|
|
1255
1288
|
ix0, iy0, iz0 = int(math.floor(mid_x)), int(math.floor(mid_y)), int(math.floor(mid_z))
|
|
1256
|
-
dx, dy, dz = mid_x - ix0, mid_y - iy0, mid_z - iz0
|
|
1257
|
-
|
|
1289
|
+
dx, dy, dz = mid_x - ix0, mid_y - iy0, mid_z - iz0
|
|
1290
|
+
|
|
1258
1291
|
# Clamp indices to stay in-bounds during interpolation
|
|
1259
1292
|
ix0 = max(0, min(ix0, Nx - 2))
|
|
1260
1293
|
iy0 = max(0, min(iy0, Ny - 2))
|
|
1261
1294
|
iz0 = max(0, min(iz0, Nz - 2))
|
|
1262
|
-
|
|
1295
|
+
|
|
1296
|
+
# Precompute complements and contribution
|
|
1297
|
+
omdx = 1.0 - dx
|
|
1298
|
+
omdy = 1.0 - dy
|
|
1299
|
+
omdz = 1.0 - dz
|
|
1300
|
+
cval = g * seg_len
|
|
1301
|
+
|
|
1263
1302
|
# === ATOMIC BACKPROJECTION WITH TRILINEAR WEIGHTS ===
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
cuda.atomic.add(d_vol, (ix0,
|
|
1272
|
-
cuda.atomic.add(d_vol, (ix0 + 1, iy0, iz0), cval * dx*(1-dy)*(1-dz)) # Corner (1,0,0) - atomic write
|
|
1273
|
-
cuda.atomic.add(d_vol, (ix0, iy0 + 1, iz0), cval * (1-dx)*dy*(1-dz)) # Corner (0,1,0) - atomic write
|
|
1274
|
-
cuda.atomic.add(d_vol, (ix0, iy0, iz0 + 1), cval * (1-dx)*(1-dy)*dz) # Corner (0,0,1) - atomic write
|
|
1275
|
-
cuda.atomic.add(d_vol, (ix0 + 1, iy0 + 1, iz0), cval * dx*dy*(1-dz)) # Corner (1,1,0) - atomic write
|
|
1276
|
-
cuda.atomic.add(d_vol, (ix0 + 1, iy0, iz0 + 1), cval * dx*(1-dy)*dz) # Corner (1,0,1) - atomic write
|
|
1277
|
-
cuda.atomic.add(d_vol, (ix0, iy0 + 1, iz0 + 1), cval * (1-dx)*dy*dz) # Corner (0,1,1) - atomic write
|
|
1278
|
-
cuda.atomic.add(d_vol, (ix0 + 1, iy0 + 1, iz0 + 1), cval * dx*dy*dz) # Corner (1,1,1) - atomic write
|
|
1303
|
+
cuda.atomic.add(d_vol, (ix0, iy0, iz0), cval * omdx*omdy*omdz)
|
|
1304
|
+
cuda.atomic.add(d_vol, (ix0 + 1, iy0, iz0), cval * dx *omdy*omdz)
|
|
1305
|
+
cuda.atomic.add(d_vol, (ix0, iy0 + 1, iz0), cval * omdx*dy *omdz)
|
|
1306
|
+
cuda.atomic.add(d_vol, (ix0, iy0, iz0 + 1), cval * omdx*omdy*dz)
|
|
1307
|
+
cuda.atomic.add(d_vol, (ix0 + 1, iy0 + 1, iz0), cval * dx *dy *omdz)
|
|
1308
|
+
cuda.atomic.add(d_vol, (ix0 + 1, iy0, iz0 + 1), cval * dx *omdy*dz)
|
|
1309
|
+
cuda.atomic.add(d_vol, (ix0, iy0 + 1, iz0 + 1), cval * omdx*dy *dz)
|
|
1310
|
+
cuda.atomic.add(d_vol, (ix0 + 1, iy0 + 1, iz0 + 1), cval * dx *dy *dz)
|
|
1279
1311
|
|
|
1280
1312
|
# === 3D VOXEL BOUNDARY CROSSING LOGIC ===
|
|
1281
1313
|
# Advance to next voxel based on which boundary is crossed first in 3D
|
|
@@ -1305,8 +1337,8 @@ class ParallelProjectorFunction(torch.autograd.Function):
|
|
|
1305
1337
|
|
|
1306
1338
|
Notes
|
|
1307
1339
|
-----
|
|
1308
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
1309
|
-
|
|
1340
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1341
|
+
method with interpolation for parallel beam CT geometry. The forward pass computes
|
|
1310
1342
|
the sinogram from a 2D image using parallel beam geometry. The backward pass
|
|
1311
1343
|
computes gradients using the adjoint backprojection operation. Requires
|
|
1312
1344
|
CUDA-capable hardware and a properly configured CUDA environment; all input
|
|
@@ -1358,7 +1390,7 @@ class ParallelProjectorFunction(torch.autograd.Function):
|
|
|
1358
1390
|
-----
|
|
1359
1391
|
- All input tensors must be on the same CUDA device.
|
|
1360
1392
|
- The operation is fully differentiable and supports autograd.
|
|
1361
|
-
- Uses the Siddon
|
|
1393
|
+
- Uses the Siddon method with interpolation for accurate ray tracing and bilinear interpolation.
|
|
1362
1394
|
|
|
1363
1395
|
Examples
|
|
1364
1396
|
--------
|
|
@@ -1394,11 +1426,12 @@ class ParallelProjectorFunction(torch.autograd.Function):
|
|
|
1394
1426
|
grid, tpb = _grid_2d(n_angles, num_detectors)
|
|
1395
1427
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1396
1428
|
|
|
1397
|
-
|
|
1429
|
+
pt_stream = torch.cuda.current_stream()
|
|
1430
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1431
|
+
_parallel_2d_forward_kernel[grid, tpb, numba_stream](
|
|
1398
1432
|
d_image, Nx, Ny, d_sino, n_angles, num_detectors,
|
|
1399
1433
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr, cx, cy, _DTYPE(voxel_spacing)
|
|
1400
1434
|
)
|
|
1401
|
-
torch.cuda.synchronize()
|
|
1402
1435
|
|
|
1403
1436
|
ctx.save_for_backward(angles)
|
|
1404
1437
|
ctx.intermediate = (num_detectors, detector_spacing, Ny, Nx, voxel_spacing)
|
|
@@ -1427,12 +1460,13 @@ class ParallelProjectorFunction(torch.autograd.Function):
|
|
|
1427
1460
|
grid, tpb = _grid_2d(n_angles, num_detectors)
|
|
1428
1461
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1429
1462
|
|
|
1430
|
-
|
|
1463
|
+
pt_stream = torch.cuda.current_stream()
|
|
1464
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1465
|
+
_parallel_2d_backward_kernel[grid, tpb, numba_stream](
|
|
1431
1466
|
d_grad_sino, n_angles, num_detectors,
|
|
1432
1467
|
d_img_grad, Nx, Ny,
|
|
1433
1468
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr, cx, cy, _DTYPE(voxel_spacing)
|
|
1434
1469
|
)
|
|
1435
|
-
torch.cuda.synchronize()
|
|
1436
1470
|
|
|
1437
1471
|
return grad_image, None, None, None, None
|
|
1438
1472
|
|
|
@@ -1445,8 +1479,8 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
|
|
|
1445
1479
|
|
|
1446
1480
|
Notes
|
|
1447
1481
|
-----
|
|
1448
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon
|
|
1449
|
-
|
|
1482
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1483
|
+
method with interpolation for parallel beam backprojection. The forward pass computes a 2D
|
|
1450
1484
|
reconstruction from sinogram data using parallel beam backprojection, and the
|
|
1451
1485
|
backward pass computes gradients via forward projection as the adjoint operation.
|
|
1452
1486
|
Requires CUDA-capable hardware and consistent device placements.
|
|
@@ -1493,7 +1527,7 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
|
|
|
1493
1527
|
-----
|
|
1494
1528
|
- All input tensors must be on the same CUDA device.
|
|
1495
1529
|
- The operation is fully differentiable and supports autograd.
|
|
1496
|
-
- Uses the Siddon
|
|
1530
|
+
- Uses the Siddon method with interpolation for accurate ray tracing and bilinear interpolation.
|
|
1497
1531
|
|
|
1498
1532
|
Examples
|
|
1499
1533
|
--------
|
|
@@ -1529,11 +1563,12 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
|
|
|
1529
1563
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1530
1564
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1531
1565
|
|
|
1532
|
-
|
|
1566
|
+
pt_stream = torch.cuda.current_stream()
|
|
1567
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1568
|
+
_parallel_2d_backward_kernel[grid, tpb, numba_stream](
|
|
1533
1569
|
d_sino, n_ang, n_det, d_reco, Nx, Ny,
|
|
1534
1570
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr, cx, cy, _DTYPE(voxel_spacing)
|
|
1535
1571
|
)
|
|
1536
|
-
torch.cuda.synchronize()
|
|
1537
1572
|
|
|
1538
1573
|
ctx.save_for_backward(angles)
|
|
1539
1574
|
ctx.intermediate = (H, W, detector_spacing, sinogram.shape[0], sinogram.shape[1], voxel_spacing)
|
|
@@ -1567,11 +1602,12 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
|
|
|
1567
1602
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1568
1603
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1569
1604
|
|
|
1570
|
-
|
|
1605
|
+
pt_stream = torch.cuda.current_stream()
|
|
1606
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1607
|
+
_parallel_2d_forward_kernel[grid, tpb, numba_stream](
|
|
1571
1608
|
d_grad_out, Nx, Ny, d_sino_grad, n_ang, n_det,
|
|
1572
1609
|
_DTYPE(detector_spacing), d_cos, d_sin, cx, cy, _DTYPE(voxel_spacing)
|
|
1573
1610
|
)
|
|
1574
|
-
torch.cuda.synchronize()
|
|
1575
1611
|
|
|
1576
1612
|
return grad_sino, None, None, None, None, None
|
|
1577
1613
|
|
|
@@ -1584,8 +1620,8 @@ class FanProjectorFunction(torch.autograd.Function):
|
|
|
1584
1620
|
|
|
1585
1621
|
Notes
|
|
1586
1622
|
-----
|
|
1587
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
1588
|
-
|
|
1623
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1624
|
+
method with interpolation for fan beam geometry, where rays diverge from a point
|
|
1589
1625
|
X-ray source to a linear detector array. The forward pass computes sinograms
|
|
1590
1626
|
using divergent beam geometry, and the backward pass computes gradients via
|
|
1591
1627
|
adjoint backprojection.
|
|
@@ -1637,7 +1673,7 @@ class FanProjectorFunction(torch.autograd.Function):
|
|
|
1637
1673
|
- All input tensors must be on the same CUDA device.
|
|
1638
1674
|
- The operation is fully differentiable and supports autograd.
|
|
1639
1675
|
- Fan beam geometry uses divergent rays from a point source to the detector.
|
|
1640
|
-
- Uses the Siddon
|
|
1676
|
+
- Uses the Siddon method with interpolation for accurate ray tracing and bilinear interpolation.
|
|
1641
1677
|
|
|
1642
1678
|
Examples
|
|
1643
1679
|
--------
|
|
@@ -1668,12 +1704,13 @@ class FanProjectorFunction(torch.autograd.Function):
|
|
|
1668
1704
|
grid, tpb = _grid_2d(n_ang, num_detectors)
|
|
1669
1705
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1670
1706
|
|
|
1671
|
-
|
|
1707
|
+
pt_stream = torch.cuda.current_stream()
|
|
1708
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1709
|
+
_fan_2d_forward_kernel[grid, tpb, numba_stream](
|
|
1672
1710
|
d_image, Nx, Ny, d_sino, n_ang, num_detectors,
|
|
1673
1711
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr,
|
|
1674
1712
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, _DTYPE(voxel_spacing)
|
|
1675
1713
|
)
|
|
1676
|
-
torch.cuda.synchronize()
|
|
1677
1714
|
|
|
1678
1715
|
ctx.save_for_backward(angles)
|
|
1679
1716
|
ctx.intermediate = (num_detectors, detector_spacing, Ny, Nx,
|
|
@@ -1703,12 +1740,13 @@ class FanProjectorFunction(torch.autograd.Function):
|
|
|
1703
1740
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1704
1741
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1705
1742
|
|
|
1706
|
-
|
|
1743
|
+
pt_stream = torch.cuda.current_stream()
|
|
1744
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1745
|
+
_fan_2d_backward_kernel[grid, tpb, numba_stream](
|
|
1707
1746
|
d_grad_sino, n_ang, n_det, d_img_grad, Nx, Ny,
|
|
1708
1747
|
_DTYPE(det_spacing), d_cos_arr, d_sin_arr,
|
|
1709
1748
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, _DTYPE(voxel_spacing)
|
|
1710
1749
|
)
|
|
1711
|
-
torch.cuda.synchronize()
|
|
1712
1750
|
|
|
1713
1751
|
return grad_img, None, None, None, None, None, None
|
|
1714
1752
|
|
|
@@ -1721,8 +1759,8 @@ class FanBackprojectorFunction(torch.autograd.Function):
|
|
|
1721
1759
|
|
|
1722
1760
|
Notes
|
|
1723
1761
|
-----
|
|
1724
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
1725
|
-
|
|
1762
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1763
|
+
method with interpolation for fan beam backprojection. Implements the adjoint
|
|
1726
1764
|
of the fan beam projection operator, distributing sinogram values back into
|
|
1727
1765
|
the reconstruction volume along divergent ray paths. The forward pass
|
|
1728
1766
|
computes reconstruction from sinogram data, and the backward pass computes
|
|
@@ -1777,7 +1815,7 @@ class FanBackprojectorFunction(torch.autograd.Function):
|
|
|
1777
1815
|
- All input tensors must be on the same CUDA device.
|
|
1778
1816
|
- The operation is fully differentiable and supports autograd.
|
|
1779
1817
|
- Fan beam geometry uses divergent rays from a point source to the detector.
|
|
1780
|
-
- Uses the Siddon
|
|
1818
|
+
- Uses the Siddon method with interpolation for accurate ray tracing and bilinear interpolation.
|
|
1781
1819
|
|
|
1782
1820
|
Examples
|
|
1783
1821
|
--------
|
|
@@ -1808,12 +1846,13 @@ class FanBackprojectorFunction(torch.autograd.Function):
|
|
|
1808
1846
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1809
1847
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1810
1848
|
|
|
1811
|
-
|
|
1849
|
+
pt_stream = torch.cuda.current_stream()
|
|
1850
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1851
|
+
_fan_2d_backward_kernel[grid, tpb, numba_stream](
|
|
1812
1852
|
d_sino, n_ang, n_det, d_reco, Nx, Ny,
|
|
1813
1853
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr,
|
|
1814
1854
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, _DTYPE(voxel_spacing)
|
|
1815
1855
|
)
|
|
1816
|
-
torch.cuda.synchronize()
|
|
1817
1856
|
|
|
1818
1857
|
ctx.save_for_backward(angles)
|
|
1819
1858
|
ctx.intermediate = (H, W, detector_spacing, n_ang, n_det, sdd, sid, voxel_spacing)
|
|
@@ -1843,12 +1882,13 @@ class FanBackprojectorFunction(torch.autograd.Function):
|
|
|
1843
1882
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1844
1883
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1845
1884
|
|
|
1846
|
-
|
|
1885
|
+
pt_stream = torch.cuda.current_stream()
|
|
1886
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1887
|
+
_fan_2d_forward_kernel[grid, tpb, numba_stream](
|
|
1847
1888
|
d_grad_out, Nx, Ny, d_sino_grad, n_ang, n_det,
|
|
1848
1889
|
_DTYPE(det_spacing), d_cos_arr, d_sin_arr,
|
|
1849
1890
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, _DTYPE(voxel_spacing)
|
|
1850
1891
|
)
|
|
1851
|
-
torch.cuda.synchronize()
|
|
1852
1892
|
|
|
1853
1893
|
return grad_sino, None, None, None, None, None, None, None
|
|
1854
1894
|
|
|
@@ -1861,8 +1901,8 @@ class ConeProjectorFunction(torch.autograd.Function):
|
|
|
1861
1901
|
|
|
1862
1902
|
Notes
|
|
1863
1903
|
-----
|
|
1864
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
1865
|
-
|
|
1904
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1905
|
+
method with interpolation for 3D cone beam geometry. Rays emanate from a point
|
|
1866
1906
|
X-ray source to a 2D detector array capturing volumetric projection data.
|
|
1867
1907
|
The forward pass computes 3D projections, and the backward pass computes
|
|
1868
1908
|
gradients via adjoint 3D backprojection. Requires significant GPU memory.
|
|
@@ -1918,7 +1958,7 @@ class ConeProjectorFunction(torch.autograd.Function):
|
|
|
1918
1958
|
- All input tensors must be on the same CUDA device.
|
|
1919
1959
|
- The operation is fully differentiable and supports autograd.
|
|
1920
1960
|
- Cone beam geometry uses a point source and a 2D detector array.
|
|
1921
|
-
- Uses the Siddon
|
|
1961
|
+
- Uses the Siddon method with interpolation for accurate 3D ray tracing and trilinear interpolation.
|
|
1922
1962
|
|
|
1923
1963
|
Examples
|
|
1924
1964
|
--------
|
|
@@ -1953,13 +1993,14 @@ class ConeProjectorFunction(torch.autograd.Function):
|
|
|
1953
1993
|
grid, tpb = _grid_3d(n_views, det_u, det_v)
|
|
1954
1994
|
cx, cy, cz = _DTYPE(W * 0.5), _DTYPE(H * 0.5), _DTYPE(D * 0.5)
|
|
1955
1995
|
|
|
1956
|
-
|
|
1996
|
+
pt_stream = torch.cuda.current_stream()
|
|
1997
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1998
|
+
_cone_3d_forward_kernel[grid, tpb, numba_stream](
|
|
1957
1999
|
d_vol, W, H, D, d_sino, n_views, det_u, det_v,
|
|
1958
2000
|
_DTYPE(du), _DTYPE(dv), d_cos_arr, d_sin_arr,
|
|
1959
2001
|
_DTYPE(sdd), _DTYPE(sid),
|
|
1960
2002
|
cx, cy, cz, _DTYPE(voxel_spacing)
|
|
1961
2003
|
)
|
|
1962
|
-
torch.cuda.synchronize()
|
|
1963
2004
|
|
|
1964
2005
|
ctx.save_for_backward(angles)
|
|
1965
2006
|
ctx.intermediate = (D, H, W, det_u, det_v, du, dv,
|
|
@@ -1991,12 +2032,13 @@ class ConeProjectorFunction(torch.autograd.Function):
|
|
|
1991
2032
|
grid, tpb = _grid_3d(n_views, det_u, det_v)
|
|
1992
2033
|
cx, cy, cz = _DTYPE(W * 0.5), _DTYPE(H * 0.5), _DTYPE(D * 0.5)
|
|
1993
2034
|
|
|
1994
|
-
|
|
2035
|
+
pt_stream = torch.cuda.current_stream()
|
|
2036
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
2037
|
+
_cone_3d_backward_kernel[grid, tpb, numba_stream](
|
|
1995
2038
|
d_grad_sino, n_views, det_u, det_v, d_vol_grad, W, H, D,
|
|
1996
2039
|
_DTYPE(du), _DTYPE(dv), d_cos_arr, d_sin_arr,
|
|
1997
2040
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, cz, _DTYPE(voxel_spacing)
|
|
1998
2041
|
)
|
|
1999
|
-
torch.cuda.synchronize()
|
|
2000
2042
|
|
|
2001
2043
|
grad_vol = grad_vol_perm.permute(2, 1, 0).contiguous()
|
|
2002
2044
|
return grad_vol, None, None, None, None, None, None, None, None
|
|
@@ -2010,8 +2052,8 @@ class ConeBackprojectorFunction(torch.autograd.Function):
|
|
|
2010
2052
|
|
|
2011
2053
|
Notes
|
|
2012
2054
|
-----
|
|
2013
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
2014
|
-
|
|
2055
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
2056
|
+
method with interpolation for 3D cone beam backprojection. The forward pass
|
|
2015
2057
|
computes a 3D reconstruction from cone beam projection data using
|
|
2016
2058
|
backprojection as the adjoint operation. The backward pass computes gradients
|
|
2017
2059
|
via 3D cone beam forward projection. Requires CUDA-capable hardware and
|
|
@@ -2078,7 +2120,7 @@ class ConeBackprojectorFunction(torch.autograd.Function):
|
|
|
2078
2120
|
- All input tensors must be on the same CUDA device.
|
|
2079
2121
|
- The operation is fully differentiable and supports autograd.
|
|
2080
2122
|
- Cone beam geometry uses a point source and a 2D detector array.
|
|
2081
|
-
- Uses the Siddon
|
|
2123
|
+
- Uses the Siddon method with interpolation for accurate 3D ray tracing and trilinear interpolation.
|
|
2082
2124
|
|
|
2083
2125
|
Examples
|
|
2084
2126
|
--------
|
|
@@ -2111,12 +2153,13 @@ class ConeBackprojectorFunction(torch.autograd.Function):
|
|
|
2111
2153
|
grid, tpb = _grid_3d(n_views, n_u, n_v)
|
|
2112
2154
|
cx, cy, cz = _DTYPE(W * 0.5), _DTYPE(H * 0.5), _DTYPE(D * 0.5)
|
|
2113
2155
|
|
|
2114
|
-
|
|
2156
|
+
pt_stream = torch.cuda.current_stream()
|
|
2157
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
2158
|
+
_cone_3d_backward_kernel[grid, tpb, numba_stream](
|
|
2115
2159
|
d_sino, n_views, n_u, n_v, d_reco, W, H, D,
|
|
2116
2160
|
_DTYPE(du), _DTYPE(dv), d_cos_arr, d_sin_arr,
|
|
2117
2161
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, cz, _DTYPE(voxel_spacing)
|
|
2118
2162
|
)
|
|
2119
|
-
torch.cuda.synchronize()
|
|
2120
2163
|
|
|
2121
2164
|
ctx.save_for_backward(angles)
|
|
2122
2165
|
ctx.intermediate = (D, H, W, n_u, n_v, du, dv,
|
|
@@ -2150,11 +2193,12 @@ class ConeBackprojectorFunction(torch.autograd.Function):
|
|
|
2150
2193
|
grid, tpb = _grid_3d(n_views, n_u, n_v)
|
|
2151
2194
|
cx, cy, cz = _DTYPE(W * 0.5), _DTYPE(H * 0.5), _DTYPE(D * 0.5)
|
|
2152
2195
|
|
|
2153
|
-
|
|
2196
|
+
pt_stream = torch.cuda.current_stream()
|
|
2197
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
2198
|
+
_cone_3d_forward_kernel[grid, tpb, numba_stream](
|
|
2154
2199
|
d_grad_out, W, H, D, d_sino_grad, n_views, n_u, n_v,
|
|
2155
2200
|
_DTYPE(du), _DTYPE(dv), d_cos_arr, d_sin_arr,
|
|
2156
2201
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, cz, _DTYPE(voxel_spacing)
|
|
2157
2202
|
)
|
|
2158
|
-
torch.cuda.synchronize()
|
|
2159
2203
|
|
|
2160
2204
|
return grad_sino, None, None, None, None, None, None, None, None, None
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "diffct"
|
|
7
|
-
version = "1.2.6"
|
|
7
|
+
version = "1.2.7"
|
|
8
8
|
description = "A CUDA-based library for computed tomography (CT) projection and reconstruction with differentiable operators"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [
|
|
@@ -36,5 +36,4 @@ where = ["."]
|
|
|
36
36
|
[tool.hatch.envs.default]
|
|
37
37
|
python = "python"
|
|
38
38
|
|
|
39
|
-
[tool.hatch.envs.default.env-vars]
|
|
40
|
-
PYTHONDONTWRITEBYTECODE = "1"
|
|
39
|
+
[tool.hatch.envs.default.env-vars]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|