diffct 1.2.5__tar.gz → 1.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffct-1.2.5 → diffct-1.2.7}/PKG-INFO +27 -1
- {diffct-1.2.5 → diffct-1.2.7}/README.md +26 -0
- {diffct-1.2.5 → diffct-1.2.7}/diffct/differentiable.py +219 -163
- {diffct-1.2.5 → diffct-1.2.7}/pyproject.toml +2 -3
- {diffct-1.2.5 → diffct-1.2.7}/.github/workflows/docs.yml +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/.github/workflows/release.yml +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/.gitignore +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/LICENSE +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/diffct/__init__.py +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/Makefile +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/_static/.gitkeep +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/api.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/conf.py +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/examples.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/fbp_fan_example.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/fbp_parallel_example.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/fdk_cone_example.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/getting_started.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/index.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/iterative_reco_cone_example.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/iterative_reco_fan_example.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/docs/source/iterative_reco_parallel_example.rst +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/examples/fbp_fan.py +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/examples/fbp_parallel.py +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/examples/fdk_cone.py +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/examples/iterative_reco_cone.py +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/examples/iterative_reco_fan.py +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/examples/iterative_reco_parallel.py +0 -0
- {diffct-1.2.5 → diffct-1.2.7}/requirements.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diffct
|
|
3
|
-
Version: 1.2.5
|
|
3
|
+
Version: 1.2.7
|
|
4
4
|
Summary: A CUDA-based library for computed tomography (CT) projection and reconstruction with differentiable operators
|
|
5
5
|
Project-URL: Homepage, https://github.com/sypsyp97/diffct
|
|
6
6
|
Author-email: Yipeng Sun <yipeng.sun@fau.de>
|
|
@@ -69,7 +69,33 @@ diffct/
|
|
|
69
69
|
|
|
70
70
|
### Installation
|
|
71
71
|
|
|
72
|
+
**CUDA 12:**
|
|
72
73
|
```bash
|
|
74
|
+
# Create and activate conda environment
|
|
75
|
+
conda create -n diffct python=3.12
|
|
76
|
+
conda activate diffct
|
|
77
|
+
|
|
78
|
+
# Install CUDA Toolkit, PyTorch, and Numba
|
|
79
|
+
conda install nvidia/label/cuda-12.8.1::cuda-toolkit
|
|
80
|
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
|
|
81
|
+
pip install numba-cuda[cu12]
|
|
82
|
+
|
|
83
|
+
# Install diffct
|
|
84
|
+
pip install diffct
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**CUDA 11:**
|
|
88
|
+
```bash
|
|
89
|
+
# Create and activate conda environment
|
|
90
|
+
conda create -n diffct python=3.12
|
|
91
|
+
conda activate diffct
|
|
92
|
+
|
|
93
|
+
# Install CUDA Toolkit, PyTorch, and Numba
|
|
94
|
+
conda install nvidia/label/cuda-11.8.0::cuda-toolkit
|
|
95
|
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
|
96
|
+
pip install numba-cuda[cu11]
|
|
97
|
+
|
|
98
|
+
# Install diffct
|
|
73
99
|
pip install diffct
|
|
74
100
|
```
|
|
75
101
|
|
|
@@ -52,7 +52,33 @@ diffct/
|
|
|
52
52
|
|
|
53
53
|
### Installation
|
|
54
54
|
|
|
55
|
+
**CUDA 12:**
|
|
55
56
|
```bash
|
|
57
|
+
# Create and activate conda environment
|
|
58
|
+
conda create -n diffct python=3.12
|
|
59
|
+
conda activate diffct
|
|
60
|
+
|
|
61
|
+
# Install CUDA Toolkit, PyTorch, and Numba
|
|
62
|
+
conda install nvidia/label/cuda-12.8.1::cuda-toolkit
|
|
63
|
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
|
|
64
|
+
pip install numba-cuda[cu12]
|
|
65
|
+
|
|
66
|
+
# Install diffct
|
|
67
|
+
pip install diffct
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**CUDA 11:**
|
|
71
|
+
```bash
|
|
72
|
+
# Create and activate conda environment
|
|
73
|
+
conda create -n diffct python=3.12
|
|
74
|
+
conda activate diffct
|
|
75
|
+
|
|
76
|
+
# Install CUDA Toolkit, PyTorch, and Numba
|
|
77
|
+
conda install nvidia/label/cuda-11.8.0::cuda-toolkit
|
|
78
|
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
|
79
|
+
pip install numba-cuda[cu11]
|
|
80
|
+
|
|
81
|
+
# Install diffct
|
|
56
82
|
pip install diffct
|
|
57
83
|
```
|
|
58
84
|
|
|
@@ -2,6 +2,7 @@ import math
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
import torch
|
|
4
4
|
from numba import cuda
|
|
5
|
+
from functools import lru_cache
|
|
5
6
|
|
|
6
7
|
# ---------------------------------------------------------------------------
|
|
7
8
|
# Global settings & helpers
|
|
@@ -19,8 +20,7 @@ _TPB_3D = (8, 8, 8)
|
|
|
19
20
|
# Trades numerical precision for performance in ray-tracing calculations
|
|
20
21
|
# Safe for CT reconstruction where slight precision loss is acceptable for speed gains
|
|
21
22
|
_FASTMATH_DECORATOR = cuda.jit(cache=True, fastmath=True)
|
|
22
|
-
|
|
23
|
-
_NON_FASTMATH_DECORATOR = cuda.jit(cache=True, fastmath=False)
|
|
23
|
+
|
|
24
24
|
_INF = _DTYPE(np.inf)
|
|
25
25
|
_EPSILON = _DTYPE(1e-6)
|
|
26
26
|
# === Device Management Utilities ===
|
|
@@ -70,8 +70,8 @@ class DeviceManager:
|
|
|
70
70
|
... )
|
|
71
71
|
tensor([1, 2, 3], device='cuda:0')
|
|
72
72
|
"""
|
|
73
|
-
if hasattr(tensor, "to"):
|
|
74
|
-
return tensor
|
|
73
|
+
if hasattr(tensor, "to") and tensor.device != device:
|
|
74
|
+
return tensor.to(device)
|
|
75
75
|
return tensor
|
|
76
76
|
|
|
77
77
|
# === PyTorch-CUDA Bridge ===
|
|
@@ -110,6 +110,7 @@ class TorchCUDABridge:
|
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
# === GPU-aware Trigonometric Table Generation ===
|
|
113
|
+
@lru_cache(maxsize=2048)
|
|
113
114
|
def _trig_tables(angles, dtype=_DTYPE, device=None):
|
|
114
115
|
"""Compute cosine and sine tables for input angles.
|
|
115
116
|
|
|
@@ -139,9 +140,11 @@ def _trig_tables(angles, dtype=_DTYPE, device=None):
|
|
|
139
140
|
"""
|
|
140
141
|
if isinstance(angles, torch.Tensor):
|
|
141
142
|
device = angles.device if device is None else device
|
|
142
|
-
cos
|
|
143
|
-
|
|
144
|
-
|
|
143
|
+
# Compute both cos and sin in one call to avoid redundant kernel launches
|
|
144
|
+
angles_device = angles.to(dtype=dtype, device=device)
|
|
145
|
+
cos = torch.cos(angles_device)
|
|
146
|
+
sin = torch.sin(angles_device)
|
|
147
|
+
return cos, sin
|
|
145
148
|
else:
|
|
146
149
|
# fallback for non-tensor inputs: compute via PyTorch on CPU for consistency
|
|
147
150
|
# Determine desired torch dtype
|
|
@@ -153,7 +156,7 @@ def _trig_tables(angles, dtype=_DTYPE, device=None):
|
|
|
153
156
|
np.float64: torch.float64,
|
|
154
157
|
}
|
|
155
158
|
torch_dtype = _NP_TO_TORCH.get(dtype, torch.float32)
|
|
156
|
-
# Convert input angles to a CPU torch tensor
|
|
159
|
+
# Convert input angles to a CPU torch tensor and compute both simultaneously
|
|
157
160
|
angles_cpu = torch.tensor(angles, dtype=torch_dtype)
|
|
158
161
|
cos_cpu = torch.cos(angles_cpu)
|
|
159
162
|
sin_cpu = torch.sin(angles_cpu)
|
|
@@ -182,18 +185,23 @@ def _validate_3d_memory_layout(tensor, expected_order='DHW'):
|
|
|
182
185
|
ValueError
|
|
183
186
|
If tensor has unexpected memory layout or is non-contiguous
|
|
184
187
|
"""
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
# Check if tensor is contiguous to avoid memory duplication
|
|
189
|
-
if not tensor.is_contiguous():
|
|
190
|
-
raise ValueError(
|
|
191
|
-
"Input tensor must be contiguous. Call .contiguous() before passing to "
|
|
192
|
-
"cone beam functions to avoid memory duplication and ensure correct results."
|
|
193
|
-
)
|
|
188
|
+
shape = tensor.shape
|
|
189
|
+
if len(shape) != 3:
|
|
190
|
+
raise ValueError(f"Expected 3D tensor, got {len(shape)}D")
|
|
194
191
|
|
|
192
|
+
# Early return for common case - contiguous tensor with expected ordering
|
|
193
|
+
if tensor.is_contiguous() and expected_order in ('DHW', 'VHW'):
|
|
194
|
+
# For DHW and VHW, the expected order matches memory layout when contiguous
|
|
195
|
+
return
|
|
196
|
+
|
|
195
197
|
# Only check memory order for DHW and VHW, not for internal WHD layout
|
|
196
198
|
if expected_order in ('DHW', 'VHW'):
|
|
199
|
+
if not tensor.is_contiguous():
|
|
200
|
+
raise ValueError(
|
|
201
|
+
"Input tensor must be contiguous. Call .contiguous() before passing to "
|
|
202
|
+
"cone beam functions to avoid memory duplication and ensure correct results."
|
|
203
|
+
)
|
|
204
|
+
|
|
197
205
|
strides = tensor.stride()
|
|
198
206
|
order_mapping = {
|
|
199
207
|
'DHW': (0, 1, 2), # Depth, Height, Width
|
|
@@ -210,15 +218,15 @@ def _validate_3d_memory_layout(tensor, expected_order='DHW'):
|
|
|
210
218
|
if actual_order != expected_stride_order:
|
|
211
219
|
# Create appropriate error message based on context
|
|
212
220
|
if expected_order == 'VHW':
|
|
213
|
-
actual_str = f"({
|
|
221
|
+
actual_str = f"({shape[0]}, {shape[1]}, {shape[2]})"
|
|
214
222
|
expected_str = "(Views, Height, Width)"
|
|
215
223
|
fix_str = "ensure your sinogram has shape (num_views, det_v, det_u)"
|
|
216
224
|
elif expected_order == 'DHW':
|
|
217
|
-
actual_str = f"({
|
|
225
|
+
actual_str = f"({shape[0]}, {shape[1]}, {shape[2]})"
|
|
218
226
|
expected_str = "(Depth, Height, Width)"
|
|
219
227
|
fix_str = "ensure your volume has shape (D, H, W)"
|
|
220
228
|
else:
|
|
221
|
-
actual_str = str(tuple(
|
|
229
|
+
actual_str = str(tuple(shape))
|
|
222
230
|
expected_str = expected_order
|
|
223
231
|
fix_str = "check tensor dimensions"
|
|
224
232
|
|
|
@@ -316,7 +324,7 @@ def _parallel_2d_forward_kernel(
|
|
|
316
324
|
):
|
|
317
325
|
"""Compute the 2D parallel beam forward projection.
|
|
318
326
|
|
|
319
|
-
This CUDA kernel implements the Siddon
|
|
327
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
320
328
|
2D parallel beam forward projection.
|
|
321
329
|
|
|
322
330
|
Parameters
|
|
@@ -348,7 +356,7 @@ def _parallel_2d_forward_kernel(
|
|
|
348
356
|
|
|
349
357
|
Notes
|
|
350
358
|
-----
|
|
351
|
-
The Siddon
|
|
359
|
+
The Siddon method with interpolation provides accurate ray-volume intersection by:
|
|
352
360
|
- Calculating ray-volume boundary intersections to define traversal limits.
|
|
353
361
|
- Iterating through voxels along the ray path via parametric equations.
|
|
354
362
|
- Determining bilinear interpolation weights for sub-voxel sampling.
|
|
@@ -407,7 +415,7 @@ def _parallel_2d_forward_kernel(
|
|
|
407
415
|
if t_min >= t_max:
|
|
408
416
|
d_sino[iang, idet] = 0.0; return
|
|
409
417
|
|
|
410
|
-
# === SIDDON
|
|
418
|
+
# === SIDDON METHOD VOXEL TRAVERSAL INITIALIZATION ===
|
|
411
419
|
accum = 0.0 # Accumulated projection value along ray
|
|
412
420
|
t = t_min # Current ray parameter (distance from ray start)
|
|
413
421
|
|
|
@@ -417,12 +425,15 @@ def _parallel_2d_forward_kernel(
|
|
|
417
425
|
|
|
418
426
|
# Determine traversal direction and step sizes for each axis
|
|
419
427
|
step_x, step_y = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1) # Voxel stepping direction
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
428
|
+
# Hoist inverse directions to reduce divisions and branches
|
|
429
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
430
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
431
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF
|
|
432
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF
|
|
433
|
+
|
|
434
|
+
# Calculate parameter values for next voxel boundary crossings using inv_dir_*
|
|
435
|
+
tx = ((ix + (step_x > 0)) - cx - pnt_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
436
|
+
ty = ((iy + (step_y > 0)) - cy - pnt_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
426
437
|
|
|
427
438
|
# === MAIN RAY TRAVERSAL LOOP ===
|
|
428
439
|
# Step through voxels along ray path, accumulating weighted contributions
|
|
@@ -437,9 +448,10 @@ def _parallel_2d_forward_kernel(
|
|
|
437
448
|
# === BILINEAR INTERPOLATION SAMPLING ===
|
|
438
449
|
# Sample volume at ray segment midpoint for accurate integration
|
|
439
450
|
# Mathematical basis: Midpoint rule for numerical integration along ray segments
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
451
|
+
t_mid = t + seg_len * 0.5
|
|
452
|
+
mid_x = pnt_x + t_mid * dir_x + cx # Midpoint x-coordinate in image space
|
|
453
|
+
mid_y = pnt_y + t_mid * dir_y + cy # Midpoint y-coordinate in image space
|
|
454
|
+
|
|
443
455
|
# Convert continuous coordinates to discrete voxel indices and fractional weights
|
|
444
456
|
# Floor operation gives base voxel index, fractional part gives interpolation weights
|
|
445
457
|
ix0, iy0 = int(math.floor(mid_x)), int(math.floor(mid_y)) # Base voxel indices (bottom-left corner)
|
|
@@ -476,7 +488,7 @@ def _parallel_2d_forward_kernel(
|
|
|
476
488
|
|
|
477
489
|
d_sino[iang, idet] = accum
|
|
478
490
|
|
|
479
|
-
@_NON_FASTMATH_DECORATOR
|
|
491
|
+
@_FASTMATH_DECORATOR
|
|
480
492
|
def _parallel_2d_backward_kernel(
|
|
481
493
|
d_sino, n_ang, n_det,
|
|
482
494
|
d_image, Nx, Ny,
|
|
@@ -484,8 +496,8 @@ def _parallel_2d_backward_kernel(
|
|
|
484
496
|
):
|
|
485
497
|
"""Compute the 2D parallel beam backprojection.
|
|
486
498
|
|
|
487
|
-
This CUDA kernel implements the Siddon-
|
|
488
|
-
beam backprojection.
|
|
499
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
500
|
+
2D parallel beam backprojection.
|
|
489
501
|
|
|
490
502
|
Parameters
|
|
491
503
|
----------
|
|
@@ -549,16 +561,18 @@ def _parallel_2d_backward_kernel(
|
|
|
549
561
|
|
|
550
562
|
if t_min >= t_max: return
|
|
551
563
|
|
|
552
|
-
# === SIDDON
|
|
564
|
+
# === SIDDON METHOD TRAVERSAL INITIALIZATION ===
|
|
553
565
|
t = t_min
|
|
554
566
|
ix = int(math.floor(pnt_x + t * dir_x + cx))
|
|
555
567
|
iy = int(math.floor(pnt_y + t * dir_y + cy))
|
|
556
568
|
|
|
557
569
|
step_x, step_y = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1)
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
570
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
571
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
572
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF
|
|
573
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF
|
|
574
|
+
tx = ((ix + (step_x > 0)) - cx - pnt_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
575
|
+
ty = ((iy + (step_y > 0)) - cy - pnt_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
562
576
|
|
|
563
577
|
# === BACKPROJECTION TRAVERSAL LOOP ===
|
|
564
578
|
# Distribute sinogram value along ray path using bilinear interpolation
|
|
@@ -568,8 +582,9 @@ def _parallel_2d_backward_kernel(
|
|
|
568
582
|
seg_len = t_next - t
|
|
569
583
|
if seg_len > _EPSILON:
|
|
570
584
|
# Sample at ray segment midpoint (same as forward projection)
|
|
571
|
-
|
|
572
|
-
|
|
585
|
+
t_mid = t + seg_len * 0.5
|
|
586
|
+
mid_x = pnt_x + t_mid * dir_x + cx
|
|
587
|
+
mid_y = pnt_y + t_mid * dir_y + cy
|
|
573
588
|
ix0, iy0 = int(math.floor(mid_x)), int(math.floor(mid_y))
|
|
574
589
|
dx, dy = mid_x - ix0, mid_y - iy0
|
|
575
590
|
|
|
@@ -585,10 +600,12 @@ def _parallel_2d_backward_kernel(
|
|
|
585
600
|
# Performance impact: Atomic operations are slower than regular writes but necessary for correctness
|
|
586
601
|
# Memory access pattern: Global memory atomics with potential bank conflicts, but unavoidable
|
|
587
602
|
cval = val * seg_len # Contribution value for this ray segment
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
cuda.atomic.add(d_image, (iy0
|
|
591
|
-
cuda.atomic.add(d_image, (iy0
|
|
603
|
+
one_minus_dx = 1.0 - dx
|
|
604
|
+
one_minus_dy = 1.0 - dy
|
|
605
|
+
cuda.atomic.add(d_image, (iy0, ix0), cval * one_minus_dx * one_minus_dy)
|
|
606
|
+
cuda.atomic.add(d_image, (iy0, ix0 + 1), cval * dx * one_minus_dy)
|
|
607
|
+
cuda.atomic.add(d_image, (iy0 + 1, ix0), cval * one_minus_dx * dy)
|
|
608
|
+
cuda.atomic.add(d_image, (iy0 + 1, ix0 + 1), cval * dx * dy)
|
|
592
609
|
|
|
593
610
|
# Advance to next voxel (identical logic to forward projection)
|
|
594
611
|
if tx <= ty:
|
|
@@ -613,8 +630,8 @@ def _fan_2d_forward_kernel(
|
|
|
613
630
|
):
|
|
614
631
|
"""Compute the 2D fan beam forward projection.
|
|
615
632
|
|
|
616
|
-
This CUDA kernel implements the Siddon-
|
|
617
|
-
forward projection.
|
|
633
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
634
|
+
2D fan beam forward projection.
|
|
618
635
|
|
|
619
636
|
Parameters
|
|
620
637
|
----------
|
|
@@ -704,7 +721,7 @@ def _fan_2d_forward_kernel(
|
|
|
704
721
|
if t_min >= t_max: # No valid intersection
|
|
705
722
|
d_sino[iang, idet] = 0.0; return
|
|
706
723
|
|
|
707
|
-
# === SIDDON
|
|
724
|
+
# === SIDDON METHOD TRAVERSAL (same algorithm as parallel beam) ===
|
|
708
725
|
accum = 0.0 # Accumulated projection value
|
|
709
726
|
t = t_min # Current ray parameter
|
|
710
727
|
|
|
@@ -714,10 +731,12 @@ def _fan_2d_forward_kernel(
|
|
|
714
731
|
|
|
715
732
|
# Traversal parameters (identical to parallel beam implementation)
|
|
716
733
|
step_x, step_y = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1)
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
734
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
735
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
736
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF
|
|
737
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF
|
|
738
|
+
tx = ((ix + (step_x > 0)) - cx - src_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
739
|
+
ty = ((iy + (step_y > 0)) - cy - src_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
721
740
|
|
|
722
741
|
# Main traversal loop with bilinear interpolation (identical to parallel beam)
|
|
723
742
|
while t < t_max:
|
|
@@ -726,8 +745,9 @@ def _fan_2d_forward_kernel(
|
|
|
726
745
|
seg_len = t_next - t
|
|
727
746
|
if seg_len > _EPSILON:
|
|
728
747
|
# Sample at midpoint using source as ray origin
|
|
729
|
-
|
|
730
|
-
|
|
748
|
+
t_mid = t + seg_len * 0.5
|
|
749
|
+
mid_x = src_x + t_mid * dir_x + cx
|
|
750
|
+
mid_y = src_y + t_mid * dir_y + cy
|
|
731
751
|
ix0, iy0 = int(math.floor(mid_x)), int(math.floor(mid_y))
|
|
732
752
|
dx, dy = mid_x - ix0, mid_y - iy0
|
|
733
753
|
|
|
@@ -756,7 +776,7 @@ def _fan_2d_forward_kernel(
|
|
|
756
776
|
|
|
757
777
|
d_sino[iang, idet] = accum
|
|
758
778
|
|
|
759
|
-
@_NON_FASTMATH_DECORATOR
|
|
779
|
+
@_FASTMATH_DECORATOR
|
|
760
780
|
def _fan_2d_backward_kernel(
|
|
761
781
|
d_sino, n_ang, n_det,
|
|
762
782
|
d_image, Nx, Ny,
|
|
@@ -765,8 +785,8 @@ def _fan_2d_backward_kernel(
|
|
|
765
785
|
):
|
|
766
786
|
"""Compute the 2D fan beam backprojection.
|
|
767
787
|
|
|
768
|
-
This CUDA kernel implements the Siddon-
|
|
769
|
-
backprojection.
|
|
788
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
789
|
+
2D fan beam backprojection.
|
|
770
790
|
|
|
771
791
|
Parameters
|
|
772
792
|
----------
|
|
@@ -851,16 +871,18 @@ def _fan_2d_backward_kernel(
|
|
|
851
871
|
|
|
852
872
|
if t_min >= t_max: return
|
|
853
873
|
|
|
854
|
-
# === SIDDON
|
|
874
|
+
# === SIDDON METHOD TRAVERSAL INITIALIZATION ===
|
|
855
875
|
t = t_min
|
|
856
876
|
ix = int(math.floor(src_x + t * dir_x + cx))
|
|
857
877
|
iy = int(math.floor(src_y + t * dir_y + cy))
|
|
858
878
|
|
|
859
879
|
step_x, step_y = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1)
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
880
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
881
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
882
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF
|
|
883
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF
|
|
884
|
+
tx = ((ix + (step_x > 0)) - cx - src_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
885
|
+
ty = ((iy + (step_y > 0)) - cy - src_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
864
886
|
|
|
865
887
|
# === FAN BEAM BACKPROJECTION TRAVERSAL LOOP ===
|
|
866
888
|
# Distribute sinogram value along divergent ray path using bilinear interpolation
|
|
@@ -870,8 +892,9 @@ def _fan_2d_backward_kernel(
|
|
|
870
892
|
seg_len = t_next - t
|
|
871
893
|
if seg_len > _EPSILON:
|
|
872
894
|
# Sample at ray segment midpoint using source as ray origin
|
|
873
|
-
|
|
874
|
-
|
|
895
|
+
t_mid = t + seg_len * 0.5
|
|
896
|
+
mid_x = src_x + t_mid * dir_x + cx
|
|
897
|
+
mid_y = src_y + t_mid * dir_y + cy
|
|
875
898
|
ix0, iy0 = int(math.floor(mid_x)), int(math.floor(mid_y))
|
|
876
899
|
dx, dy = mid_x - ix0, mid_y - iy0
|
|
877
900
|
|
|
@@ -886,10 +909,12 @@ def _fan_2d_backward_kernel(
|
|
|
886
909
|
# Atomic operations prevent race conditions when multiple divergent rays write to same voxel
|
|
887
910
|
# Performance consideration: Fan beam geometry may have more atomic contention than parallel beam
|
|
888
911
|
cval = val * seg_len # Contribution value for this ray segment
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
cuda.atomic.add(d_image, (iy0
|
|
892
|
-
cuda.atomic.add(d_image, (iy0
|
|
912
|
+
one_minus_dx = 1.0 - dx
|
|
913
|
+
one_minus_dy = 1.0 - dy
|
|
914
|
+
cuda.atomic.add(d_image, (iy0, ix0), cval * one_minus_dx * one_minus_dy)
|
|
915
|
+
cuda.atomic.add(d_image, (iy0, ix0 + 1), cval * dx * one_minus_dy)
|
|
916
|
+
cuda.atomic.add(d_image, (iy0 + 1, ix0), cval * one_minus_dx * dy)
|
|
917
|
+
cuda.atomic.add(d_image, (iy0 + 1, ix0 + 1), cval * dx * dy)
|
|
893
918
|
|
|
894
919
|
# === VOXEL BOUNDARY CROSSING LOGIC ===
|
|
895
920
|
# Advance to next voxel based on which boundary is crossed first
|
|
@@ -915,8 +940,8 @@ def _cone_3d_forward_kernel(
|
|
|
915
940
|
):
|
|
916
941
|
"""Compute the 3D cone-beam forward projection.
|
|
917
942
|
|
|
918
|
-
This CUDA kernel implements the Siddon-
|
|
919
|
-
forward projection.
|
|
943
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
944
|
+
3D cone-beam forward projection.
|
|
920
945
|
|
|
921
946
|
Parameters
|
|
922
947
|
----------
|
|
@@ -1025,7 +1050,7 @@ def _cone_3d_forward_kernel(
|
|
|
1025
1050
|
if t_min >= t_max: # No valid 3D intersection
|
|
1026
1051
|
d_sino[iview, iu, iv] = 0.0; return
|
|
1027
1052
|
|
|
1028
|
-
# === 3D SIDDON
|
|
1053
|
+
# === 3D SIDDON METHOD TRAVERSAL INITIALIZATION ===
|
|
1029
1054
|
accum = 0.0 # Accumulated projection value
|
|
1030
1055
|
t = t_min # Current ray parameter
|
|
1031
1056
|
|
|
@@ -1036,14 +1061,17 @@ def _cone_3d_forward_kernel(
|
|
|
1036
1061
|
|
|
1037
1062
|
# 3D traversal parameters (extends 2D algorithm)
|
|
1038
1063
|
step_x, step_y, step_z = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1), (1 if dir_z >= 0 else -1)
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1064
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
1065
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
1066
|
+
inv_dir_z = (1.0 / dir_z) if abs(dir_z) > _EPSILON else 0.0
|
|
1067
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF # Parameter increment per x-voxel
|
|
1068
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF # Parameter increment per y-voxel
|
|
1069
|
+
dt_z = abs(inv_dir_z) if abs(dir_z) > _EPSILON else _INF # Parameter increment per z-voxel
|
|
1070
|
+
|
|
1043
1071
|
# Calculate parameter values for next 3D voxel boundary crossings
|
|
1044
|
-
tx = ((ix + (step_x > 0)) - cx - src_x)
|
|
1045
|
-
ty = ((iy + (step_y > 0)) - cy - src_y)
|
|
1046
|
-
tz = ((iz + (step_z > 0)) - cz - src_z)
|
|
1072
|
+
tx = ((ix + (step_x > 0)) - cx - src_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
1073
|
+
ty = ((iy + (step_y > 0)) - cy - src_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
1074
|
+
tz = ((iz + (step_z > 0)) - cz - src_z) * inv_dir_z if abs(dir_z) > _EPSILON else _INF
|
|
1047
1075
|
|
|
1048
1076
|
# === 3D TRAVERSAL LOOP WITH TRILINEAR INTERPOLATION ===
|
|
1049
1077
|
while t < t_max:
|
|
@@ -1056,34 +1084,35 @@ def _cone_3d_forward_kernel(
|
|
|
1056
1084
|
# === TRILINEAR INTERPOLATION SAMPLING ===
|
|
1057
1085
|
# Sample 3D volume at ray segment midpoint for accurate integration
|
|
1058
1086
|
# Mathematical basis: Midpoint rule for numerical integration along 3D ray segments
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1087
|
+
t_mid = t + seg_len * 0.5
|
|
1088
|
+
mid_x = src_x + t_mid * dir_x + cx # Midpoint x-coordinate in volume space
|
|
1089
|
+
mid_y = src_y + t_mid * dir_y + cy # Midpoint y-coordinate in volume space
|
|
1090
|
+
mid_z = src_z + t_mid * dir_z + cz # Midpoint z-coordinate in volume space
|
|
1091
|
+
|
|
1063
1092
|
# Convert continuous 3D coordinates to discrete voxel indices and fractional weights
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1093
|
+
ix0, iy0, iz0 = int(math.floor(mid_x)), int(math.floor(mid_y)), int(math.floor(mid_z))
|
|
1094
|
+
dx, dy, dz = mid_x - ix0, mid_y - iy0, mid_z - iz0
|
|
1095
|
+
|
|
1068
1096
|
# Clamp indices to stay in-bounds during interpolation
|
|
1069
1097
|
ix0 = max(0, min(ix0, Nx - 2))
|
|
1070
1098
|
iy0 = max(0, min(iy0, Ny - 2))
|
|
1071
1099
|
iz0 = max(0, min(iz0, Nz - 2))
|
|
1072
|
-
|
|
1100
|
+
|
|
1101
|
+
# Precompute complements
|
|
1102
|
+
omdx = 1.0 - dx
|
|
1103
|
+
omdy = 1.0 - dy
|
|
1104
|
+
omdz = 1.0 - dz
|
|
1105
|
+
|
|
1073
1106
|
# === TRILINEAR INTERPOLATION WEIGHT CALCULATION ===
|
|
1074
|
-
# Mathematical basis: Trilinear interpolation formula f(x,y,z) = Σ f(xi,yi,zi) * wi(x,y,z)
|
|
1075
|
-
# where wi(x,y,z) are the trilinear basis functions for each corner voxel of the 3D cube
|
|
1076
|
-
# Weights are products of 1D linear interpolation weights: (1-dx) or dx, (1-dy) or dy, (1-dz) or dz
|
|
1077
|
-
# Each of the 8 cube corners gets a weight proportional to its distance from the sample point
|
|
1078
1107
|
val = (
|
|
1079
|
-
d_vol[ix0, iy0, iz0] *
|
|
1080
|
-
d_vol[ix0 + 1, iy0, iz0] * dx*
|
|
1081
|
-
d_vol[ix0, iy0 + 1, iz0] *
|
|
1082
|
-
d_vol[ix0, iy0, iz0 + 1] *
|
|
1083
|
-
d_vol[ix0 + 1, iy0 + 1, iz0] * dx*dy*
|
|
1084
|
-
d_vol[ix0 + 1, iy0, iz0 + 1] * dx*
|
|
1085
|
-
d_vol[ix0, iy0 + 1, iz0 + 1] *
|
|
1086
|
-
d_vol[ix0 + 1, iy0 + 1, iz0 + 1] * dx*dy*dz
|
|
1108
|
+
d_vol[ix0, iy0, iz0] * omdx*omdy*omdz +
|
|
1109
|
+
d_vol[ix0 + 1, iy0, iz0] * dx *omdy*omdz +
|
|
1110
|
+
d_vol[ix0, iy0 + 1, iz0] * omdx*dy *omdz +
|
|
1111
|
+
d_vol[ix0, iy0, iz0 + 1] * omdx*omdy*dz +
|
|
1112
|
+
d_vol[ix0 + 1, iy0 + 1, iz0] * dx *dy *omdz +
|
|
1113
|
+
d_vol[ix0 + 1, iy0, iz0 + 1] * dx *omdy*dz +
|
|
1114
|
+
d_vol[ix0, iy0 + 1, iz0 + 1] * omdx*dy *dz +
|
|
1115
|
+
d_vol[ix0 + 1, iy0 + 1, iz0 + 1] * dx *dy *dz
|
|
1087
1116
|
)
|
|
1088
1117
|
# Accumulate contribution weighted by 3D ray segment length (discrete line integral approximation)
|
|
1089
1118
|
# This implements the 3D Radon transform: integral of f(x,y,z) along the ray path
|
|
@@ -1106,7 +1135,7 @@ def _cone_3d_forward_kernel(
|
|
|
1106
1135
|
|
|
1107
1136
|
d_sino[iview, iu, iv] = accum
|
|
1108
1137
|
|
|
1109
|
-
@_NON_FASTMATH_DECORATOR
|
|
1138
|
+
@_FASTMATH_DECORATOR
|
|
1110
1139
|
def _cone_3d_backward_kernel(
|
|
1111
1140
|
d_sino, n_views, n_u, n_v,
|
|
1112
1141
|
d_vol, Nx, Ny, Nz,
|
|
@@ -1115,8 +1144,8 @@ def _cone_3d_backward_kernel(
|
|
|
1115
1144
|
):
|
|
1116
1145
|
"""Compute the 3D cone-beam backprojection.
|
|
1117
1146
|
|
|
1118
|
-
This CUDA kernel implements the Siddon-
|
|
1119
|
-
backprojection.
|
|
1147
|
+
This CUDA kernel implements the Siddon ray-tracing method with interpolation for
|
|
1148
|
+
3D cone-beam backprojection.
|
|
1120
1149
|
|
|
1121
1150
|
Parameters
|
|
1122
1151
|
----------
|
|
@@ -1219,7 +1248,7 @@ def _cone_3d_backward_kernel(
|
|
|
1219
1248
|
|
|
1220
1249
|
if t_min >= t_max: return
|
|
1221
1250
|
|
|
1222
|
-
# === 3D SIDDON
|
|
1251
|
+
# === 3D SIDDON METHOD TRAVERSAL INITIALIZATION ===
|
|
1223
1252
|
t = t_min
|
|
1224
1253
|
ix = int(math.floor(src_x + t * dir_x + cx)) # Current voxel x-index
|
|
1225
1254
|
iy = int(math.floor(src_y + t * dir_y + cy)) # Current voxel y-index
|
|
@@ -1227,14 +1256,17 @@ def _cone_3d_backward_kernel(
|
|
|
1227
1256
|
|
|
1228
1257
|
# 3D traversal parameters (extends 2D algorithm)
|
|
1229
1258
|
step_x, step_y, step_z = (1 if dir_x >= 0 else -1), (1 if dir_y >= 0 else -1), (1 if dir_z >= 0 else -1)
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1259
|
+
inv_dir_x = (1.0 / dir_x) if abs(dir_x) > _EPSILON else 0.0
|
|
1260
|
+
inv_dir_y = (1.0 / dir_y) if abs(dir_y) > _EPSILON else 0.0
|
|
1261
|
+
inv_dir_z = (1.0 / dir_z) if abs(dir_z) > _EPSILON else 0.0
|
|
1262
|
+
dt_x = abs(inv_dir_x) if abs(dir_x) > _EPSILON else _INF # Parameter increment per x-voxel
|
|
1263
|
+
dt_y = abs(inv_dir_y) if abs(dir_y) > _EPSILON else _INF # Parameter increment per y-voxel
|
|
1264
|
+
dt_z = abs(inv_dir_z) if abs(dir_z) > _EPSILON else _INF # Parameter increment per z-voxel
|
|
1265
|
+
|
|
1234
1266
|
# Calculate parameter values for next 3D voxel boundary crossings
|
|
1235
|
-
tx = ((ix + (step_x > 0)) - cx - src_x)
|
|
1236
|
-
ty = ((iy + (step_y > 0)) - cy - src_y)
|
|
1237
|
-
tz = ((iz + (step_z > 0)) - cz - src_z)
|
|
1267
|
+
tx = ((ix + (step_x > 0)) - cx - src_x) * inv_dir_x if abs(dir_x) > _EPSILON else _INF
|
|
1268
|
+
ty = ((iy + (step_y > 0)) - cy - src_y) * inv_dir_y if abs(dir_y) > _EPSILON else _INF
|
|
1269
|
+
tz = ((iz + (step_z > 0)) - cz - src_z) * inv_dir_z if abs(dir_z) > _EPSILON else _INF
|
|
1238
1270
|
|
|
1239
1271
|
# === 3D CONE BEAM BACKPROJECTION TRAVERSAL LOOP ===
|
|
1240
1272
|
# Distribute sinogram value along divergent 3D ray path using trilinear interpolation
|
|
@@ -1247,35 +1279,35 @@ def _cone_3d_backward_kernel(
|
|
|
1247
1279
|
if seg_len > _EPSILON:
|
|
1248
1280
|
# === TRILINEAR INTERPOLATION SAMPLING ===
|
|
1249
1281
|
# Sample 3D volume at ray segment midpoint using source as ray origin
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1282
|
+
t_mid = t + seg_len * 0.5
|
|
1283
|
+
mid_x = src_x + t_mid * dir_x + cx
|
|
1284
|
+
mid_y = src_y + t_mid * dir_y + cy
|
|
1285
|
+
mid_z = src_z + t_mid * dir_z + cz
|
|
1286
|
+
|
|
1254
1287
|
# Convert continuous 3D coordinates to voxel indices and interpolation weights
|
|
1255
1288
|
ix0, iy0, iz0 = int(math.floor(mid_x)), int(math.floor(mid_y)), int(math.floor(mid_z))
|
|
1256
|
-
dx, dy, dz = mid_x - ix0, mid_y - iy0, mid_z - iz0
|
|
1257
|
-
|
|
1289
|
+
dx, dy, dz = mid_x - ix0, mid_y - iy0, mid_z - iz0
|
|
1290
|
+
|
|
1258
1291
|
# Clamp indices to stay in-bounds during interpolation
|
|
1259
1292
|
ix0 = max(0, min(ix0, Nx - 2))
|
|
1260
1293
|
iy0 = max(0, min(iy0, Ny - 2))
|
|
1261
1294
|
iz0 = max(0, min(iz0, Nz - 2))
|
|
1262
|
-
|
|
1295
|
+
|
|
1296
|
+
# Precompute complements and contribution
|
|
1297
|
+
omdx = 1.0 - dx
|
|
1298
|
+
omdy = 1.0 - dy
|
|
1299
|
+
omdz = 1.0 - dz
|
|
1300
|
+
cval = g * seg_len
|
|
1301
|
+
|
|
1263
1302
|
# === ATOMIC BACKPROJECTION WITH TRILINEAR WEIGHTS ===
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
cuda.atomic.add(d_vol, (ix0, iy0, iz0), cval * (1-dx)*(1-dy)*(1-dz)) # Corner (0,0,0) - atomic write
|
|
1272
|
-
cuda.atomic.add(d_vol, (ix0 + 1, iy0, iz0), cval * dx*(1-dy)*(1-dz)) # Corner (1,0,0) - atomic write
|
|
1273
|
-
cuda.atomic.add(d_vol, (ix0, iy0 + 1, iz0), cval * (1-dx)*dy*(1-dz)) # Corner (0,1,0) - atomic write
|
|
1274
|
-
cuda.atomic.add(d_vol, (ix0, iy0, iz0 + 1), cval * (1-dx)*(1-dy)*dz) # Corner (0,0,1) - atomic write
|
|
1275
|
-
cuda.atomic.add(d_vol, (ix0 + 1, iy0 + 1, iz0), cval * dx*dy*(1-dz)) # Corner (1,1,0) - atomic write
|
|
1276
|
-
cuda.atomic.add(d_vol, (ix0 + 1, iy0, iz0 + 1), cval * dx*(1-dy)*dz) # Corner (1,0,1) - atomic write
|
|
1277
|
-
cuda.atomic.add(d_vol, (ix0, iy0 + 1, iz0 + 1), cval * (1-dx)*dy*dz) # Corner (0,1,1) - atomic write
|
|
1278
|
-
cuda.atomic.add(d_vol, (ix0 + 1, iy0 + 1, iz0 + 1), cval * dx*dy*dz) # Corner (1,1,1) - atomic write
|
|
1303
|
+
cuda.atomic.add(d_vol, (ix0, iy0, iz0), cval * omdx*omdy*omdz)
|
|
1304
|
+
cuda.atomic.add(d_vol, (ix0 + 1, iy0, iz0), cval * dx *omdy*omdz)
|
|
1305
|
+
cuda.atomic.add(d_vol, (ix0, iy0 + 1, iz0), cval * omdx*dy *omdz)
|
|
1306
|
+
cuda.atomic.add(d_vol, (ix0, iy0, iz0 + 1), cval * omdx*omdy*dz)
|
|
1307
|
+
cuda.atomic.add(d_vol, (ix0 + 1, iy0 + 1, iz0), cval * dx *dy *omdz)
|
|
1308
|
+
cuda.atomic.add(d_vol, (ix0 + 1, iy0, iz0 + 1), cval * dx *omdy*dz)
|
|
1309
|
+
cuda.atomic.add(d_vol, (ix0, iy0 + 1, iz0 + 1), cval * omdx*dy *dz)
|
|
1310
|
+
cuda.atomic.add(d_vol, (ix0 + 1, iy0 + 1, iz0 + 1), cval * dx *dy *dz)
|
|
1279
1311
|
|
|
1280
1312
|
# === 3D VOXEL BOUNDARY CROSSING LOGIC ===
|
|
1281
1313
|
# Advance to next voxel based on which boundary is crossed first in 3D
|
|
@@ -1305,8 +1337,8 @@ class ParallelProjectorFunction(torch.autograd.Function):
|
|
|
1305
1337
|
|
|
1306
1338
|
Notes
|
|
1307
1339
|
-----
|
|
1308
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
1309
|
-
|
|
1340
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1341
|
+
method with interpolation for parallel beam CT geometry. The forward pass computes
|
|
1310
1342
|
the sinogram from a 2D image using parallel beam geometry. The backward pass
|
|
1311
1343
|
computes gradients using the adjoint backprojection operation. Requires
|
|
1312
1344
|
CUDA-capable hardware and a properly configured CUDA environment; all input
|
|
@@ -1358,7 +1390,7 @@ class ParallelProjectorFunction(torch.autograd.Function):
|
|
|
1358
1390
|
-----
|
|
1359
1391
|
- All input tensors must be on the same CUDA device.
|
|
1360
1392
|
- The operation is fully differentiable and supports autograd.
|
|
1361
|
-
- Uses the Siddon
|
|
1393
|
+
- Uses the Siddon method with bilinear interpolation for accurate ray tracing.
|
|
1362
1394
|
|
|
1363
1395
|
Examples
|
|
1364
1396
|
--------
|
|
@@ -1394,7 +1426,9 @@ class ParallelProjectorFunction(torch.autograd.Function):
|
|
|
1394
1426
|
grid, tpb = _grid_2d(n_angles, num_detectors)
|
|
1395
1427
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1396
1428
|
|
|
1397
|
-
|
|
1429
|
+
pt_stream = torch.cuda.current_stream()
|
|
1430
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1431
|
+
_parallel_2d_forward_kernel[grid, tpb, numba_stream](
|
|
1398
1432
|
d_image, Nx, Ny, d_sino, n_angles, num_detectors,
|
|
1399
1433
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr, cx, cy, _DTYPE(voxel_spacing)
|
|
1400
1434
|
)
|
|
@@ -1426,7 +1460,9 @@ class ParallelProjectorFunction(torch.autograd.Function):
|
|
|
1426
1460
|
grid, tpb = _grid_2d(n_angles, num_detectors)
|
|
1427
1461
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1428
1462
|
|
|
1429
|
-
|
|
1463
|
+
pt_stream = torch.cuda.current_stream()
|
|
1464
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1465
|
+
_parallel_2d_backward_kernel[grid, tpb, numba_stream](
|
|
1430
1466
|
d_grad_sino, n_angles, num_detectors,
|
|
1431
1467
|
d_img_grad, Nx, Ny,
|
|
1432
1468
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr, cx, cy, _DTYPE(voxel_spacing)
|
|
@@ -1443,8 +1479,8 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
|
|
|
1443
1479
|
|
|
1444
1480
|
Notes
|
|
1445
1481
|
-----
|
|
1446
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon
|
|
1447
|
-
|
|
1482
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1483
|
+
method with interpolation for parallel beam backprojection. The forward pass computes a 2D
|
|
1448
1484
|
reconstruction from sinogram data using parallel beam backprojection, and the
|
|
1449
1485
|
backward pass computes gradients via forward projection as the adjoint operation.
|
|
1450
1486
|
Requires CUDA-capable hardware and consistent device placements.
|
|
@@ -1491,7 +1527,7 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
|
|
|
1491
1527
|
-----
|
|
1492
1528
|
- All input tensors must be on the same CUDA device.
|
|
1493
1529
|
- The operation is fully differentiable and supports autograd.
|
|
1494
|
-
- Uses the Siddon
|
|
1530
|
+
- Uses the Siddon method with bilinear interpolation for accurate ray tracing.
|
|
1495
1531
|
|
|
1496
1532
|
Examples
|
|
1497
1533
|
--------
|
|
@@ -1527,7 +1563,9 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
|
|
|
1527
1563
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1528
1564
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1529
1565
|
|
|
1530
|
-
|
|
1566
|
+
pt_stream = torch.cuda.current_stream()
|
|
1567
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1568
|
+
_parallel_2d_backward_kernel[grid, tpb, numba_stream](
|
|
1531
1569
|
d_sino, n_ang, n_det, d_reco, Nx, Ny,
|
|
1532
1570
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr, cx, cy, _DTYPE(voxel_spacing)
|
|
1533
1571
|
)
|
|
@@ -1564,7 +1602,9 @@ class ParallelBackprojectorFunction(torch.autograd.Function):
|
|
|
1564
1602
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1565
1603
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1566
1604
|
|
|
1567
|
-
|
|
1605
|
+
pt_stream = torch.cuda.current_stream()
|
|
1606
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1607
|
+
_parallel_2d_forward_kernel[grid, tpb, numba_stream](
|
|
1568
1608
|
d_grad_out, Nx, Ny, d_sino_grad, n_ang, n_det,
|
|
1569
1609
|
_DTYPE(detector_spacing), d_cos, d_sin, cx, cy, _DTYPE(voxel_spacing)
|
|
1570
1610
|
)
|
|
@@ -1580,8 +1620,8 @@ class FanProjectorFunction(torch.autograd.Function):
|
|
|
1580
1620
|
|
|
1581
1621
|
Notes
|
|
1582
1622
|
-----
|
|
1583
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
1584
|
-
|
|
1623
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1624
|
+
method with interpolation for fan beam geometry, where rays diverge from a point
|
|
1585
1625
|
X-ray source to a linear detector array. The forward pass computes sinograms
|
|
1586
1626
|
using divergent beam geometry, and the backward pass computes gradients via
|
|
1587
1627
|
adjoint backprojection.
|
|
@@ -1633,7 +1673,7 @@ class FanProjectorFunction(torch.autograd.Function):
|
|
|
1633
1673
|
- All input tensors must be on the same CUDA device.
|
|
1634
1674
|
- The operation is fully differentiable and supports autograd.
|
|
1635
1675
|
- Fan beam geometry uses divergent rays from a point source to the detector.
|
|
1636
|
-
- Uses the Siddon
|
|
1676
|
+
- Uses the Siddon method with bilinear interpolation for accurate ray tracing.
|
|
1637
1677
|
|
|
1638
1678
|
Examples
|
|
1639
1679
|
--------
|
|
@@ -1664,7 +1704,9 @@ class FanProjectorFunction(torch.autograd.Function):
|
|
|
1664
1704
|
grid, tpb = _grid_2d(n_ang, num_detectors)
|
|
1665
1705
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1666
1706
|
|
|
1667
|
-
|
|
1707
|
+
pt_stream = torch.cuda.current_stream()
|
|
1708
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1709
|
+
_fan_2d_forward_kernel[grid, tpb, numba_stream](
|
|
1668
1710
|
d_image, Nx, Ny, d_sino, n_ang, num_detectors,
|
|
1669
1711
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr,
|
|
1670
1712
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, _DTYPE(voxel_spacing)
|
|
@@ -1698,7 +1740,9 @@ class FanProjectorFunction(torch.autograd.Function):
|
|
|
1698
1740
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1699
1741
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1700
1742
|
|
|
1701
|
-
|
|
1743
|
+
pt_stream = torch.cuda.current_stream()
|
|
1744
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1745
|
+
_fan_2d_backward_kernel[grid, tpb, numba_stream](
|
|
1702
1746
|
d_grad_sino, n_ang, n_det, d_img_grad, Nx, Ny,
|
|
1703
1747
|
_DTYPE(det_spacing), d_cos_arr, d_sin_arr,
|
|
1704
1748
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, _DTYPE(voxel_spacing)
|
|
@@ -1715,8 +1759,8 @@ class FanBackprojectorFunction(torch.autograd.Function):
|
|
|
1715
1759
|
|
|
1716
1760
|
Notes
|
|
1717
1761
|
-----
|
|
1718
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
1719
|
-
|
|
1762
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1763
|
+
method with interpolation for fan beam backprojection. Implements the adjoint
|
|
1720
1764
|
of the fan beam projection operator, distributing sinogram values back into
|
|
1721
1765
|
the reconstruction volume along divergent ray paths. The forward pass
|
|
1722
1766
|
computes reconstruction from sinogram data, and the backward pass computes
|
|
@@ -1771,7 +1815,7 @@ class FanBackprojectorFunction(torch.autograd.Function):
|
|
|
1771
1815
|
- All input tensors must be on the same CUDA device.
|
|
1772
1816
|
- The operation is fully differentiable and supports autograd.
|
|
1773
1817
|
- Fan beam geometry uses divergent rays from a point source to the detector.
|
|
1774
|
-
- Uses the Siddon
|
|
1818
|
+
- Uses the Siddon method with bilinear interpolation for accurate ray tracing.
|
|
1775
1819
|
|
|
1776
1820
|
Examples
|
|
1777
1821
|
--------
|
|
@@ -1802,7 +1846,9 @@ class FanBackprojectorFunction(torch.autograd.Function):
|
|
|
1802
1846
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1803
1847
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1804
1848
|
|
|
1805
|
-
|
|
1849
|
+
pt_stream = torch.cuda.current_stream()
|
|
1850
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1851
|
+
_fan_2d_backward_kernel[grid, tpb, numba_stream](
|
|
1806
1852
|
d_sino, n_ang, n_det, d_reco, Nx, Ny,
|
|
1807
1853
|
_DTYPE(detector_spacing), d_cos_arr, d_sin_arr,
|
|
1808
1854
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, _DTYPE(voxel_spacing)
|
|
@@ -1836,7 +1882,9 @@ class FanBackprojectorFunction(torch.autograd.Function):
|
|
|
1836
1882
|
grid, tpb = _grid_2d(n_ang, n_det)
|
|
1837
1883
|
cx, cy = _DTYPE(Nx * 0.5), _DTYPE(Ny * 0.5)
|
|
1838
1884
|
|
|
1839
|
-
|
|
1885
|
+
pt_stream = torch.cuda.current_stream()
|
|
1886
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1887
|
+
_fan_2d_forward_kernel[grid, tpb, numba_stream](
|
|
1840
1888
|
d_grad_out, Nx, Ny, d_sino_grad, n_ang, n_det,
|
|
1841
1889
|
_DTYPE(det_spacing), d_cos_arr, d_sin_arr,
|
|
1842
1890
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, _DTYPE(voxel_spacing)
|
|
@@ -1853,8 +1901,8 @@ class ConeProjectorFunction(torch.autograd.Function):
|
|
|
1853
1901
|
|
|
1854
1902
|
Notes
|
|
1855
1903
|
-----
|
|
1856
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
1857
|
-
|
|
1904
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
1905
|
+
method with interpolation for 3D cone beam geometry. Rays emanate from a point
|
|
1858
1906
|
X-ray source to a 2D detector array capturing volumetric projection data.
|
|
1859
1907
|
The forward pass computes 3D projections, and the backward pass computes
|
|
1860
1908
|
gradients via adjoint 3D backprojection. Requires significant GPU memory.
|
|
@@ -1910,7 +1958,7 @@ class ConeProjectorFunction(torch.autograd.Function):
|
|
|
1910
1958
|
- All input tensors must be on the same CUDA device.
|
|
1911
1959
|
- The operation is fully differentiable and supports autograd.
|
|
1912
1960
|
- Cone beam geometry uses a point source and a 2D detector array.
|
|
1913
|
-
- Uses the Siddon
|
|
1961
|
+
- Uses the Siddon method with trilinear interpolation for accurate 3D ray tracing.
|
|
1914
1962
|
|
|
1915
1963
|
Examples
|
|
1916
1964
|
--------
|
|
@@ -1945,7 +1993,9 @@ class ConeProjectorFunction(torch.autograd.Function):
|
|
|
1945
1993
|
grid, tpb = _grid_3d(n_views, det_u, det_v)
|
|
1946
1994
|
cx, cy, cz = _DTYPE(W * 0.5), _DTYPE(H * 0.5), _DTYPE(D * 0.5)
|
|
1947
1995
|
|
|
1948
|
-
|
|
1996
|
+
pt_stream = torch.cuda.current_stream()
|
|
1997
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
1998
|
+
_cone_3d_forward_kernel[grid, tpb, numba_stream](
|
|
1949
1999
|
d_vol, W, H, D, d_sino, n_views, det_u, det_v,
|
|
1950
2000
|
_DTYPE(du), _DTYPE(dv), d_cos_arr, d_sin_arr,
|
|
1951
2001
|
_DTYPE(sdd), _DTYPE(sid),
|
|
@@ -1982,7 +2032,9 @@ class ConeProjectorFunction(torch.autograd.Function):
|
|
|
1982
2032
|
grid, tpb = _grid_3d(n_views, det_u, det_v)
|
|
1983
2033
|
cx, cy, cz = _DTYPE(W * 0.5), _DTYPE(H * 0.5), _DTYPE(D * 0.5)
|
|
1984
2034
|
|
|
1985
|
-
|
|
2035
|
+
pt_stream = torch.cuda.current_stream()
|
|
2036
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
2037
|
+
_cone_3d_backward_kernel[grid, tpb, numba_stream](
|
|
1986
2038
|
d_grad_sino, n_views, det_u, det_v, d_vol_grad, W, H, D,
|
|
1987
2039
|
_DTYPE(du), _DTYPE(dv), d_cos_arr, d_sin_arr,
|
|
1988
2040
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, cz, _DTYPE(voxel_spacing)
|
|
@@ -2000,8 +2052,8 @@ class ConeBackprojectorFunction(torch.autograd.Function):
|
|
|
2000
2052
|
|
|
2001
2053
|
Notes
|
|
2002
2054
|
-----
|
|
2003
|
-
Provides a differentiable interface to the CUDA-accelerated Siddon-
|
|
2004
|
-
|
|
2055
|
+
Provides a differentiable interface to the CUDA-accelerated Siddon ray-tracing
|
|
2056
|
+
method with interpolation for 3D cone beam backprojection. The forward pass
|
|
2005
2057
|
computes a 3D reconstruction from cone beam projection data using
|
|
2006
2058
|
backprojection as the adjoint operation. The backward pass computes gradients
|
|
2007
2059
|
via 3D cone beam forward projection. Requires CUDA-capable hardware and
|
|
@@ -2068,7 +2120,7 @@ class ConeBackprojectorFunction(torch.autograd.Function):
|
|
|
2068
2120
|
- All input tensors must be on the same CUDA device.
|
|
2069
2121
|
- The operation is fully differentiable and supports autograd.
|
|
2070
2122
|
- Cone beam geometry uses a point source and a 2D detector array.
|
|
2071
|
-
- Uses the Siddon
|
|
2123
|
+
- Uses the Siddon method with trilinear interpolation for accurate 3D ray tracing.
|
|
2072
2124
|
|
|
2073
2125
|
Examples
|
|
2074
2126
|
--------
|
|
@@ -2101,7 +2153,9 @@ class ConeBackprojectorFunction(torch.autograd.Function):
|
|
|
2101
2153
|
grid, tpb = _grid_3d(n_views, n_u, n_v)
|
|
2102
2154
|
cx, cy, cz = _DTYPE(W * 0.5), _DTYPE(H * 0.5), _DTYPE(D * 0.5)
|
|
2103
2155
|
|
|
2104
|
-
|
|
2156
|
+
pt_stream = torch.cuda.current_stream()
|
|
2157
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
2158
|
+
_cone_3d_backward_kernel[grid, tpb, numba_stream](
|
|
2105
2159
|
d_sino, n_views, n_u, n_v, d_reco, W, H, D,
|
|
2106
2160
|
_DTYPE(du), _DTYPE(dv), d_cos_arr, d_sin_arr,
|
|
2107
2161
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, cz, _DTYPE(voxel_spacing)
|
|
@@ -2139,7 +2193,9 @@ class ConeBackprojectorFunction(torch.autograd.Function):
|
|
|
2139
2193
|
grid, tpb = _grid_3d(n_views, n_u, n_v)
|
|
2140
2194
|
cx, cy, cz = _DTYPE(W * 0.5), _DTYPE(H * 0.5), _DTYPE(D * 0.5)
|
|
2141
2195
|
|
|
2142
|
-
|
|
2196
|
+
pt_stream = torch.cuda.current_stream()
|
|
2197
|
+
numba_stream = cuda.external_stream(pt_stream.cuda_stream)
|
|
2198
|
+
_cone_3d_forward_kernel[grid, tpb, numba_stream](
|
|
2143
2199
|
d_grad_out, W, H, D, d_sino_grad, n_views, n_u, n_v,
|
|
2144
2200
|
_DTYPE(du), _DTYPE(dv), d_cos_arr, d_sin_arr,
|
|
2145
2201
|
_DTYPE(sdd), _DTYPE(sid), cx, cy, cz, _DTYPE(voxel_spacing)
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "diffct"
|
|
7
|
-
version = "1.2.5"
|
|
7
|
+
version = "1.2.7"
|
|
8
8
|
description = "A CUDA-based library for computed tomography (CT) projection and reconstruction with differentiable operators"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [
|
|
@@ -36,5 +36,4 @@ where = ["."]
|
|
|
36
36
|
[tool.hatch.envs.default]
|
|
37
37
|
python = "python"
|
|
38
38
|
|
|
39
|
-
[tool.hatch.envs.default.env-vars]
|
|
40
|
-
PYTHONDONTWRITEBYTECODE = "1"
|
|
39
|
+
[tool.hatch.envs.default.env-vars]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|