AOT-biomaps 2.9.261-py3-none-any.whl → 2.9.318-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of AOT-biomaps might be problematic.
- AOT_biomaps/AOT_Experiment/Tomography.py +124 -0
- AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py +400 -10
- AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py +207 -84
- AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py +442 -11
- AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py +48 -26
- AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py +172 -134
- AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
- AOT_biomaps/AOT_Recon/AlgebraicRecon.py +27 -20
- AOT_biomaps/AOT_Recon/PrimalDualRecon.py +94 -41
- AOT_biomaps/AOT_Recon/ReconTools.py +164 -18
- AOT_biomaps/__init__.py +58 -1
- {aot_biomaps-2.9.261.dist-info → aot_biomaps-2.9.318.dist-info}/METADATA +1 -1
- {aot_biomaps-2.9.261.dist-info → aot_biomaps-2.9.318.dist-info}/RECORD +15 -14
- {aot_biomaps-2.9.261.dist-info → aot_biomaps-2.9.318.dist-info}/WHEEL +0 -0
- {aot_biomaps-2.9.261.dist-info → aot_biomaps-2.9.318.dist-info}/top_level.txt +0 -0
@@ -287,6 +287,56 @@ class Tomography(Experiment):
         if not self._check_patterns(self.patterns):
             raise ValueError("Generated patterns failed validation.")
 
+    def selectAngles(self, angles):
+
+        if self.AOsignal_withTumor is None and self.AOsignal_withoutTumor is None:
+            raise ValueError("AO signals are not initialized. Please load or generate the AO signals first.")
+        if self.AcousticFields is None or len(self.AcousticFields) == 0:
+            raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")
+        newAcousticFields = []
+        index = []
+        for i, field in enumerate(self.AcousticFields):
+            if field.angle in angles:
+                newAcousticFields.append(field)
+                index.append(i)
+        if self.AOsignal_withTumor is not None:
+            self.AOsignal_withTumor = self.AOsignal_withTumor[:, index]
+        if self.AOsignal_withoutTumor is not None:
+            self.AOsignal_withoutTumor = self.AOsignal_withoutTumor[:, index]
+        self.AcousticFields = newAcousticFields
+
+    def selectPatterns(self, pattern_names):
+        if self.AOsignal_withTumor is None and self.AOsignal_withoutTumor is None:
+            raise ValueError("AO signals are not initialized. Please load or generate the AO signals first.")
+        if self.AcousticFields is None or len(self.AcousticFields) == 0:
+            raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")
+        newAcousticFields = []
+        index = []
+        for i, field in enumerate(self.AcousticFields):
+            if field.pattern.activeList in pattern_names:
+                newAcousticFields.append(field)
+                index.append(i)
+        if self.AOsignal_withTumor is not None:
+            self.AOsignal_withTumor = self.AOsignal_withTumor[:, index]
+        if self.AOsignal_withoutTumor is not None:
+            self.AOsignal_withoutTumor = self.AOsignal_withoutTumor[:, index]
+        self.AcousticFields = newAcousticFields
+
+    def selectRandom(self, N):
+        if self.AOsignal_withTumor is None and self.AOsignal_withoutTumor is None:
+            raise ValueError("AO signals are not initialized. Please load or generate the AO signals first.")
+        if self.AcousticFields is None or len(self.AcousticFields) == 0:
+            raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")
+        if N > len(self.AcousticFields):
+            raise ValueError("N is larger than the number of available AcousticFields.")
+        indices = np.random.choice(len(self.AcousticFields), size=N, replace=False)
+        newAcousticFields = [self.AcousticFields[i] for i in indices]
+        if self.AOsignal_withTumor is not None:
+            self.AOsignal_withTumor = self.AOsignal_withTumor[:, indices]
+        if self.AOsignal_withoutTumor is not None:
+            self.AOsignal_withoutTumor = self.AOsignal_withoutTumor[:, indices]
+        self.AcousticFields = newAcousticFields
+
     def _generate_patterns(self, N):
         def format_angle(a):
             return f"{'1' if a < 0 else '0'}{abs(a):02d}"
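The three new selection helpers (selectAngles, selectPatterns, selectRandom) all follow the same pattern: build an index list of the emissions to keep, filter AcousticFields with it, and slice the AO-signal columns with the same indices so fields and signals stay aligned. Below is a minimal, self-contained sketch of that pattern using stand-in objects and arbitrary shapes (illustration only, not the package API):

import numpy as np

class _Field:  # stand-in for an acoustic field carrying its plane-wave angle
    def __init__(self, angle):
        self.angle = angle

fields = [_Field(a) for a in (-10, -5, 0, 5, 10)]
ao_signal = np.random.rand(2048, len(fields))   # (time samples, emissions)

kept_angles = [-10, 0, 10]
index = [i for i, f in enumerate(fields) if f.angle in kept_angles]
fields = [fields[i] for i in index]
ao_signal = ao_signal[:, index]                 # columns follow the kept fields

print(len(fields), ao_signal.shape)             # 3 (2048, 3)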
@@ -390,6 +440,80 @@ class Tomography(Experiment):
 
         return True
 
+    def applyApodisation(self, alpha=0.3, divergence_deg=0.5):
+        """
+        Apply a dynamic apodization to the acoustic fields stored in the object.
+        The apodization follows the emission angle and the natural divergence of the beam,
+        suppressing diffraction lobes (edge artifacts) without touching the useful signal.
+        Args:
+            probe_width (float): Active physical width of the probe (e.g. 40e-3 for 40 mm).
+            alpha (float): Tukey parameter (0.0 = rectangular, 1.0 = Hann). 0.3 is a good compromise.
+            divergence_deg (float): Opening angle of the mask, used to follow the widening of the beam.
+                0.0 = straight, 0.5 = slight opening (recommended).
+        """
+        print(f"Applying apodization (alpha={alpha}, div={divergence_deg}°) to {len(self.AcousticFields)} fields...")
+
+        probe_width = self.params.acoustic['num_elements'] * self.params.acoustic['element_width']
+
+        for i in trange(len(self.AcousticFields), desc="Apodisation"):
+            # 1. Retrieve the field data and the emission angle
+            field = self.AcousticFields[i].field  # May be (Z, X) or (Time, Z, X)
+            angle = self.AcousticFields[i].angle  # Plane-wave angle
+
+            # 2. Retrieve or build the physical axes
+            nz, nx = field.shape[-2:]
+
+            if hasattr(self, 'x_axis') and self.x_axis is not None:
+                x_axis = self.x_axis
+            else:
+                # Default axis centred on 0 (e.g. -20 mm to +20 mm)
+                x_axis = np.linspace(-probe_width/2, probe_width/2, nx)
+
+            if hasattr(self, 'z_axis') and self.z_axis is not None:
+                z_axis = self.z_axis
+            else:
+                # Default axis (e.g. 0 to 40 mm, based on a standard or arbitrary pitch)
+                estimated_depth = 40e-3  # Arbitrary value when unknown
+                z_axis = np.linspace(0, estimated_depth, nz)
+
+            # 3. Build the grids for the mask
+            Z, X = np.meshgrid(z_axis, x_axis, indexing='ij')
+
+            # 4. Steered geometry (follow the emission angle)
+            angle_rad = np.deg2rad(angle)
+            X_aligned = X - Z * np.tan(angle_rad)
+
+            # 5. Dynamic mask half-width (divergence)
+            div_rad = np.deg2rad(divergence_deg)
+            current_half_width = (probe_width / 2.0) + Z * np.tan(div_rad)
+
+            # 6. Normalisation and Tukey mask construction
+            X_norm = np.divide(X_aligned, current_half_width, out=np.zeros_like(X_aligned), where=current_half_width!=0)
+
+            mask = np.zeros_like(X_norm)
+            plateau_threshold = 1.0 * (1 - alpha)
+
+            # Central zone (plateau = 1)
+            mask[np.abs(X_norm) <= plateau_threshold] = 1.0
+
+            # Transition zone (cosine taper)
+            transition_indices = (np.abs(X_norm) > plateau_threshold) & (np.abs(X_norm) <= 1.0)
+            if np.any(transition_indices):
+                x_trans = np.abs(X_norm[transition_indices]) - plateau_threshold
+                width_trans = 1.0 * alpha
+                mask[transition_indices] = 0.5 * (1 + np.cos(np.pi * x_trans / width_trans))
+
+            # 7. Apply the mask (2D vs 3D handling)
+            if field.ndim == 3:
+                field_apodized = field * mask[np.newaxis, :, :]
+            else:
+                field_apodized = field * mask
+
+            # 8. Update the object
+            self.AcousticFields[i].field = field_apodized
+
+        print("Apodization finished.")
+
     # PRIVATE METHODS
     def _generateAcousticFields_STRUCT_CPU(self, fieldDataPath=None, show_log=False, nameBlock=None):
         if self.patterns is None:
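The mask built by applyApodisation is a Tukey (tapered-cosine) window sheared to follow the plane-wave steering angle and widened with depth by the divergence angle. A self-contained NumPy sketch of the same mask construction (grid sizes and probe width chosen arbitrarily for illustration):

import numpy as np

def steered_tukey_mask(z_axis, x_axis, angle_deg, probe_width, alpha=0.3, divergence_deg=0.5):
    # Depth/lateral grids (Z rows, X columns), as in the method above
    Z, X = np.meshgrid(z_axis, x_axis, indexing='ij')
    # Shear the lateral coordinate so the mask follows the steered beam
    X_aligned = X - Z * np.tan(np.deg2rad(angle_deg))
    # Half-width grows with depth to follow the natural beam divergence
    half_width = probe_width / 2.0 + Z * np.tan(np.deg2rad(divergence_deg))
    X_norm = np.divide(X_aligned, half_width, out=np.zeros_like(X_aligned), where=half_width != 0)

    mask = np.zeros_like(X_norm)
    plateau = 1.0 - alpha
    mask[np.abs(X_norm) <= plateau] = 1.0                             # flat top
    trans = (np.abs(X_norm) > plateau) & (np.abs(X_norm) <= 1.0)      # cosine taper
    mask[trans] = 0.5 * (1 + np.cos(np.pi * (np.abs(X_norm[trans]) - plateau) / alpha))
    return mask

z = np.linspace(0, 40e-3, 200)          # 0 to 40 mm depth
x = np.linspace(-20e-3, 20e-3, 128)     # 40 mm wide probe
mask = steered_tukey_mask(z, x, angle_deg=10, probe_width=40e-3)
print(mask.shape, float(mask.min()), float(mask.max()))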
@@ -1,34 +1,57 @@
 from AOT_biomaps.Config import config
+from AOT_biomaps.AOT_Recon.ReconTools import calculate_memory_requirement, check_gpu_memory
+from AOT_biomaps.AOT_Recon.ReconEnums import SMatrixType
+
 import torch
 import numpy as np
 from tqdm import trange
-
+import pycuda.driver as drv
+import torch.cuda
+import gc
+
+
 
 def LS(
     SMatrix,
     y,
-    numIterations=
-    alpha=1e-3,
+    numIterations=100,
     isSavingEachIteration=True,
     withTumor=True,
+    alpha=1e-1,
     device=None,
+    use_numba=False,
+    denominator_threshold=1e-6,
     max_saves=5000,
-    show_logs=True
+    show_logs=True,
+    smatrixType=SMatrixType.SELL
 ):
     """
     Least Squares reconstruction using Projected Gradient Descent (PGD) with non-negativity constraint.
     Currently only implements the stable GPU version.
     """
     tumor_str = "WITH" if withTumor else "WITHOUT"
-    #
+    # Auto-select device and method
     if device is None:
         if torch.cuda.is_available() and check_gpu_memory(config.select_best_gpu(), calculate_memory_requirement(SMatrix, y), show_logs=show_logs):
-
-
+            device = torch.device(f"cuda:{config.select_best_gpu()}")
+            use_gpu = True
+        else:
+            device = torch.device("cpu")
+            use_gpu = False
+    else:
+        use_gpu = device.type == "cuda"
+    # Dispatch to the appropriate implementation
+    if use_gpu:
+        if smatrixType == SMatrixType.CSR:
+            return _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
+        elif smatrixType == SMatrixType.SELL:
+            return _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
+        elif smatrixType == SMatrixType.DENSE:
+            return _LS_GPU_stable(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
+        else:
+            raise ValueError("Unsupported SMatrixType for GPU LS.")
     else:
-
-        raise RuntimeError("Only GPU implementation is available for now.")
-    return _LS_GPU_stable(SMatrix, y, numIterations, alpha, isSavingEachIteration, tumor_str, max_saves, show_logs=show_logs)
+        raise NotImplementedError("Only GPU implementations are currently available for LS.")
 
 def _LS_GPU_stable(SMatrix, y, numIterations, alpha, isSavingEachIteration, tumor_str, max_saves=5000, show_logs=True):
     """
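With this hunk, LS() becomes a dispatcher: it auto-selects a GPU when one has enough free memory, then routes to the CSR, SELL, or dense implementation according to smatrixType. A hedged call sketch follows (SMatrix and y are assumed to have been built beforehand with the package's sparse system-matrix classes; argument values are illustrative):

from AOT_biomaps.AOT_Recon.AOT_Optimizers.LS import LS
from AOT_biomaps.AOT_Recon.ReconEnums import SMatrixType

# SMatrix: a SparseSMatrix_SELL (or _CSR) instance, y: the measured AO signal.
recons, iterations = LS(
    SMatrix,
    y,
    numIterations=200,
    isSavingEachIteration=True,     # returns the saved reconstructions and their iteration indices
    withTumor=True,
    smatrixType=SMatrixType.SELL,   # dispatches to _LS_CG_sparseSELL_pycuda on GPU
)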
@@ -104,3 +127,370 @@ def _LS_CPU_opti(*args, **kwargs):
 
 def _LS_CPU_basic(*args, **kwargs):
     raise NotImplementedError("Only _LS_GPU_stable is implemented for now.")
+
+def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs=True):
+    """
+    Least-squares (LS) reconstruction via Conjugate Gradient (CG) on the CSR format.
+    Takes the same arguments as the MLEM function, without Python sub-functions.
+
+    SMatrix: SparseSMatrix_CSR instance (already allocated)
+    y: measured data (1D np.float32 of size TN)
+    """
+    final_result = None
+
+    # Parameters unused by CG but kept for signature compatibility: denominator_threshold, device
+
+    # --- Dot-product logic (inlined) ---
+    def _dot_product_gpu(mod, a_ptr, b_ptr, N_int, stream):
+        block_size = 256
+        grid_size = (N_int + block_size - 1) // block_size
+
+        reduction_host = np.empty(grid_size, dtype=np.float32)
+        reduction_buffer = drv.mem_alloc(reduction_host.nbytes)
+
+        dot_kernel = mod.get_function("dot_product_reduction_kernel")
+
+        dot_kernel(reduction_buffer, a_ptr, b_ptr, np.int32(N_int),
+                   block=(block_size, 1, 1), grid=(grid_size, 1, 1), stream=stream)
+
+        drv.memcpy_dtoh(reduction_host, reduction_buffer)
+        total_dot = np.sum(reduction_host)
+
+        reduction_buffer.free()
+        return total_dot
+    # -----------------------------------------------
+
+    try:
+        if not isinstance(SMatrix, SMatrix.__class__):
+            raise TypeError("SMatrix must be a SparseSMatrix_CSR object")
+
+        if SMatrix.ctx:
+            SMatrix.ctx.push()
+
+        dtype = np.float32
+        TN = SMatrix.N * SMatrix.T
+        ZX = SMatrix.Z * SMatrix.X
+        Z = SMatrix.Z
+        X = SMatrix.X
+        block_size = 256
+        tolerance = 1e-12
+
+        if show_logs:
+            print(f"Executing on GPU device index: {SMatrix.device.primary_context.device.name()}")
+            print(f"Dim X: {X}, Dim Z: {Z}, TN: {TN}, ZX: {ZX}")
+
+        stream = drv.Stream()
+
+        # Fetch the kernels
+        projection_kernel = SMatrix.sparse_mod.get_function('projection_kernel__CSR')
+        backprojection_kernel = SMatrix.sparse_mod.get_function('backprojection_kernel__CSR')
+        axpby_kernel = SMatrix.sparse_mod.get_function("vector_axpby_kernel")
+        minus_axpy_kernel = SMatrix.sparse_mod.get_function("vector_minus_axpy_kernel")
+
+        # --- Buffer allocation (raw pointers) ---
+        y = y.T.flatten().astype(dtype)
+        y_gpu = drv.mem_alloc(y.nbytes)
+        drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
+
+        theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)  # lambda
+        drv.memcpy_htod_async(theta_flat_gpu, np.full(ZX, 0.1, dtype=dtype), stream)
+
+        q_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)  # q = A*p
+        r_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)  # r (residual)
+        p_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)  # p (direction)
+        z_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)  # z = A^T A p
+        ATy_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)  # A^T y (constant)
+
+        # --- CG initialisation ---
+
+        # 1. ATy = A^T * y
+        drv.memset_d32_async(ATy_flat_gpu, 0, ZX, stream)
+        backprojection_kernel(ATy_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
+                              y_gpu, np.int32(TN),
+                              block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
+
+        # 2. q = A * theta_0
+        projection_kernel(q_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
+                          theta_flat_gpu, np.int32(TN),
+                          block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
+
+        # 3. r_temp = A^T * q = A^T A theta_0
+        drv.memset_d32_async(r_flat_gpu, 0, ZX, stream)
+        backprojection_kernel(r_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
+                              q_flat_gpu, np.int32(TN),
+                              block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
+
+        # 4. r_0 = ATy - r_temp (r = ATy + (-1)*r_temp)
+        axpby_kernel(r_flat_gpu, ATy_flat_gpu, r_flat_gpu,
+                     np.float32(1.0), np.float32(-1.0), np.int32(ZX),
+                     block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
+
+        # 5. p_0 = r_0
+        drv.memcpy_dtod(p_flat_gpu, r_flat_gpu, ZX * np.dtype(dtype).itemsize)
+
+        # 6. rho_prev = ||r_0||^2
+        rho_prev = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
+
+        # --- Iteration loop ---
+        saved_theta, saved_indices = [], []
+        if numIterations <= max_saves:
+            save_indices = list(range(numIterations))
+        else:
+            save_indices = list(range(0, numIterations, max(1, numIterations // max_saves)))
+            if save_indices[-1] != numIterations - 1:
+                save_indices.append(numIterations - 1)
+
+        description = f"AOT-BioMaps -- LS-CG (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
+        iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        for it in iterator:
+            # a. q = A * p
+            projection_kernel(q_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
+                              p_flat_gpu, np.int32(TN),
+                              block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
+
+            # b. z = A^T * q = A^T A p
+            drv.memset_d32_async(z_flat_gpu, 0, ZX, stream)
+            backprojection_kernel(z_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
+                                  q_flat_gpu, np.int32(TN),
+                                  block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
+
+            # c. alpha = rho_prev / <p, z>
+            pAp = _dot_product_gpu(SMatrix.sparse_mod, p_flat_gpu, z_flat_gpu, ZX, stream)
+
+            if abs(pAp) < 1e-15: break
+            alpha = rho_prev / pAp
+
+            # d. theta = theta + alpha * p
+            axpby_kernel(theta_flat_gpu, theta_flat_gpu, p_flat_gpu,
+                         np.float32(1.0), alpha, np.int32(ZX),
+                         block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
+
+            # e. r = r - alpha * z
+            minus_axpy_kernel(r_flat_gpu, z_flat_gpu, alpha, np.int32(ZX),
+                              block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
+
+            # f. rho_curr = ||r||^2
+            rho_curr = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
+
+            if rho_curr < tolerance: break
+
+            # g. beta = rho_curr / rho_prev
+            beta = rho_curr / rho_prev
+
+            # h. p = r + beta * p
+            axpby_kernel(p_flat_gpu, r_flat_gpu, p_flat_gpu,
+                         np.float32(1.0), beta, np.int32(ZX),
+                         block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
+
+            rho_prev = rho_curr
+
+            if show_logs and (it % 10 == 0 or it == numIterations - 1):
+                drv.Context.synchronize()
+
+            if isSavingEachIteration and it in save_indices:
+                theta_host = np.empty(ZX, dtype=dtype)
+                drv.memcpy_dtoh(theta_host, theta_flat_gpu)
+                saved_theta.append(theta_host.reshape(Z, X))
+                saved_indices.append(it)
+
+        drv.Context.synchronize()
+
+        final_result = np.empty(ZX, dtype=dtype)
+        drv.memcpy_dtoh(final_result, theta_flat_gpu)
+        final_result = final_result.reshape(Z, X)
+
+        # Free the buffers
+        y_gpu.free(); q_flat_gpu.free(); r_flat_gpu.free(); p_flat_gpu.free(); z_flat_gpu.free(); theta_flat_gpu.free(); ATy_flat_gpu.free()
+
+        return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
+
+    except Exception as e:
+        print(f"Error in LS_CG_sparseCSR_pycuda: {type(e).__name__}: {e}")
+        gc.collect()
+        return None, None
+
+    finally:
+        if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
+            SMatrix.ctx.pop()
+
+def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs=True):
+    """
+    Least-squares (LS) reconstruction via Conjugate Gradient (CG) on the SELL-C-sigma format.
+    Takes the same arguments as the MLEM function, without Python sub-functions.
+
+    SMatrix: SparseSMatrix_SELL instance (already allocated)
+    y: measured data (1D np.float32 of size TN)
+    """
+    final_result = None
+
+    # --- Dot-product logic (inlined) ---
+    def _dot_product_gpu(mod, a_ptr, b_ptr, N_int, stream):
+        block_size = 256
+        grid_size = (N_int + block_size - 1) // block_size
+
+        reduction_host = np.empty(grid_size, dtype=np.float32)
+        reduction_buffer = drv.mem_alloc(reduction_host.nbytes)
+
+        dot_kernel = mod.get_function("dot_product_reduction_kernel")
+
+        dot_kernel(reduction_buffer, a_ptr, b_ptr, np.int32(N_int),
+                   block=(block_size, 1, 1), grid=(grid_size, 1, 1), stream=stream)
+
+        drv.memcpy_dtoh(reduction_host, reduction_buffer)
+        total_dot = np.sum(reduction_host)
+
+        reduction_buffer.free()
+        return total_dot
+    # -----------------------------------------------
+
+    try:
+        if not isinstance(SMatrix, SMatrix.__class__):
+            raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
+        if SMatrix.sell_values_gpu is None:
+            raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")
+
+        if SMatrix.ctx:
+            SMatrix.ctx.push()
+
+        dtype = np.float32
+        TN = int(SMatrix.N * SMatrix.T)
+        ZX = int(SMatrix.Z * SMatrix.X)
+        Z = SMatrix.Z
+        X = SMatrix.X
+        block_size = 256
+        tolerance = 1e-12
+
+        # SELL parameters and kernels
+        projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__SELL")
+        backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__SELL")
+        axpby_kernel = SMatrix.sparse_mod.get_function("vector_axpby_kernel")
+        minus_axpy_kernel = SMatrix.sparse_mod.get_function("vector_minus_axpy_kernel")
+        slice_height = np.int32(SMatrix.slice_height)
+        grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
+
+        stream = drv.Stream()
+
+        # Buffer allocation
+        y = y.T.flatten().astype(dtype)
+        y_gpu = drv.mem_alloc(y.nbytes)
+        drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
+
+        theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
+        drv.memcpy_htod_async(theta_flat_gpu, np.full(ZX, 0.1, dtype=dtype), stream)
+
+        q_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
+        r_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
+        p_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
+        z_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
+        ATy_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
+
+        # --- CG initialisation ---
+
+        # 1. ATy = A^T * y
+        drv.memset_d32_async(ATy_flat_gpu, 0, ZX, stream)
+        backprojection_kernel(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, SMatrix.slice_ptr_gpu, SMatrix.slice_len_gpu,
+                              y_gpu, ATy_flat_gpu, np.int32(TN), slice_height,
+                              block=(block_size, 1, 1), grid=grid_rows, stream=stream)
+
+        # 2. q = A * theta_0
+        projection_kernel(q_flat_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, SMatrix.slice_ptr_gpu, SMatrix.slice_len_gpu,
+                          theta_flat_gpu, np.int32(TN), slice_height,
+                          block=(block_size, 1, 1), grid=grid_rows, stream=stream)
+
+        # 3. r_temp = A^T * q = A^T A theta_0
+        drv.memset_d32_async(r_flat_gpu, 0, ZX, stream)
+        backprojection_kernel(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, SMatrix.slice_ptr_gpu, SMatrix.slice_len_gpu,
+                              q_flat_gpu, r_flat_gpu, np.int32(TN), slice_height,
+                              block=(block_size, 1, 1), grid=grid_rows, stream=stream)
+
+        # 4. r_0 = ATy - r_temp
+        axpby_kernel(r_flat_gpu, ATy_flat_gpu, r_flat_gpu,
+                     np.float32(1.0), np.float32(-1.0), np.int32(ZX),
+                     block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
+
+        # 5. p_0 = r_0
+        drv.memcpy_dtod(p_flat_gpu, r_flat_gpu, ZX * np.dtype(dtype).itemsize)
+
+        # 6. rho_prev = ||r_0||^2
+        rho_prev = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
+
+        # --- Iteration loop ---
+        saved_theta, saved_indices = [], []
+        if numIterations <= max_saves:
+            save_indices = list(range(numIterations))
+        else:
+            save_indices = list(range(0, numIterations, max(1, numIterations // max_saves)))
+            if save_indices[-1] != numIterations - 1:
+                save_indices.append(numIterations - 1)
+
+        description = f"AOT-BioMaps -- LS-CG (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
+        iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        for it in iterator:
+            # a. q = A * p
+            projection_kernel(q_flat_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, SMatrix.slice_ptr_gpu, SMatrix.slice_len_gpu,
+                              p_flat_gpu, np.int32(TN), slice_height,
+                              block=(block_size, 1, 1), grid=grid_rows, stream=stream)
+
+            # b. z = A^T * q = A^T A p
+            drv.memset_d32_async(z_flat_gpu, 0, ZX, stream)
+            backprojection_kernel(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, SMatrix.slice_ptr_gpu, SMatrix.slice_len_gpu,
+                                  q_flat_gpu, z_flat_gpu, np.int32(TN), slice_height,
+                                  block=(block_size, 1, 1), grid=grid_rows, stream=stream)
+
+            # c. alpha = rho_prev / <p, z>
+            pAp = _dot_product_gpu(SMatrix.sparse_mod, p_flat_gpu, z_flat_gpu, ZX, stream)
+
+            if abs(pAp) < 1e-15: break
+            alpha = rho_prev / pAp
+
+            # d. theta = theta + alpha * p
+            axpby_kernel(theta_flat_gpu, theta_flat_gpu, p_flat_gpu,
+                         np.float32(1.0), alpha, np.int32(ZX),
+                         block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
+
+            # e. r = r - alpha * z
+            minus_axpy_kernel(r_flat_gpu, z_flat_gpu, alpha, np.int32(ZX),
+                              block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
+
+            # f. rho_curr = ||r||^2
+            rho_curr = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
+
+            if rho_curr < tolerance: break
+
+            # g. beta = rho_curr / rho_prev
+            beta = rho_curr / rho_prev
+
+            # h. p = r + beta * p
+            axpby_kernel(p_flat_gpu, r_flat_gpu, p_flat_gpu,
+                         np.float32(1.0), beta, np.int32(ZX),
+                         block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
+
+            rho_prev = rho_curr
+
+            stream.synchronize()
+            if isSavingEachIteration and it in save_indices:
+                out = np.empty(ZX, dtype=dtype)
+                drv.memcpy_dtoh(out, theta_flat_gpu)
+                saved_theta.append(out.reshape((Z, X)))
+                saved_indices.append(it)
+
+        # final copy
+        res = np.empty(ZX, dtype=np.float32)
+        drv.memcpy_dtoh(res, theta_flat_gpu)
+        final_result = res.reshape((Z, X))
+
+        # free temporaries
+        y_gpu.free(); q_flat_gpu.free(); r_flat_gpu.free(); p_flat_gpu.free(); z_flat_gpu.free(); theta_flat_gpu.free(); ATy_flat_gpu.free()
+
+        return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
+
+    except Exception as e:
+        print(f"Error in LS_CG_sparseSELL_pycuda: {type(e).__name__}: {e}")
+        gc.collect()
+        return None, None
+
+    finally:
+        if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
+            SMatrix.ctx.pop()