AOT-biomaps 2.9.294__py3-none-any.whl → 2.9.321__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AOT-biomaps might be problematic. Click here for more details.
- AOT_biomaps/AOT_Acoustic/AcousticTools.py +0 -2
- AOT_biomaps/AOT_Experiment/ExperimentTools.py +60 -0
- AOT_biomaps/AOT_Experiment/Tomography.py +207 -3
- AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py +190 -102
- AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py +79 -47
- AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
- AOT_biomaps/AOT_Recon/ReconTools.py +0 -3
- AOT_biomaps/__init__.py +28 -1
- {aot_biomaps-2.9.294.dist-info → aot_biomaps-2.9.321.dist-info}/METADATA +1 -1
- {aot_biomaps-2.9.294.dist-info → aot_biomaps-2.9.321.dist-info}/RECORD +12 -11
- {aot_biomaps-2.9.294.dist-info → aot_biomaps-2.9.321.dist-info}/WHEEL +0 -0
- {aot_biomaps-2.9.294.dist-info → aot_biomaps-2.9.321.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def calc_mat_os(xm, fx, dx, bool_active_list, signal_type):
    """
    Build a binary square-wave activation mask for one spatial frequency.

    The mask alternates between active and inactive runs of
    ``half_period_elements`` elements. The ``'sin'`` variant is the ``'cos'``
    pattern shifted by half of that run length.

    Args:
        xm: Sequence of element positions; only its length is used.
        fx: Spatial frequency. ``0`` selects the constant pattern
            (all active for ``'cos'``, all inactive for ``'sin'``).
        dx: Element pitch, in the length unit matching ``fx``.
        bool_active_list: 2-D array; only ``shape[1]`` (the number of output
            columns) is read.
        signal_type: ``'cos'`` or ``'sin'``.

    Returns:
        Boolean array of shape ``(len(xm), bool_active_list.shape[1])`` in
        which every column holds the same mask.

    Raises:
        ValueError: If ``signal_type`` is neither ``'cos'`` nor ``'sin'``.
    """
    # A misspelled type used to fall through to the sine branch silently.
    if signal_type not in ('cos', 'sin'):
        raise ValueError(
            f"signal_type must be 'cos' or 'sin', got {signal_type!r}."
        )

    num_els = len(xm)
    n_cols = bool_active_list.shape[1]

    if fx == 0:
        # Zero frequency: cos(0) = 1 -> everything on, sin(0) = 0 -> everything off.
        mask = np.full(num_els, signal_type == 'cos', dtype=bool)
    else:
        # Number of elements in half a period; never below 1 so that very
        # high frequencies still yield an alternating pattern.
        half_period_elements = max(1, round(1 / (2 * fx * dx)))
        # The sine pattern is the cosine pattern shifted by half a run.
        shift = 0 if signal_type == 'cos' else half_period_elements // 2
        mask = ((np.arange(num_els) + shift) // half_period_elements) % 2 == 0

    return np.tile(mask[:, np.newaxis], (1, n_cols))
|
|
30
|
+
|
|
31
|
+
def convert_to_hex_list(matrix):
|
|
32
|
+
"""
|
|
33
|
+
Convertit une matrice binaire en liste de strings hexa (paquets de 4 bits).
|
|
34
|
+
Chaque colonne devient une chaîne de caractères.
|
|
35
|
+
"""
|
|
36
|
+
n_els, n_scans = matrix.shape
|
|
37
|
+
|
|
38
|
+
# 1. Padding pour s'assurer que n_els est multiple de 4
|
|
39
|
+
remainder = n_els % 4
|
|
40
|
+
if remainder != 0:
|
|
41
|
+
padding = np.zeros((4 - remainder, n_scans))
|
|
42
|
+
matrix = np.vstack([matrix, padding])
|
|
43
|
+
|
|
44
|
+
# 2. Reshape pour isoler des blocs de 4 bits (nibbles)
|
|
45
|
+
# Shape résultante : (Nombre de blocs, 4 bits, Nombre de scans)
|
|
46
|
+
blocks = matrix.reshape(-1, 4, n_scans)
|
|
47
|
+
|
|
48
|
+
# 3. Calcul de la valeur décimale de chaque bloc (0 à 15)
|
|
49
|
+
# On considère le premier élément comme le bit de poids faible (LSB)
|
|
50
|
+
weights = np.array([1, 2, 4, 8]).reshape(1, 4, 1)
|
|
51
|
+
dec_values = np.sum(blocks * weights, axis=1).astype(int)
|
|
52
|
+
|
|
53
|
+
# 4. Conversion en caractères Hexadécimaux
|
|
54
|
+
# On définit la table de conversion pour la rapidité
|
|
55
|
+
hex_table = np.array(list("0123456789abcdef"))
|
|
56
|
+
hex_matrix = hex_table[dec_values]
|
|
57
|
+
|
|
58
|
+
# 5. Assemblage des chaînes (de l'élément N vers 0 pour l'ordre Shift Register standard)
|
|
59
|
+
return ["".join(hex_matrix[::-1, col]) for col in range(n_scans)]
|
|
60
|
+
|
|
@@ -2,6 +2,7 @@ from ._mainExperiment import Experiment
|
|
|
2
2
|
from AOT_biomaps.AOT_Acoustic.AcousticEnums import WaveType
|
|
3
3
|
from AOT_biomaps.AOT_Acoustic.StructuredWave import StructuredWave
|
|
4
4
|
from AOT_biomaps.Config import config
|
|
5
|
+
from AOT_biomaps.AOT_Experiment.ExperimentTools import calc_mat_os, convert_to_hex_list
|
|
5
6
|
import os
|
|
6
7
|
import psutil
|
|
7
8
|
import numpy as np
|
|
@@ -273,7 +274,7 @@ class Tomography(Experiment):
|
|
|
273
274
|
line = f"({coords}, {angles})\n"
|
|
274
275
|
file.write(line)
|
|
275
276
|
|
|
276
|
-
def generateActiveList(self, N):
|
|
277
|
+
def generateActiveList(self, N, decimations=None, angles=None):
    """
    Generate the activation patterns and store them in ``self.patterns``.

    Two modes are supported:

    * ``decimations`` and ``angles`` both given: the patterns come from
      ``_genereate_patterns_from_decimations`` and ``N`` is ignored.
    * otherwise: ``N`` patterns are produced by ``_generate_patterns`` and
      validated with ``_check_patterns``.

    Args:
        N (int): Number of patterns for the default generator.
        decimations: Optional decimation values for the square-wave generator.
        angles: Optional angles for the square-wave generator.

    Raises:
        ValueError: If ``N < 1`` in the default mode, or if the generated
            patterns fail validation.
    """
    if decimations is not None and angles is not None:
        # Previously this result was computed and then thrown away, so the
        # decimation-based patterns were always overwritten below.
        # NOTE(review): _check_patterns is not applied here because its
        # criteria are not visible from this block - confirm whether
        # decimation patterns should be validated too.
        self.patterns = self._genereate_patterns_from_decimations(decimations, angles)
        return

    if N < 1:
        raise ValueError("N must be a positive integer.")
    self.patterns = self._generate_patterns(N)
    if not self._check_patterns(self.patterns):
        raise ValueError("Generated patterns failed validation.")
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def selectAngles(self, angles):
    """
    Keep only the acoustic fields whose angle is in ``angles``.

    The matching columns of ``AOsignal_withTumor`` and
    ``AOsignal_withoutTumor`` (whichever is set) are kept in the same order,
    and ``self.AcousticFields`` is replaced by the filtered list.

    Args:
        angles: A single angle or a collection of angles to keep.

    Raises:
        ValueError: If neither AO signal is set, or if ``AcousticFields`` is
            missing or empty.
    """
    if self.AOsignal_withTumor is None and self.AOsignal_withoutTumor is None:
        raise ValueError("AO signals are not initialized. Please load or generate the AO signals first.")
    if self.AcousticFields is None or len(self.AcousticFields) == 0:
        raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")

    # Accept a single angle as well as a collection of angles.
    if np.isscalar(angles):
        angles = [angles]

    kept_indices = [
        i for i, field in enumerate(self.AcousticFields) if field.angle in angles
    ]
    kept_fields = [self.AcousticFields[i] for i in kept_indices]

    # Signal columns are indexed like AcousticFields, so filter them together.
    if self.AOsignal_withTumor is not None:
        self.AOsignal_withTumor = self.AOsignal_withTumor[:, kept_indices]
    if self.AOsignal_withoutTumor is not None:
        self.AOsignal_withoutTumor = self.AOsignal_withoutTumor[:, kept_indices]
    self.AcousticFields = kept_fields
|
|
312
|
+
|
|
313
|
+
def selectPatterns(self, pattern_names):
    """
    Keep only the acoustic fields whose ``pattern.activeList`` is requested.

    The matching columns of ``AOsignal_withTumor`` and
    ``AOsignal_withoutTumor`` (whichever is set) are kept in the same order,
    and ``self.AcousticFields`` is replaced by the filtered list.

    Args:
        pattern_names: A single pattern name or a collection of names.

    Raises:
        ValueError: If neither AO signal is set, or if ``AcousticFields`` is
            missing or empty.
    """
    if self.AOsignal_withTumor is None and self.AOsignal_withoutTumor is None:
        raise ValueError("AO signals are not initialized. Please load or generate the AO signals first.")
    if self.AcousticFields is None or len(self.AcousticFields) == 0:
        raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")

    # A bare string would turn the membership test below into a substring
    # search (e.g. "ff" matches inside "ff00"); treat it as a single name.
    if isinstance(pattern_names, str):
        pattern_names = [pattern_names]

    kept_indices = [
        i
        for i, field in enumerate(self.AcousticFields)
        if field.pattern.activeList in pattern_names
    ]
    kept_fields = [self.AcousticFields[i] for i in kept_indices]

    # Signal columns are indexed like AcousticFields, so filter them together.
    if self.AOsignal_withTumor is not None:
        self.AOsignal_withTumor = self.AOsignal_withTumor[:, kept_indices]
    if self.AOsignal_withoutTumor is not None:
        self.AOsignal_withoutTumor = self.AOsignal_withoutTumor[:, kept_indices]
    self.AcousticFields = kept_fields
|
|
329
|
+
|
|
330
|
+
def selectRandom(self, N, seed=None):
    """
    Keep ``N`` acoustic fields drawn at random without replacement.

    The matching columns of ``AOsignal_withTumor`` and
    ``AOsignal_withoutTumor`` (whichever is set) are kept in the drawn order,
    and ``self.AcousticFields`` is replaced by the drawn fields.

    Args:
        N (int): Number of fields to keep.
        seed: Optional seed for a reproducible draw. When ``None`` the draw
            uses the global NumPy random state through ``np.random.choice``.

    Raises:
        ValueError: If neither AO signal is set, if ``AcousticFields`` is
            missing or empty, or if ``N`` exceeds the number of fields.
    """
    if self.AOsignal_withTumor is None and self.AOsignal_withoutTumor is None:
        raise ValueError("AO signals are not initialized. Please load or generate the AO signals first.")
    if self.AcousticFields is None or len(self.AcousticFields) == 0:
        raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")

    n_fields = len(self.AcousticFields)
    if N > n_fields:
        raise ValueError("N is larger than the number of available AcousticFields.")

    if seed is None:
        # Legacy path: still honours a prior np.random.seed(...) call.
        indices = np.random.choice(n_fields, size=N, replace=False)
    else:
        indices = np.random.default_rng(seed).choice(n_fields, size=N, replace=False)

    drawn_fields = [self.AcousticFields[i] for i in indices]

    # Signal columns are indexed like AcousticFields, so select them together.
    if self.AOsignal_withTumor is not None:
        self.AOsignal_withTumor = self.AOsignal_withTumor[:, indices]
    if self.AOsignal_withoutTumor is not None:
        self.AOsignal_withoutTumor = self.AOsignal_withoutTumor[:, indices]
    self.AcousticFields = drawn_fields
|
|
344
|
+
|
|
345
|
+
def _genereate_patterns_from_decimations(self, decimations, angles):
    """
    Build square-wave activation patterns for the given decimations and angles.

    Scans are laid out in blocks of ``len(angles)`` columns:

    * if ``0`` is among the decimations, one leading block of fully active
      patterns;
    * then, for every non-zero decimation (ascending), four blocks in the
      order cos, complement of cos, sin, complement of sin.

    Within a block the columns follow the sorted angles.

    Args:
        decimations: Decimation values (list or array); each non-zero value
            is multiplied by ``1 / (num_elements * dx_mm)`` to get the
            spatial frequency.
        angles: Integer-valued angles (list or array).

    Returns:
        list[dict]: One ``{"fileName": "<hex pattern>_<angle code>"}`` entry
        per scan. The angle code is a sign digit (``1`` for negative, ``0``
        otherwise) followed by the absolute angle on at least two digits.

    Raises:
        ValueError: If an angle is not integer-valued.
    """
    angles = np.sort(np.asarray(angles))
    decimations = np.sort(np.asarray(decimations))

    # The angle code uses the 'd' format, which rejects floats. Accept
    # integer-valued floats (e.g. from np.linspace) by converting them.
    if not np.issubdtype(angles.dtype, np.integer):
        rounded = np.rint(angles)
        if not np.array_equal(rounded, angles):
            raise ValueError("angles must be integer-valued to be encoded in pattern names.")
        angles = rounded.astype(int)

    num_elements = self.params.acoustic['num_elements']
    dx_mm = self.params.general['dx'] * 1e3
    n_angles = angles.shape[0]

    # --- Number of scans ---
    if 0 in decimations:
        # Decimation 0 only contributes one (all-active) block.
        Nscans = 4 * n_angles * (decimations.shape[0] - 1) + n_angles
        offSet = n_angles
    else:
        Nscans = 4 * n_angles * decimations.shape[0]
        offSet = 0

    # Columns never written below (the decimation-0 block) stay all active.
    ActiveLIST = np.ones((num_elements, Nscans))
    Xm = np.arange(1, num_elements + 1) * dx_mm
    dFx = 1 / (num_elements * dx_mm)

    active_decimations = decimations[decimations != 0]

    for i_dec, decimation in enumerate(active_decimations):
        block_start = offSet + i_dec * 4 * n_angles
        # Column indices of the four phases. They are always < Nscans by
        # construction of Nscans, so no bounds filtering is required.
        Icos = np.arange(block_start, block_start + n_angles)
        Incos = Icos + n_angles
        Isin = Icos + 2 * n_angles
        Insin = Icos + 3 * n_angles

        fx = dFx * decimation

        ActiveLIST[:, Icos] = calc_mat_os(Xm, fx, dx_mm, ActiveLIST[:, Icos], 'cos')
        ActiveLIST[:, Incos] = 1 - ActiveLIST[:, Icos]

        ActiveLIST[:, Isin] = calc_mat_os(Xm, fx, dx_mm, ActiveLIST[:, Isin], 'sin')
        ActiveLIST[:, Insin] = 1 - ActiveLIST[:, Isin]

    # --- Conversion to the expected naming format ---
    hexa_list = convert_to_hex_list(ActiveLIST)

    def format_angle(a):
        return f"{'1' if a < 0 else '0'}{abs(a):02d}"

    # The angle of scan i cycles over the sorted angles.
    return [
        {"fileName": f"{hexa_list[i]}_{format_angle(angles[i % n_angles])}"}
        for i in range(Nscans)
    ]
|
|
413
|
+
|
|
414
|
+
def _generate_patterns(self, N,angles = None):
|
|
291
415
|
def format_angle(a):
|
|
292
416
|
return f"{'1' if a < 0 else '0'}{abs(a):02d}"
|
|
293
417
|
|
|
@@ -298,7 +422,13 @@ class Tomography(Experiment):
|
|
|
298
422
|
return hex_string
|
|
299
423
|
|
|
300
424
|
num_elements = self.params.acoustic['num_elements']
|
|
301
|
-
|
|
425
|
+
if angles is None:
|
|
426
|
+
angle_choices = list(range(-20, 21))
|
|
427
|
+
else:
|
|
428
|
+
# convert np.array to list if necessary
|
|
429
|
+
if isinstance(angles, np.ndarray):
|
|
430
|
+
angles = angles.tolist()
|
|
431
|
+
angle_choices = angles
|
|
302
432
|
|
|
303
433
|
# 1. Trouver TOUS les diviseurs PAIRS de num_elements (y compris num_elements)
|
|
304
434
|
divs = [d for d in range(2, num_elements + 1) if num_elements % d == 0 and d % 2 == 0]
|
|
@@ -390,6 +520,80 @@ class Tomography(Experiment):
|
|
|
390
520
|
|
|
391
521
|
return True
|
|
392
522
|
|
|
523
|
+
def applyApodisation(self, alpha=0.3, divergence_deg=0.5):
    """
    Apply a steered Tukey (tapered-cosine) mask to every stored acoustic field.

    For each entry of ``self.AcousticFields`` the mask is centred on the line
    ``x = z * tan(angle)`` and its half-width grows with depth as
    ``probe_width / 2 + z * tan(divergence_deg)``. The ``field`` attribute of
    each entry is replaced by the masked field.

    The probe width is ``num_elements * element_width`` taken from
    ``self.params.acoustic``. ``self.x_axis`` / ``self.z_axis`` are used when
    present and not ``None``; otherwise the lateral axis spans the probe
    width centred on 0 and the depth axis spans 0 to 40e-3 (arbitrary default).

    Args:
        alpha (float): Tukey parameter (0.0 = rectangle, 1.0 = Hann).
        divergence_deg (float): Opening angle of the mask in degrees
            (0.0 = constant width).

    Raises:
        ValueError: If ``self.AcousticFields`` is ``None``.
    """
    if self.AcousticFields is None:
        raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")

    print(f"Applying apodization (Alpha={alpha}, Div={divergence_deg}°) on {len(self.AcousticFields)} fields...")

    probe_width = self.params.acoustic['num_elements'] * self.params.acoustic['element_width']

    # Quantities that do not depend on the individual field.
    divergence_slope = np.tan(np.deg2rad(divergence_deg))
    plateau_threshold = 1.0 * (1 - alpha)
    stored_x_axis = getattr(self, 'x_axis', None)
    stored_z_axis = getattr(self, 'z_axis', None)

    for i in trange(len(self.AcousticFields), desc="Apodisation"):
        # 1. Data and steering angle of this field.
        entry = self.AcousticFields[i]
        field = entry.field  # last two axes are taken as (z, x)
        angle = entry.angle

        # 2. Physical axes: stored ones if available, defaults otherwise.
        nz, nx = field.shape[-2:]
        if stored_x_axis is not None:
            x_axis = stored_x_axis
        else:
            x_axis = np.linspace(-probe_width/2, probe_width/2, nx)
        if stored_z_axis is not None:
            z_axis = stored_z_axis
        else:
            estimated_depth = 40e-3  # arbitrary value when unknown
            z_axis = np.linspace(0, estimated_depth, nz)

        # 3. Grids for the mask.
        Z, X = np.meshgrid(z_axis, x_axis, indexing='ij')

        # 4. Lateral coordinate relative to the steered beam axis.
        X_aligned = X - Z * np.tan(np.deg2rad(angle))

        # 5. Depth-dependent half-width of the mask.
        current_half_width = (probe_width / 2.0) + Z * divergence_slope

        # 6. Normalised coordinate (0 where the half-width is 0) and Tukey mask.
        X_norm = np.divide(X_aligned, current_half_width, out=np.zeros_like(X_aligned), where=current_half_width!=0)
        abs_norm = np.abs(X_norm)

        mask = np.zeros_like(X_norm)
        mask[abs_norm <= plateau_threshold] = 1.0  # flat top

        # Raised-cosine roll-off between the plateau edge and |x| = 1.
        transition_indices = (abs_norm > plateau_threshold) & (abs_norm <= 1.0)
        if np.any(transition_indices):
            x_trans = abs_norm[transition_indices] - plateau_threshold
            mask[transition_indices] = 0.5 * (1 + np.cos(np.pi * x_trans / (1.0 * alpha)))

        # 7. Apply: add a leading axis for 3-D fields, plain product otherwise.
        if field.ndim == 3:
            entry.field = field * mask[np.newaxis, :, :]
        else:
            entry.field = field * mask

    print("Apodisation done.")
|
|
596
|
+
|
|
393
597
|
# PRIVATE METHODS
|
|
394
598
|
def _generateAcousticFields_STRUCT_CPU(self, fieldDataPath=None, show_log=False, nameBlock=None):
|
|
395
599
|
if self.patterns is None:
|
|
@@ -58,11 +58,11 @@ def MLEM(
|
|
|
58
58
|
# Dispatch to the appropriate implementation
|
|
59
59
|
if use_gpu:
|
|
60
60
|
if smatrixType == SMatrixType.CSR:
|
|
61
|
-
return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
61
|
+
return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
|
|
62
62
|
elif smatrixType == SMatrixType.SELL:
|
|
63
|
-
return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
63
|
+
return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
|
|
64
64
|
elif smatrixType == SMatrixType.DENSE:
|
|
65
|
-
return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
65
|
+
return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold,show_logs)
|
|
66
66
|
else:
|
|
67
67
|
raise ValueError("Unsupported SMatrixType for GPU MLEM.")
|
|
68
68
|
else:
|
|
@@ -228,49 +228,60 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
|
228
228
|
print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
|
|
229
229
|
return None, None
|
|
230
230
|
|
|
231
|
-
def MLEM_sparseCSR_pycuda(
|
|
231
|
+
def MLEM_sparseCSR_pycuda(
|
|
232
|
+
SMatrix,
|
|
233
|
+
y,
|
|
234
|
+
numIterations,
|
|
235
|
+
isSavingEachIteration,
|
|
236
|
+
tumor_str,
|
|
237
|
+
max_saves,
|
|
238
|
+
denominator_threshold,
|
|
239
|
+
show_logs=True,
|
|
240
|
+
):
|
|
232
241
|
"""
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
242
|
+
Robust MLEM implementation for CSR SMatrix using PyCUDA kernels.
|
|
243
|
+
Expects SMatrix to be SparseSMatrix_CSR with attributes:
|
|
244
|
+
- values_gpu, col_ind_gpu, row_ptr_gpu (device pointers)
|
|
245
|
+
- norm_factor_inv_gpu (device pointer)
|
|
246
|
+
- sparse_mod (loaded module with kernels)
|
|
247
|
+
- ctx (PyCUDA context)
|
|
248
|
+
Returns (saved_theta_list, saved_indices) if isSavingEachIteration else (final_theta, None)
|
|
240
249
|
"""
|
|
241
|
-
|
|
242
|
-
# We use a final_result placeholder to ensure it's defined outside the try block
|
|
243
250
|
final_result = None
|
|
244
|
-
|
|
251
|
+
|
|
252
|
+
# Local holders to free in finally
|
|
253
|
+
y_gpu = q_flat_gpu = e_flat_gpu = c_flat_gpu = theta_flat_gpu = None
|
|
254
|
+
|
|
245
255
|
try:
|
|
246
256
|
if not isinstance(SMatrix, SparseSMatrix_CSR):
|
|
247
257
|
raise TypeError("SMatrix must be a SparseSMatrix_CSR object")
|
|
248
258
|
|
|
249
|
-
#
|
|
250
|
-
|
|
251
|
-
if SMatrix
|
|
259
|
+
# push context (if provided)
|
|
260
|
+
popped_ctx = False
|
|
261
|
+
if getattr(SMatrix, "ctx", None):
|
|
252
262
|
SMatrix.ctx.push()
|
|
253
|
-
|
|
263
|
+
popped_ctx = True
|
|
254
264
|
|
|
255
265
|
dtype = np.float32
|
|
256
|
-
TN = SMatrix.N * SMatrix.T
|
|
257
|
-
ZX = SMatrix.Z * SMatrix.X
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
if
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
266
|
+
TN = int(SMatrix.N * SMatrix.T)
|
|
267
|
+
ZX = int(SMatrix.Z * SMatrix.X)
|
|
268
|
+
Z = int(SMatrix.Z)
|
|
269
|
+
X = int(SMatrix.X)
|
|
270
|
+
|
|
271
|
+
# Make sure required GPU pointers exist
|
|
272
|
+
if getattr(SMatrix, "values_gpu", None) is None or getattr(SMatrix, "col_ind_gpu", None) is None or getattr(SMatrix, "row_ptr_gpu", None) is None:
|
|
273
|
+
raise RuntimeError("SMatrix is missing GPU buffers (values_gpu / col_ind_gpu / row_ptr_gpu)")
|
|
274
|
+
|
|
275
|
+
if getattr(SMatrix, "norm_factor_inv_gpu", None) is None:
|
|
276
|
+
raise RuntimeError("SMatrix.norm_factor_inv_gpu not available on GPU")
|
|
277
|
+
|
|
278
|
+
# stream for async operations
|
|
268
279
|
stream = drv.Stream()
|
|
269
280
|
|
|
270
|
-
#
|
|
271
|
-
|
|
272
|
-
y_gpu = drv.mem_alloc(
|
|
273
|
-
drv.memcpy_htod_async(y_gpu,
|
|
281
|
+
# prepare device buffers
|
|
282
|
+
y_arr = np.ascontiguousarray(y.T.flatten().astype(np.float32))
|
|
283
|
+
y_gpu = drv.mem_alloc(y_arr.nbytes)
|
|
284
|
+
drv.memcpy_htod_async(y_gpu, y_arr, stream)
|
|
274
285
|
|
|
275
286
|
theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
|
|
276
287
|
initial_theta = np.full(ZX, 0.1, dtype=dtype)
|
|
@@ -282,61 +293,111 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
|
|
|
282
293
|
e_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
|
|
283
294
|
c_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
|
|
284
295
|
|
|
285
|
-
#
|
|
286
|
-
projection_kernel = SMatrix.sparse_mod.get_function(
|
|
287
|
-
backprojection_kernel = SMatrix.sparse_mod.get_function(
|
|
288
|
-
ratio_kernel = SMatrix.sparse_mod.get_function(
|
|
289
|
-
update_kernel = SMatrix.sparse_mod.get_function(
|
|
296
|
+
# Ensure kernels exist
|
|
297
|
+
projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__CSR")
|
|
298
|
+
backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__CSR")
|
|
299
|
+
ratio_kernel = SMatrix.sparse_mod.get_function("ratio_kernel")
|
|
300
|
+
update_kernel = SMatrix.sparse_mod.get_function("update_theta_kernel")
|
|
290
301
|
block_size = 256
|
|
291
302
|
|
|
292
|
-
|
|
303
|
+
# prepare save indices once
|
|
293
304
|
if numIterations <= max_saves:
|
|
294
305
|
save_indices = list(range(numIterations))
|
|
295
306
|
else:
|
|
296
|
-
|
|
307
|
+
step = max(1, numIterations // max_saves)
|
|
308
|
+
save_indices = list(range(0, numIterations, step))
|
|
297
309
|
if save_indices[-1] != numIterations - 1:
|
|
298
310
|
save_indices.append(numIterations - 1)
|
|
299
311
|
|
|
312
|
+
saved_theta = []
|
|
313
|
+
saved_indices = []
|
|
314
|
+
|
|
300
315
|
description = f"AOT-BioMaps -- ML-EM (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
|
|
301
316
|
iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
|
|
317
|
+
|
|
318
|
+
# grid sizes
|
|
319
|
+
grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
|
|
320
|
+
grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
|
|
321
|
+
|
|
302
322
|
for it in iterator:
|
|
303
323
|
# projection: q = A * theta
|
|
304
|
-
projection_kernel(
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
324
|
+
projection_kernel(
|
|
325
|
+
q_flat_gpu,
|
|
326
|
+
SMatrix.values_gpu,
|
|
327
|
+
SMatrix.row_ptr_gpu,
|
|
328
|
+
SMatrix.col_ind_gpu,
|
|
329
|
+
theta_flat_gpu,
|
|
330
|
+
np.int32(TN),
|
|
331
|
+
block=(block_size, 1, 1),
|
|
332
|
+
grid=grid_rows,
|
|
333
|
+
stream=stream,
|
|
334
|
+
)
|
|
308
335
|
|
|
309
336
|
# ratio: e = y / max(q, threshold)
|
|
310
|
-
ratio_kernel(
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
337
|
+
ratio_kernel(
|
|
338
|
+
e_flat_gpu,
|
|
339
|
+
y_gpu,
|
|
340
|
+
q_flat_gpu,
|
|
341
|
+
np.float32(denominator_threshold),
|
|
342
|
+
np.int32(TN),
|
|
343
|
+
block=(block_size, 1, 1),
|
|
344
|
+
grid=grid_rows,
|
|
345
|
+
stream=stream,
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
# backprojection: c = A^T * e (zero c first)
|
|
314
349
|
drv.memset_d32_async(c_flat_gpu, 0, ZX, stream)
|
|
315
|
-
backprojection_kernel(
|
|
316
|
-
|
|
317
|
-
|
|
350
|
+
backprojection_kernel(
|
|
351
|
+
c_flat_gpu,
|
|
352
|
+
SMatrix.values_gpu,
|
|
353
|
+
SMatrix.row_ptr_gpu,
|
|
354
|
+
SMatrix.col_ind_gpu,
|
|
355
|
+
e_flat_gpu,
|
|
356
|
+
np.int32(TN),
|
|
357
|
+
block=(block_size, 1, 1),
|
|
358
|
+
grid=grid_rows,
|
|
359
|
+
stream=stream,
|
|
360
|
+
)
|
|
318
361
|
|
|
319
362
|
# update: theta *= norm_factor_inv * c
|
|
320
|
-
update_kernel(
|
|
321
|
-
|
|
322
|
-
|
|
363
|
+
update_kernel(
|
|
364
|
+
theta_flat_gpu,
|
|
365
|
+
c_flat_gpu,
|
|
366
|
+
norm_factor_inv_gpu,
|
|
367
|
+
np.int32(ZX),
|
|
368
|
+
block=(block_size, 1, 1),
|
|
369
|
+
grid=grid_cols,
|
|
370
|
+
stream=stream,
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
# periodic synchronization for stability / logging
|
|
323
374
|
if show_logs and (it % 10 == 0 or it == numIterations - 1):
|
|
324
|
-
|
|
375
|
+
stream.synchronize()
|
|
325
376
|
|
|
377
|
+
# save snapshot if required
|
|
326
378
|
if isSavingEachIteration and it in save_indices:
|
|
379
|
+
# ensure kernels finished
|
|
380
|
+
stream.synchronize()
|
|
327
381
|
theta_host = np.empty(ZX, dtype=dtype)
|
|
328
382
|
drv.memcpy_dtoh(theta_host, theta_flat_gpu)
|
|
329
383
|
saved_theta.append(theta_host.reshape(Z, X))
|
|
330
|
-
saved_indices.append(it)
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
drv.memcpy_dtoh(
|
|
336
|
-
final_result =
|
|
337
|
-
|
|
338
|
-
# free local allocations
|
|
339
|
-
|
|
384
|
+
saved_indices.append(int(it))
|
|
385
|
+
|
|
386
|
+
# make sure everything finished
|
|
387
|
+
stream.synchronize()
|
|
388
|
+
final_theta_host = np.empty(ZX, dtype=dtype)
|
|
389
|
+
drv.memcpy_dtoh(final_theta_host, theta_flat_gpu)
|
|
390
|
+
final_result = final_theta_host.reshape(Z, X)
|
|
391
|
+
|
|
392
|
+
# free local allocations (will also be freed in finally if exception)
|
|
393
|
+
try:
|
|
394
|
+
y_gpu.free()
|
|
395
|
+
q_flat_gpu.free()
|
|
396
|
+
e_flat_gpu.free()
|
|
397
|
+
c_flat_gpu.free()
|
|
398
|
+
theta_flat_gpu.free()
|
|
399
|
+
except Exception:
|
|
400
|
+
pass
|
|
340
401
|
|
|
341
402
|
return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
|
|
342
403
|
|
|
@@ -344,32 +405,50 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
|
|
|
344
405
|
print(f"Error in MLEM_sparseCSR_pycuda: {type(e).__name__}: {e}")
|
|
345
406
|
gc.collect()
|
|
346
407
|
return None, None
|
|
347
|
-
|
|
348
|
-
finally:
|
|
349
|
-
# --- CONTEXT FIX: Pop the context ---
|
|
350
|
-
if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
|
|
351
|
-
SMatrix.ctx.pop()
|
|
352
|
-
# ------------------------------------
|
|
353
408
|
|
|
354
|
-
|
|
409
|
+
finally:
|
|
410
|
+
# free buffers if still allocated
|
|
411
|
+
for buf in ("y_gpu", "q_flat_gpu", "e_flat_gpu", "c_flat_gpu", "theta_flat_gpu"):
|
|
412
|
+
try:
|
|
413
|
+
val = locals().get(buf, None)
|
|
414
|
+
if val is not None:
|
|
415
|
+
val.free()
|
|
416
|
+
except Exception:
|
|
417
|
+
pass
|
|
418
|
+
# pop context safely
|
|
419
|
+
try:
|
|
420
|
+
if SMatrix and hasattr(SMatrix, "ctx") and SMatrix.ctx and popped_ctx:
|
|
421
|
+
SMatrix.ctx.pop()
|
|
422
|
+
except Exception:
|
|
423
|
+
pass
|
|
424
|
+
|
|
425
|
+
def MLEM_sparseSELL_pycuda(
|
|
426
|
+
SMatrix,
|
|
427
|
+
y,
|
|
428
|
+
numIterations,
|
|
429
|
+
isSavingEachIteration,
|
|
430
|
+
tumor_str,
|
|
431
|
+
max_saves,
|
|
432
|
+
denominator_threshold,
|
|
433
|
+
show_logs=True,
|
|
434
|
+
):
|
|
355
435
|
"""
|
|
356
436
|
MLEM using SELL-C-σ kernels already present on device.
|
|
357
437
|
y must be float32 length TN.
|
|
438
|
+
|
|
439
|
+
Version propre : diagnostics retirés.
|
|
358
440
|
"""
|
|
359
441
|
final_result = None
|
|
360
442
|
|
|
361
443
|
try:
|
|
362
|
-
# check if SMatrix is SparseSMatrix_SELL object
|
|
363
444
|
if not isinstance(SMatrix, SparseSMatrix_SELL):
|
|
364
445
|
raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
|
|
365
446
|
if SMatrix.sell_values_gpu is None:
|
|
366
447
|
raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")
|
|
367
|
-
|
|
368
|
-
#
|
|
369
|
-
# This ensures all subsequent PyCUDA operations use the correct GPU/context.
|
|
448
|
+
|
|
449
|
+
# Context
|
|
370
450
|
if SMatrix.ctx:
|
|
371
451
|
SMatrix.ctx.push()
|
|
372
|
-
# -----------------------------------------------------------
|
|
373
452
|
|
|
374
453
|
TN = int(SMatrix.N * SMatrix.T)
|
|
375
454
|
ZX = int(SMatrix.Z * SMatrix.X)
|
|
@@ -383,7 +462,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
|
|
|
383
462
|
|
|
384
463
|
stream = drv.Stream()
|
|
385
464
|
|
|
386
|
-
#
|
|
465
|
+
# Device buffers
|
|
387
466
|
y = y.T.flatten().astype(np.float32)
|
|
388
467
|
y_gpu = drv.mem_alloc(y.nbytes)
|
|
389
468
|
drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
|
|
@@ -402,6 +481,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
|
|
|
402
481
|
grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
|
|
403
482
|
grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
|
|
404
483
|
|
|
484
|
+
# Prepare save indices
|
|
405
485
|
saved_theta, saved_indices = [], []
|
|
406
486
|
if numIterations <= max_saves:
|
|
407
487
|
save_indices = list(range(numIterations))
|
|
@@ -412,52 +492,60 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
|
|
|
412
492
|
|
|
413
493
|
description = f"AOT-BioMaps -- ML-EM (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
|
|
414
494
|
iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
|
|
495
|
+
|
|
496
|
+
# --- MLEM Loop ---
|
|
415
497
|
for it in iterator:
|
|
416
|
-
# projection
|
|
417
|
-
proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
|
|
418
|
-
theta_gpu, np.int32(TN), slice_height,
|
|
419
|
-
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
420
498
|
|
|
421
|
-
|
|
499
|
+
proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
|
|
500
|
+
slice_ptr_gpu, slice_len_gpu,
|
|
501
|
+
theta_gpu, np.int32(TN), slice_height,
|
|
502
|
+
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
503
|
+
|
|
422
504
|
ratio(e_gpu, y_gpu, q_gpu, np.float32(denominator_threshold), np.int32(TN),
|
|
423
|
-
|
|
505
|
+
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
424
506
|
|
|
425
|
-
# zero c
|
|
426
507
|
drv.memset_d32_async(c_gpu, 0, ZX, stream)
|
|
427
508
|
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
509
|
+
backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
|
|
510
|
+
slice_ptr_gpu, slice_len_gpu,
|
|
511
|
+
e_gpu, c_gpu, np.int32(TN), slice_height,
|
|
512
|
+
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
432
513
|
|
|
433
|
-
# update
|
|
434
514
|
update(theta_gpu, c_gpu, SMatrix.norm_factor_inv_gpu, np.int32(ZX),
|
|
435
|
-
|
|
515
|
+
block=(block_size,1,1), grid=grid_cols, stream=stream)
|
|
436
516
|
|
|
437
|
-
stream.synchronize()
|
|
438
517
|
if isSavingEachIteration and it in save_indices:
|
|
439
518
|
out = np.empty(ZX, dtype=np.float32)
|
|
440
519
|
drv.memcpy_dtoh(out, theta_gpu)
|
|
441
520
|
saved_theta.append(out.reshape((SMatrix.Z, SMatrix.X)))
|
|
442
521
|
saved_indices.append(it)
|
|
443
522
|
|
|
444
|
-
|
|
523
|
+
stream.synchronize()
|
|
445
524
|
res = np.empty(ZX, dtype=np.float32)
|
|
446
525
|
drv.memcpy_dtoh(res, theta_gpu)
|
|
447
526
|
|
|
448
|
-
# free
|
|
449
|
-
|
|
450
|
-
|
|
527
|
+
# free
|
|
528
|
+
try:
|
|
529
|
+
y_gpu.free()
|
|
530
|
+
q_gpu.free()
|
|
531
|
+
e_gpu.free()
|
|
532
|
+
c_gpu.free()
|
|
533
|
+
theta_gpu.free()
|
|
534
|
+
except Exception:
|
|
535
|
+
pass
|
|
536
|
+
|
|
451
537
|
final_result = res.reshape((SMatrix.Z, SMatrix.X))
|
|
452
538
|
return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
|
|
453
|
-
|
|
539
|
+
|
|
454
540
|
except Exception as e:
|
|
455
541
|
print(f"Error in MLEM_sparseSELL_pycuda: {type(e).__name__}: {e}")
|
|
456
542
|
gc.collect()
|
|
457
543
|
return None, None
|
|
458
|
-
|
|
544
|
+
|
|
459
545
|
finally:
|
|
460
|
-
# --- CONTEXT FIX: Pop the context ---
|
|
461
546
|
if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
|
|
462
|
-
|
|
463
|
-
|
|
547
|
+
try:
|
|
548
|
+
SMatrix.ctx.pop()
|
|
549
|
+
except Exception:
|
|
550
|
+
pass
|
|
551
|
+
|
|
@@ -92,13 +92,11 @@ class SparseSMatrix_SELL:
|
|
|
92
92
|
def allocate(self):
|
|
93
93
|
"""
|
|
94
94
|
Build SELL-C-σ directly from manip AcousticFields in streaming blocks.
|
|
95
|
-
|
|
95
|
+
Corrected: per-block row_nnz copy, zeroing of host block, proper sync.
|
|
96
96
|
"""
|
|
97
97
|
if self.sparse_mod is None:
|
|
98
98
|
raise RuntimeError("CUDA module not loaded. Check compilation.")
|
|
99
99
|
|
|
100
|
-
# NOTE: Les noms de kernel (count_nnz_rows_kernel, fill_kernel__SELL) sont utilisés
|
|
101
|
-
# car ils sont présents dans la classe fonctionnelle.
|
|
102
100
|
count_kernel = self.sparse_mod.get_function("count_nnz_rows_kernel")
|
|
103
101
|
fill_kernel = self.sparse_mod.get_function("fill_kernel__SELL")
|
|
104
102
|
|
|
@@ -106,34 +104,34 @@ class SparseSMatrix_SELL:
|
|
|
106
104
|
num_cols = int(self.Z * self.X)
|
|
107
105
|
C = int(self.slice_height)
|
|
108
106
|
|
|
109
|
-
# host temporary block
|
|
110
107
|
br = int(self.block_rows)
|
|
111
|
-
bytes_per_elem = np.dtype(np.float32).itemsize
|
|
112
108
|
dense_host = np.empty((br, num_cols), dtype=np.float32)
|
|
113
109
|
|
|
114
|
-
# Allocation
|
|
110
|
+
# Allocation dense buffer on device (size = br * num_cols)
|
|
115
111
|
dense_gpu_size = dense_host.nbytes
|
|
116
112
|
dense_gpu = drv.mem_alloc(dense_gpu_size)
|
|
117
113
|
|
|
118
|
-
# 1) count nnz per row (
|
|
114
|
+
# 1) count nnz per row (per block)
|
|
119
115
|
row_nnz = np.zeros(num_rows, dtype=np.int32)
|
|
120
116
|
row_nnz_gpu_block_size = br * np.dtype(np.int32).itemsize
|
|
121
117
|
row_nnz_gpu_block = drv.mem_alloc(row_nnz_gpu_block_size)
|
|
122
118
|
|
|
123
|
-
block =
|
|
119
|
+
block = 128
|
|
124
120
|
for b in trange(0, num_rows, br, desc="Count NNZ per row"):
|
|
125
121
|
R = min(br, num_rows - b)
|
|
126
|
-
#
|
|
122
|
+
# zero the host block to avoid garbage in tail when R < br
|
|
123
|
+
dense_host.fill(0.0)
|
|
127
124
|
for i in range(R):
|
|
128
125
|
rg = b + i
|
|
129
126
|
n_idx = rg // self.T
|
|
130
127
|
t_idx = rg % self.T
|
|
131
128
|
dense_host[i, :] = self.manip.AcousticFields[n_idx].field[t_idx].flatten()
|
|
132
|
-
# copy
|
|
129
|
+
# copy whole buffer (safe because we zeroed tail)
|
|
133
130
|
drv.memcpy_htod(dense_gpu, dense_host)
|
|
134
131
|
grid = ((R + block - 1) // block, 1, 1)
|
|
135
132
|
count_kernel(dense_gpu, row_nnz_gpu_block, np.int32(R), np.int32(num_cols), np.float32(self.relative_threshold),
|
|
136
|
-
|
|
133
|
+
block=(block,1,1), grid=grid)
|
|
134
|
+
drv.Context.synchronize()
|
|
137
135
|
tmp = np.empty(R, dtype=np.int32)
|
|
138
136
|
drv.memcpy_dtoh(tmp, row_nnz_gpu_block)
|
|
139
137
|
row_nnz[b:b+R] = tmp
|
|
@@ -148,7 +146,6 @@ class SparseSMatrix_SELL:
|
|
|
148
146
|
r0 = s * C
|
|
149
147
|
r1 = min(num_rows, r0 + C)
|
|
150
148
|
slice_len[s] = int(np.max(row_nnz[r0:r1])) if (r1>r0) else 0
|
|
151
|
-
# slice_ptr (int64)
|
|
152
149
|
slice_ptr = np.zeros(num_slices + 1, dtype=np.int64)
|
|
153
150
|
for s in range(num_slices):
|
|
154
151
|
slice_ptr[s+1] = slice_ptr[s] + (slice_len[s] * C)
|
|
@@ -160,9 +157,14 @@ class SparseSMatrix_SELL:
|
|
|
160
157
|
self.sell_values_gpu_size = total_storage * np.dtype(np.float32).itemsize
|
|
161
158
|
self.sell_colinds_gpu_size = total_storage * np.dtype(np.uint32).itemsize
|
|
162
159
|
|
|
160
|
+
# allocate and optionally zero them
|
|
163
161
|
self.sell_values_gpu = drv.mem_alloc(self.sell_values_gpu_size)
|
|
162
|
+
# It's good practice to zero the values buffer to avoid leftover memory
|
|
163
|
+
drv.memset_d32(self.sell_values_gpu, 0, total_storage)
|
|
164
|
+
|
|
164
165
|
self.sell_colinds_gpu = drv.mem_alloc(self.sell_colinds_gpu_size)
|
|
165
|
-
|
|
166
|
+
drv.memset_d32(self.sell_colinds_gpu, 0, total_storage)
|
|
167
|
+
|
|
166
168
|
# allocate slice metadata on device
|
|
167
169
|
self.slice_ptr = slice_ptr
|
|
168
170
|
self.slice_len = slice_len
|
|
@@ -177,29 +179,28 @@ class SparseSMatrix_SELL:
|
|
|
177
179
|
drv.memcpy_htod(self.slice_len_gpu, self.slice_len)
|
|
178
180
|
|
|
179
181
|
# 3) fill SELL arrays by streaming blocks again (use GPU fill kernel)
|
|
180
|
-
# reuse dense_host and allocate new dense_gpu
|
|
181
182
|
dense_host = np.empty((br, num_cols), dtype=np.float32)
|
|
183
|
+
dense_gpu = drv.mem_alloc(dense_host.nbytes)
|
|
182
184
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
# we also need row_nnz on device per-block; supply global row_nnz on host but the kernel recomputes threshold
|
|
187
|
-
row_nnz_host_gpu_size = br * np.dtype(np.int32).itemsize
|
|
188
|
-
row_nnz_host_gpu = drv.mem_alloc(row_nnz_host_gpu_size)
|
|
185
|
+
# For per-block row_nnz pointer we allocate a buffer of max block size once, then reuse
|
|
186
|
+
row_nnz_host_gpu = drv.mem_alloc(br * np.dtype(np.int32).itemsize)
|
|
189
187
|
|
|
190
188
|
for b in trange(0, num_rows, br, desc="Fill SELL"):
|
|
191
189
|
R = min(br, num_rows - b)
|
|
190
|
+
dense_host.fill(0.0)
|
|
192
191
|
for i in range(R):
|
|
193
192
|
rg = b + i
|
|
194
193
|
n_idx = rg // self.T
|
|
195
194
|
t_idx = rg % self.T
|
|
196
195
|
dense_host[i, :] = self.manip.AcousticFields[n_idx].field[t_idx].flatten()
|
|
196
|
+
# copy host block
|
|
197
197
|
drv.memcpy_htod(dense_gpu, dense_host)
|
|
198
|
-
#
|
|
199
|
-
|
|
198
|
+
# copy corresponding row_nnz slice (only R entries)
|
|
199
|
+
drv.memcpy_htod(row_nnz_host_gpu, row_nnz[b:b+R])
|
|
200
|
+
|
|
200
201
|
grid = ((R + block - 1) // block, 1, 1)
|
|
201
202
|
fill_kernel(dense_gpu,
|
|
202
|
-
|
|
203
|
+
row_nnz_host_gpu,
|
|
203
204
|
self.slice_ptr_gpu,
|
|
204
205
|
self.slice_len_gpu,
|
|
205
206
|
self.sell_colinds_gpu,
|
|
@@ -210,12 +211,14 @@ class SparseSMatrix_SELL:
|
|
|
210
211
|
np.int32(C),
|
|
211
212
|
np.float32(self.relative_threshold),
|
|
212
213
|
block=(block,1,1), grid=grid)
|
|
214
|
+
drv.Context.synchronize()
|
|
215
|
+
|
|
213
216
|
dense_gpu.free()
|
|
214
217
|
row_nnz_host_gpu.free()
|
|
215
218
|
|
|
216
219
|
# 4) compute norm_factor_inv via GPU accumulate (col sums)
|
|
217
220
|
self.compute_norm_factor()
|
|
218
|
-
|
|
221
|
+
|
|
219
222
|
def apply_apodization_gpu(self, window_vector_gpu):
|
|
220
223
|
"""
|
|
221
224
|
Applique le fenêtrage directement sur self.sell_values_gpu
|
|
@@ -234,7 +237,7 @@ class SparseSMatrix_SELL:
|
|
|
234
237
|
)
|
|
235
238
|
|
|
236
239
|
# Le total_storage inclut les éléments non-nuls et le padding SELL.
|
|
237
|
-
threads =
|
|
240
|
+
threads = 128
|
|
238
241
|
blocks = (self.total_storage + threads - 1) // threads
|
|
239
242
|
|
|
240
243
|
# Lancement du kernel. Il travaille sur total_storage éléments.
|
|
@@ -248,43 +251,72 @@ class SparseSMatrix_SELL:
|
|
|
248
251
|
)
|
|
249
252
|
drv.Context.synchronize()
|
|
250
253
|
print("✅ Multiplication par le fenêtrage effectuée in-place sur GPU (SELL-C-σ).")
|
|
251
|
-
|
|
254
|
+
|
|
252
255
|
def compute_norm_factor(self):
|
|
253
256
|
"""
|
|
254
|
-
|
|
257
|
+
Compute the TRUE MLEM normalization norm_factor_inv = 1 / (A^T * 1)
|
|
258
|
+
by performing a SELL backprojection of a vector of ones.
|
|
259
|
+
This is the ONLY correct normalization for MLEM.
|
|
255
260
|
"""
|
|
256
|
-
if self.total_storage == 0:
|
|
257
|
-
raise RuntimeError("sell not built")
|
|
258
261
|
ZX = int(self.Z * self.X)
|
|
262
|
+
TN = int(self.T * self.N)
|
|
259
263
|
|
|
260
|
-
#
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
drv.memset_d32(col_sum_gpu, 0, ZX)
|
|
264
|
+
# Allocate device vector of ones (projections)
|
|
265
|
+
ones_gpu = drv.mem_alloc(TN * np.dtype(np.float32).itemsize)
|
|
266
|
+
drv.memset_d32(ones_gpu, 0x3f800000, TN) # 1.0f bit pattern
|
|
264
267
|
|
|
265
|
-
#
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
268
|
+
# Allocate output for backprojection (ZX pixels)
|
|
269
|
+
c_gpu = drv.mem_alloc(ZX * np.dtype(np.float32).itemsize)
|
|
270
|
+
drv.memset_d32(c_gpu, 0, ZX)
|
|
271
|
+
|
|
272
|
+
# Get SELL backprojection kernel
|
|
273
|
+
try:
|
|
274
|
+
bp_kernel = self.sparse_mod.get_function("backprojection_kernel__SELL")
|
|
275
|
+
except Exception as e:
|
|
276
|
+
raise RuntimeError("Missing kernel backprojection_kernel__SELL in the cubin") from e
|
|
277
|
+
|
|
278
|
+
threads = 256
|
|
279
|
+
blocks = (TN + threads - 1) // threads
|
|
280
|
+
|
|
281
|
+
# Launch GPU backprojection
|
|
282
|
+
bp_kernel(
|
|
283
|
+
self.sell_values_gpu,
|
|
284
|
+
self.sell_colinds_gpu,
|
|
285
|
+
self.slice_ptr_gpu,
|
|
286
|
+
self.slice_len_gpu,
|
|
287
|
+
ones_gpu,
|
|
288
|
+
c_gpu,
|
|
289
|
+
np.int32(TN),
|
|
290
|
+
# np.int32(ZX),
|
|
291
|
+
np.int32(self.slice_height),
|
|
292
|
+
# np.int64(self.total_storage),
|
|
293
|
+
block=(threads, 1, 1), # Utilise le nouveau nombre de threads
|
|
294
|
+
grid=(blocks, 1, 1)
|
|
295
|
+
)
|
|
272
296
|
drv.Context.synchronize()
|
|
273
297
|
|
|
274
|
-
#
|
|
275
|
-
|
|
276
|
-
drv.memcpy_dtoh(
|
|
277
|
-
|
|
298
|
+
# Copy back to host
|
|
299
|
+
c_host = np.empty(ZX, dtype=np.float32)
|
|
300
|
+
drv.memcpy_dtoh(c_host, c_gpu)
|
|
301
|
+
ones_gpu.free()
|
|
302
|
+
c_gpu.free()
|
|
303
|
+
|
|
304
|
+
# Avoid divide-by-zero
|
|
305
|
+
c_host = np.maximum(c_host, 1e-6)
|
|
306
|
+
|
|
307
|
+
# Compute inverse (stored for use in MLEM)
|
|
308
|
+
self.norm_factor_inv = (1.0 / c_host).astype(np.float32)
|
|
278
309
|
|
|
279
|
-
|
|
280
|
-
self.norm_factor_inv = (1.0 / norm).astype(np.float32)
|
|
310
|
+
# Upload to GPU
|
|
281
311
|
if self.norm_factor_inv_gpu is not None:
|
|
282
312
|
self.norm_factor_inv_gpu.free()
|
|
283
|
-
|
|
313
|
+
|
|
284
314
|
self.norm_factor_inv_gpu_size = self.norm_factor_inv.nbytes
|
|
285
315
|
self.norm_factor_inv_gpu = drv.mem_alloc(self.norm_factor_inv_gpu_size)
|
|
286
316
|
drv.memcpy_htod(self.norm_factor_inv_gpu, self.norm_factor_inv)
|
|
287
317
|
|
|
318
|
+
print("✓ Normalization (A^T*1) computed for MLEM.")
|
|
319
|
+
|
|
288
320
|
def compute_density(self):
|
|
289
321
|
"""
|
|
290
322
|
Returns only the density of the SELL-C-σ matrix.
|
|
Binary file
|
|
@@ -221,7 +221,6 @@ def calculate_memory_requirement(SMatrix, y):
|
|
|
221
221
|
# --- 3. Final Result ---
|
|
222
222
|
return total_bytes / (1024 ** 3)
|
|
223
223
|
|
|
224
|
-
|
|
225
224
|
def check_gpu_memory(device_index, required_memory, show_logs=True):
|
|
226
225
|
"""Check if enough memory is available on the specified GPU."""
|
|
227
226
|
free_memory, _ = torch.cuda.mem_get_info(f"cuda:{device_index}")
|
|
@@ -252,7 +251,6 @@ def _backward_projection(SMatrix, e_p, c_p):
|
|
|
252
251
|
total += SMatrix[_t, _z, _x, _n] * e_p[_t, _n]
|
|
253
252
|
c_p[_z, _x] = total
|
|
254
253
|
|
|
255
|
-
|
|
256
254
|
def _build_adjacency_sparse(Z, X, device, corner=(0.5 - np.sqrt(2) / 4) / np.sqrt(2), face=0.5 - np.sqrt(2) / 4,dtype=torch.float32):
|
|
257
255
|
rows, cols, weights = [], [], []
|
|
258
256
|
for z in range(Z):
|
|
@@ -273,7 +271,6 @@ def _build_adjacency_sparse(Z, X, device, corner=(0.5 - np.sqrt(2) / 4) / np.sqr
|
|
|
273
271
|
index, values = coalesce(index, values, m=Z*X, n=Z*X)
|
|
274
272
|
return index, values
|
|
275
273
|
|
|
276
|
-
|
|
277
274
|
def power_method(P, PT, data, Z, X, n_it=10):
|
|
278
275
|
x = torch.randn(Z * X, device=data.device)
|
|
279
276
|
x = x / torch.norm(x)
|
AOT_biomaps/__init__.py
CHANGED
|
@@ -85,7 +85,7 @@ from .AOT_Recon.AOT_PotentialFunctions.RelativeDifferences import *
|
|
|
85
85
|
from .Config import config
|
|
86
86
|
from .Settings import *
|
|
87
87
|
|
|
88
|
-
__version__ = '2.9.294'
|
|
88
|
+
__version__ = '2.9.321'
|
|
89
89
|
__process__ = config.get_process()
|
|
90
90
|
|
|
91
91
|
def initialize(process=None):
|
|
@@ -138,6 +138,33 @@ def initialize(process=None):
|
|
|
138
138
|
|
|
139
139
|
|
|
140
140
|
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
|
|
141
168
|
|
|
142
169
|
|
|
143
170
|
|
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
AOT_biomaps/Config.py,sha256=ghEOP1n8aO1pR-su13wMeAZAxZRfry5hH67NbtZ8SqI,3614
|
|
2
2
|
AOT_biomaps/Settings.py,sha256=v8fPhnvvcfBJP29m1RLOTEr3jndGLGwbUiORXmsj2Bo,2853
|
|
3
|
-
AOT_biomaps/__init__.py,sha256=
|
|
3
|
+
AOT_biomaps/__init__.py,sha256=6TSP5wI6TYb3icQQXws5eVgqxv0-F2F9HDYXj9uot6A,4304
|
|
4
4
|
AOT_biomaps/AOT_Acoustic/AcousticEnums.py,sha256=s5kXa6jKzbS4btwbubrVcynLOr0yg5tth5vL_FGfbMk,1802
|
|
5
|
-
AOT_biomaps/AOT_Acoustic/AcousticTools.py,sha256=
|
|
5
|
+
AOT_biomaps/AOT_Acoustic/AcousticTools.py,sha256=h2sCtGVcDtyLtEF1q7sLZmuWivWmesVGUBPnW-ndQqc,7535
|
|
6
6
|
AOT_biomaps/AOT_Acoustic/FocusedWave.py,sha256=3kGKKDx_3Msy5COYqIwzROPORGWvNjw8UsDanBfkMXE,11037
|
|
7
7
|
AOT_biomaps/AOT_Acoustic/IrregularWave.py,sha256=yZhtxkR6zlciRcEpdTR0BAhvgQl40XHKFaF8f4VXarE,3035
|
|
8
8
|
AOT_biomaps/AOT_Acoustic/PlaneWave.py,sha256=xza-rj5AUWDecLkGDxRcULrwZVWeBvGnEP2d51TyR04,1447
|
|
9
9
|
AOT_biomaps/AOT_Acoustic/StructuredWave.py,sha256=jTLVlOhYLWJb5MxZPxhq3OFVlz2McoyMPBmfLvnekDU,18209
|
|
10
10
|
AOT_biomaps/AOT_Acoustic/__init__.py,sha256=t9M2rRqa_L9pk7W2FeELTkHEMuP4DBr4gBRldMqsQbg,491
|
|
11
11
|
AOT_biomaps/AOT_Acoustic/_mainAcoustic.py,sha256=RdmhRF1i0KAlpsP7_wnZ7F4J27br3eUc4XR91Qq7C64,44158
|
|
12
|
+
AOT_biomaps/AOT_Experiment/ExperimentTools.py,sha256=BkHSX_foyyj5UrHZWQH5F9DGeV8o2fkp3euEbcvE4vA,2399
|
|
12
13
|
AOT_biomaps/AOT_Experiment/Focus.py,sha256=B2nBawmv-NG2AWJx9zgQ8GlN6aFB9FwTSqX-M-phKXg,3193
|
|
13
|
-
AOT_biomaps/AOT_Experiment/Tomography.py,sha256=
|
|
14
|
+
AOT_biomaps/AOT_Experiment/Tomography.py,sha256=qH7QlAPp5Er1AhMbWDpbOT6VRJHgrYUdUxazuEMAc_A,34569
|
|
14
15
|
AOT_biomaps/AOT_Experiment/__init__.py,sha256=H9zMLeBLA6uhbaHohAa-2u5mDDxqJi8oE5c6tShdQp8,308
|
|
15
16
|
AOT_biomaps/AOT_Experiment/_mainExperiment.py,sha256=zSfuNrsz7nhiKrGIdK6CAXjlI2T6qYC5-JXHFgPNzhc,24674
|
|
16
17
|
AOT_biomaps/AOT_Optic/Absorber.py,sha256=jEodzRy7gkEH-wbazVasRQiri0dU16BfapmR-qnTSvM,867
|
|
@@ -18,20 +19,20 @@ AOT_biomaps/AOT_Optic/Laser.py,sha256=uzQwxswjU0kZWix3CmZLoWmhsBa3VhN27STprNv-xB
|
|
|
18
19
|
AOT_biomaps/AOT_Optic/OpticEnums.py,sha256=b349_JyjHqQohmjK4Wke-A_HLGaqb3_BKbyUqFC4jxY,499
|
|
19
20
|
AOT_biomaps/AOT_Optic/__init__.py,sha256=HSUVhfz0NzwHHZZ9KP9Xyfu33IgP_rYJX86J-gEROlo,321
|
|
20
21
|
AOT_biomaps/AOT_Optic/_mainOptic.py,sha256=Wk63CcgWbU-ygMfjNK80islaUbGGJpTXgZY3_C2KQNY,8179
|
|
21
|
-
AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin,sha256=
|
|
22
|
+
AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin,sha256=JWy-bdtBTZdnNlDbJGZKwXyF-2u1wICtmlOC_YxEL6o,82528
|
|
22
23
|
AOT_biomaps/AOT_Recon/AlgebraicRecon.py,sha256=CGBXZyYEZ3TOTFOKSt-h7NGuFbuI9PNr3YTWTbSLxDo,46832
|
|
23
24
|
AOT_biomaps/AOT_Recon/AnalyticRecon.py,sha256=RaQ5AJ1HUmSct0BgjZ0GWSJg7SALCn3Q0laqj1yyhAE,7123
|
|
24
25
|
AOT_biomaps/AOT_Recon/BayesianRecon.py,sha256=RnnPa-tTcvirwiNPnCRZnSM4NWeEEltYET-piBbp34g,12671
|
|
25
26
|
AOT_biomaps/AOT_Recon/DeepLearningRecon.py,sha256=RfVcEsi4GeGqJn0_SPxwQPQx6IQjin79WKh2UarMRLI,1383
|
|
26
27
|
AOT_biomaps/AOT_Recon/PrimalDualRecon.py,sha256=JbFhxiyUoSTnlJgHbOWIfUUwhwfZoi39RJMnfkagegY,16504
|
|
27
28
|
AOT_biomaps/AOT_Recon/ReconEnums.py,sha256=KAf55RqHAr2ilt6pxFrUBGQOn-7HA8NP6TyL-1FNiXo,19714
|
|
28
|
-
AOT_biomaps/AOT_Recon/ReconTools.py,sha256=
|
|
29
|
+
AOT_biomaps/AOT_Recon/ReconTools.py,sha256=py1zKVEa0j7EfmcNZS2lpVQwzlkY6rRWsDQ8izWlme4,19872
|
|
29
30
|
AOT_biomaps/AOT_Recon/__init__.py,sha256=xs_argJqXKFl76xP7-jiUc1ynOEEtY7XZ0gDxD5uVZc,246
|
|
30
31
|
AOT_biomaps/AOT_Recon/_mainRecon.py,sha256=exoa2UBMfMHjemxAU9dW0mhEfsP6Oe1qjSfrTrgbIcY,13125
|
|
31
32
|
AOT_biomaps/AOT_Recon/AOT_Optimizers/DEPIERRO.py,sha256=qA1n722GLQJH3V8HcLr5q_GxEwBS_NRlIT3E6JZk-Ag,9479
|
|
32
33
|
AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py,sha256=bCu1rKzFXPbYQ7jV3L3E_jVQpb6LIEC5MIlN1-mCNdY,22814
|
|
33
34
|
AOT_biomaps/AOT_Recon/AOT_Optimizers/MAPEM.py,sha256=vQLCB0L4FSXJKn2_6kdIdWrI6WZ82KuqUh7CSqBGVuo,25766
|
|
34
|
-
AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py,sha256=
|
|
35
|
+
AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py,sha256=4omsqzHEZJfv0mEfmxfK71IovDbRstVE4x3Flf4cR3o,22441
|
|
35
36
|
AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py,sha256=oSojwug5mcZedKOWAV7YPMlCp0Qy_Aed0fjHRuyZWpo,28622
|
|
36
37
|
AOT_biomaps/AOT_Recon/AOT_Optimizers/__init__.py,sha256=tNGVulINaqQZzcs5cvCMAT5ypGdoFWRnxtl9y7ePECk,106
|
|
37
38
|
AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Huber.py,sha256=dRd1t5OBag_gVmfji3L0QrA1GJ_702LcCkLH32Bot0M,3285
|
|
@@ -39,9 +40,9 @@ AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Quadratic.py,sha256=wTbzcXxMdEl9ReE
|
|
|
39
40
|
AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/RelativeDifferences.py,sha256=ZlWaKsNPCMfy4fWxYFT2pSoKMbysQkJH4N1WbbWncq4,2493
|
|
40
41
|
AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/__init__.py,sha256=RwrJdLOFbAFBFnRxo5xdlOyeZgtQRDaRWDN9-uCGUiY,84
|
|
41
42
|
AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py,sha256=RACc2P5oxmp0uPLAGnNj9mEtAxa_OlepNgCawKij3jI,12062
|
|
42
|
-
AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py,sha256=
|
|
43
|
+
AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py,sha256=ti3dZQsb_Uu62C7Bn65Z-yf-R5NKCFsmnBT5GlLd_HY,15138
|
|
43
44
|
AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/__init__.py,sha256=8nou-hqjQjuCTLhoL5qv4EM_lMPFviAZAZKSPhi84jE,67
|
|
44
|
-
aot_biomaps-2.9.
|
|
45
|
-
aot_biomaps-2.9.
|
|
46
|
-
aot_biomaps-2.9.
|
|
47
|
-
aot_biomaps-2.9.
|
|
45
|
+
aot_biomaps-2.9.321.dist-info/METADATA,sha256=DMnk4rlTFW95pX0welgBZD3GdTkjeFbn3vwXNyUxSHs,700
|
|
46
|
+
aot_biomaps-2.9.321.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
47
|
+
aot_biomaps-2.9.321.dist-info/top_level.txt,sha256=6STF-lT4kaAnBHJYCripmN5mZABoHjMuY689JdiDphk,12
|
|
48
|
+
aot_biomaps-2.9.321.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|