AOT-biomaps 2.9.294__py3-none-any.whl → 2.9.321__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AOT-biomaps might be problematic. Click here for more details.

@@ -157,8 +157,6 @@ def calculate_envelope_squared(field):
157
157
  print(f"Erreur dans calculate_envelope_squared: {e}")
158
158
  raise
159
159
 
160
-
161
-
162
160
  def getPattern(pathFile):
163
161
  """
164
162
  Get the pattern from a file path.
@@ -0,0 +1,60 @@
1
+ import numpy as np
2
+
3
def calc_mat_os(xm, fx, dx, bool_active_list, signal_type):
    """Build a boolean on/off element mask for a square-wave approximation of a
    cos/sin spatial modulation at frequency ``fx``, replicated across the scan
    columns of ``bool_active_list``.

    Args:
        xm: 1-D array of element positions; only its length is used.
        fx: spatial frequency. ``fx == 0`` is the degenerate DC case.
        dx: element pitch (same length unit as 1/fx).
        bool_active_list: 2-D array whose column count sets the output width.
        signal_type: ``'cos'`` for the cosine phase, anything else for sine.

    Returns:
        Boolean array of shape ``(len(xm), bool_active_list.shape[1])`` — the
        same mask column tiled over every scan.
    """
    n = len(xm)
    n_cols = bool_active_list.shape[1]

    if fx == 0:
        # Degenerate frequency: cos(0) = 1 keeps every element active,
        # sin(0) = 0 switches every element off.
        column = np.full(n, signal_type == 'cos', dtype=bool)
    else:
        # Number of elements spanned by half a spatial period (clamped to >= 1
        # in case fx is so large that the half-period is sub-element).
        half_period = max(1, round(1 / (2 * fx * dx)))
        positions = np.arange(n)
        if signal_type != 'cos':
            # Sine is the cosine pattern shifted by a quarter period.
            positions = positions + half_period // 2
        column = (positions // half_period) % 2 == 0

    return np.tile(column[:, np.newaxis], (1, n_cols))
30
+
31
def convert_to_hex_list(matrix):
    """Convert a binary matrix into a list of hexadecimal strings.

    Rows are grouped into nibbles of 4 bits (the first element of each group is
    the least-significant bit); each column of the matrix becomes one string,
    assembled from the last nibble down to the first (standard shift-register
    order).

    Args:
        matrix: 2-D array of 0/1 values, shape ``(n_elements, n_scans)``.

    Returns:
        List of ``n_scans`` lowercase hex strings.
    """
    n_els, n_scans = matrix.shape

    # Pad with zero rows so the element count is a multiple of 4.
    pad_rows = (-n_els) % 4
    if pad_rows:
        matrix = np.vstack([matrix, np.zeros((pad_rows, n_scans))])

    # Isolate 4-bit groups: shape (n_nibbles, 4, n_scans).
    nibbles = matrix.reshape(-1, 4, n_scans)

    # Decimal value of each nibble, first row of the group being the LSB.
    lsb_weights = np.array([1, 2, 4, 8]).reshape(1, 4, 1)
    digits = np.sum(nibbles * lsb_weights, axis=1).astype(int)

    # Lookup table maps 0..15 to a hex character in one vectorized step.
    lut = np.array(list("0123456789abcdef"))
    chars = lut[digits]

    # Assemble each column, reversing nibble order (element N down to 0).
    return ["".join(chars[::-1, col]) for col in range(n_scans)]
60
+
@@ -2,6 +2,7 @@ from ._mainExperiment import Experiment
2
2
  from AOT_biomaps.AOT_Acoustic.AcousticEnums import WaveType
3
3
  from AOT_biomaps.AOT_Acoustic.StructuredWave import StructuredWave
4
4
  from AOT_biomaps.Config import config
5
+ from AOT_biomaps.AOT_Experiment.ExperimentTools import calc_mat_os, convert_to_hex_list
5
6
  import os
6
7
  import psutil
7
8
  import numpy as np
@@ -273,7 +274,7 @@ class Tomography(Experiment):
273
274
  line = f"({coords}, {angles})\n"
274
275
  file.write(line)
275
276
 
276
- def generateActiveList(self, N):
277
+ def generateActiveList(self, N, decimations = None, angles = None):
277
278
  """
278
279
  Génère une liste de patterns d'activation équilibrés et réguliers.
279
280
  Args:
@@ -281,13 +282,136 @@ class Tomography(Experiment):
281
282
  Returns:
282
283
  list: Liste de strings au format "hex_angle".
283
284
  """
285
+ if decimations is not None and angles is not None:
286
+ self._genereate_patterns_from_decimations(decimations, angles)
284
287
  if N < 1:
285
288
  raise ValueError("N must be a positive integer.")
286
289
  self.patterns = self._generate_patterns(N)
287
290
  if not self._check_patterns(self.patterns):
288
291
  raise ValueError("Generated patterns failed validation.")
292
+
293
+
294
+
295
def selectAngles(self, angles):
    """Keep only the acoustic fields whose emission angle is in ``angles``,
    and drop the matching columns from the AO signal matrices.

    Raises:
        ValueError: if the AO signals or the acoustic fields are not loaded.
    """
    if self.AOsignal_withTumor is None and self.AOsignal_withoutTumor is None:
        raise ValueError("AO signals are not initialized. Please load or generate the AO signals first.")
    if self.AcousticFields is None or len(self.AcousticFields) == 0:
        raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")

    # Collect (index, field) pairs so the signal columns stay aligned
    # with the surviving fields.
    kept = [(i, f) for i, f in enumerate(self.AcousticFields) if f.angle in angles]
    kept_cols = [i for i, _ in kept]

    if self.AOsignal_withTumor is not None:
        self.AOsignal_withTumor = self.AOsignal_withTumor[:, kept_cols]
    if self.AOsignal_withoutTumor is not None:
        self.AOsignal_withoutTumor = self.AOsignal_withoutTumor[:, kept_cols]
    self.AcousticFields = [f for _, f in kept]
312
+
313
def selectPatterns(self, pattern_names):
    """Keep only the acoustic fields whose pattern active-list is listed in
    ``pattern_names``, and drop the matching columns from the AO signals.

    Raises:
        ValueError: if the AO signals or the acoustic fields are not loaded.
    """
    if self.AOsignal_withTumor is None and self.AOsignal_withoutTumor is None:
        raise ValueError("AO signals are not initialized. Please load or generate the AO signals first.")
    if self.AcousticFields is None or len(self.AcousticFields) == 0:
        raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")

    # Keep index/field pairs together so signal columns track the fields.
    kept = [(i, f) for i, f in enumerate(self.AcousticFields)
            if f.pattern.activeList in pattern_names]
    kept_cols = [i for i, _ in kept]

    if self.AOsignal_withTumor is not None:
        self.AOsignal_withTumor = self.AOsignal_withTumor[:, kept_cols]
    if self.AOsignal_withoutTumor is not None:
        self.AOsignal_withoutTumor = self.AOsignal_withoutTumor[:, kept_cols]
    self.AcousticFields = [f for _, f in kept]
329
+
330
def selectRandom(self, N):
    """Randomly keep ``N`` acoustic fields (sampled without replacement) and
    the corresponding AO-signal columns.

    Raises:
        ValueError: if signals/fields are missing or ``N`` exceeds the number
        of available fields.
    """
    if self.AOsignal_withTumor is None and self.AOsignal_withoutTumor is None:
        raise ValueError("AO signals are not initialized. Please load or generate the AO signals first.")
    if self.AcousticFields is None or len(self.AcousticFields) == 0:
        raise ValueError("AcousticFields is not initialized. Please generate the system matrix first.")
    if N > len(self.AcousticFields):
        raise ValueError("N is larger than the number of available AcousticFields.")

    # One shared draw keeps the field list and signal columns consistent.
    chosen = np.random.choice(len(self.AcousticFields), size=N, replace=False)

    if self.AOsignal_withTumor is not None:
        self.AOsignal_withTumor = self.AOsignal_withTumor[:, chosen]
    if self.AOsignal_withoutTumor is not None:
        self.AOsignal_withoutTumor = self.AOsignal_withoutTumor[:, chosen]
    self.AcousticFields = [self.AcousticFields[k] for k in chosen]
344
+
345
def _genereate_patterns_from_decimations(self, decimations, angles):
    """Build activation patterns from decimation factors and emission angles.

    For every non-zero decimation, four phase variants are generated per
    angle (cos, inverted cos, sin, inverted sin); a decimation of 0 adds one
    block of all-on patterns (one per angle) at the start.

    Args:
        decimations: list or array of integer decimation factors (0 allowed).
        angles: list or array of emission angles in degrees.

    Returns:
        list[dict]: one ``{"fileName": "<hex>_<angle>"}`` entry per scan.

    NOTE: the method name keeps the existing (misspelled) spelling used by
    callers; do not rename without updating call sites.
    """
    if isinstance(decimations, list): decimations = np.array(decimations)
    if isinstance(angles, list): angles = np.array(angles)

    # Sorting fixes a deterministic scan order regardless of input order.
    angles = np.sort(angles)
    decimations = np.sort(decimations)

    num_elements = self.params.acoustic['num_elements']
    # dx is stored in meters; pattern geometry below works in millimeters.
    dx_mm = self.params.general['dx'] * 1e3

    # --- Number of scans ---
    # Decimation 0 contributes a single all-on block (one scan per angle)
    # instead of the usual four phase blocks.
    if 0 in decimations:
        Nscans = 4 * angles.shape[0] * (decimations.shape[0] - 1) + angles.shape[0]
        offSet = angles.shape[0]
    else:
        Nscans = 4 * angles.shape[0] * decimations.shape[0]
        offSet = 0

    # Start all-on; columns are overwritten per phase block below.
    ActiveLIST = np.ones((num_elements, Nscans))
    Xm = np.arange(1, num_elements + 1) * dx_mm
    # Fundamental spatial frequency of the aperture (cycles per mm).
    dFx = 1 / (num_elements * dx_mm)

    # Non-zero decimations drive the 4-phase loop; decimation 0 was already
    # accounted for via offSet (its columns stay all-on).
    active_decimations = decimations[decimations != 0]

    for i_dec in range(len(active_decimations)):
        # Base column indices of this decimation's 4-phase block.
        idx_base = (np.arange(len(angles))) + (i_dec * 4 * len(angles)) + offSet

        Icos = idx_base
        Incos = idx_base + len(angles)      # inverted cosine
        Isin = idx_base + 2 * len(angles)
        Insin = idx_base + 3 * len(angles)  # inverted sine

        fx = dFx * active_decimations[i_dec]

        # Fill the 4 phases; complements are 1 - the direct phase.
        valid_icos = Icos[Icos < Nscans]
        if valid_icos.size > 0:
            ActiveLIST[:, valid_icos] = calc_mat_os(Xm, fx, dx_mm, ActiveLIST[:, valid_icos], 'cos')
        # NOTE(review): this assumes the in-range counts of Incos and Icos
        # match (true when Nscans is computed as above); a partial overlap
        # would make the shapes disagree — confirm if Nscans is ever changed.
        if (Incos < Nscans).any():
            ActiveLIST[:, Incos[Incos < Nscans]] = 1 - ActiveLIST[:, valid_icos]

        valid_isin = Isin[Isin < Nscans]
        if valid_isin.size > 0:
            ActiveLIST[:, valid_isin] = calc_mat_os(Xm, fx, dx_mm, ActiveLIST[:, valid_isin], 'sin')
        if (Insin < Nscans).any():
            ActiveLIST[:, Insin[Insin < Nscans]] = 1 - ActiveLIST[:, valid_isin]

    # --- Conversion to the expected output format ---
    # 1. Whole matrix to a list of hex strings (one per scan column).
    hexa_list = convert_to_hex_list(ActiveLIST)

    # 2. Angle formatting: sign flag ('1' = negative) + 2-digit magnitude.
    # NOTE(review): ':02d' requires integer angles — confirm callers pass ints.
    def format_angle(a):
        return f"{'1' if a < 0 else '0'}{abs(a):02d}"

    # 3. Build the list of pattern dicts; the angle cycles with period
    # len(angles) across the scan index.
    patterns = []
    for i in range(Nscans):
        angle_val = angles[i % len(angles)]

        hex_pattern = hexa_list[i]
        pair = f"{hex_pattern}_{format_angle(angle_val)}"
        patterns.append({"fileName": pair})

    return patterns
413
+
414
+ def _generate_patterns(self, N,angles = None):
291
415
  def format_angle(a):
292
416
  return f"{'1' if a < 0 else '0'}{abs(a):02d}"
293
417
 
@@ -298,7 +422,13 @@ class Tomography(Experiment):
298
422
  return hex_string
299
423
 
300
424
  num_elements = self.params.acoustic['num_elements']
301
- angle_choices = list(range(-20, 21))
425
+ if angles is None:
426
+ angle_choices = list(range(-20, 21))
427
+ else:
428
+ # convert np.array to list if necessary
429
+ if isinstance(angles, np.ndarray):
430
+ angles = angles.tolist()
431
+ angle_choices = angles
302
432
 
303
433
  # 1. Trouver TOUS les diviseurs PAIRS de num_elements (y compris num_elements)
304
434
  divs = [d for d in range(2, num_elements + 1) if num_elements % d == 0 and d % 2 == 0]
@@ -390,6 +520,80 @@ class Tomography(Experiment):
390
520
 
391
521
  return True
392
522
 
523
def applyApodisation(self, alpha=0.3, divergence_deg=0.5):
    """Apply a dynamic apodization mask to the stored acoustic fields.

    The mask follows the emission angle and the natural divergence of the
    beam, so diffraction lobes (edge artifacts) are suppressed without
    attenuating the useful signal. Fields are modified in place.

    Args:
        alpha (float): Tukey window parameter (0.0 = rectangular,
            1.0 = Hann). 0.3 is a good compromise.
        divergence_deg (float): opening angle of the mask, so it widens
            with depth. 0.0 = straight edges, 0.5 = slight opening
            (recommended).

    Note:
        The active probe width is derived from
        ``params.acoustic['num_elements'] * params.acoustic['element_width']``,
        not passed as an argument.
    """
    print(f"Applying apodization (Alpha={alpha}, Div={divergence_deg}°) on {len(self.AcousticFields)} fields...")

    # Physical active width of the probe (meters, from acoustic params).
    probe_width = self.params.acoustic['num_elements'] * self.params.acoustic['element_width']

    for i in trange(len(self.AcousticFields), desc="Apodisation"):
        # 1. Field data and emission angle.
        field = self.AcousticFields[i].field  # may be (Z, X) or (Time, Z, X)
        angle = self.AcousticFields[i].angle  # plane-wave emission angle (degrees)

        # 2. Physical axes: reuse the object's axes when present, otherwise
        # build defaults from the field's last two (spatial) dimensions.
        nz, nx = field.shape[-2:]

        if hasattr(self, 'x_axis') and self.x_axis is not None:
            x_axis = self.x_axis
        else:
            # Default lateral axis centered on 0 (e.g. -20mm to +20mm).
            x_axis = np.linspace(-probe_width/2, probe_width/2, nx)

        if hasattr(self, 'z_axis') and self.z_axis is not None:
            z_axis = self.z_axis
        else:
            # Default depth axis; 40mm is an arbitrary fallback when unknown.
            estimated_depth = 40e-3
            z_axis = np.linspace(0, estimated_depth, nz)

        # 3. Grids for the mask (row = depth, column = lateral position).
        Z, X = np.meshgrid(z_axis, x_axis, indexing='ij')

        # 4. Steering: shift the lateral coordinate along the beam direction
        # so the mask stays centered on the tilted wavefront.
        angle_rad = np.deg2rad(angle)
        X_aligned = X - Z * np.tan(angle_rad)

        # 5. Depth-dependent half-width: the mask opens with divergence_deg.
        div_rad = np.deg2rad(divergence_deg)
        current_half_width = (probe_width / 2.0) + Z * np.tan(div_rad)

        # 6. Normalize lateral position to [-1, 1] of the local half-width
        # (guarding against division by zero), then build the Tukey mask.
        X_norm = np.divide(X_aligned, current_half_width, out=np.zeros_like(X_aligned), where=current_half_width!=0)

        mask = np.zeros_like(X_norm)
        plateau_threshold = 1.0 * (1 - alpha)

        # Central zone: flat plateau at 1.
        mask[np.abs(X_norm) <= plateau_threshold] = 1.0

        # Transition zone: raised-cosine taper from 1 down to 0.
        transition_indices = (np.abs(X_norm) > plateau_threshold) & (np.abs(X_norm) <= 1.0)
        if np.any(transition_indices):
            x_trans = np.abs(X_norm[transition_indices]) - plateau_threshold
            width_trans = 1.0 * alpha
            mask[transition_indices] = 0.5 * (1 + np.cos(np.pi * x_trans / width_trans))

        # 7. Apply the mask; broadcast over the time axis for 3-D fields.
        if field.ndim == 3:
            field_apodized = field * mask[np.newaxis, :, :]
        else:
            field_apodized = field * mask

        # 8. Store the apodized field back on the object.
        self.AcousticFields[i].field = field_apodized

    print("Apodisation done.")
596
+
393
597
  # PRIVATE METHODS
394
598
  def _generateAcousticFields_STRUCT_CPU(self, fieldDataPath=None, show_log=False, nameBlock=None):
395
599
  if self.patterns is None:
@@ -58,11 +58,11 @@ def MLEM(
58
58
  # Dispatch to the appropriate implementation
59
59
  if use_gpu:
60
60
  if smatrixType == SMatrixType.CSR:
61
- return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
61
+ return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
62
62
  elif smatrixType == SMatrixType.SELL:
63
- return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
63
+ return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
64
64
  elif smatrixType == SMatrixType.DENSE:
65
- return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold,show_logs)
65
+ return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold,show_logs)
66
66
  else:
67
67
  raise ValueError("Unsupported SMatrixType for GPU MLEM.")
68
68
  else:
@@ -228,49 +228,60 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
228
228
  print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
229
229
  return None, None
230
230
 
231
- def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs=True):
231
+ def MLEM_sparseCSR_pycuda(
232
+ SMatrix,
233
+ y,
234
+ numIterations,
235
+ isSavingEachIteration,
236
+ tumor_str,
237
+ max_saves,
238
+ denominator_threshold,
239
+ show_logs=True,
240
+ ):
232
241
  """
233
- SMatrix: instance of SparseMatrixGPU (already allocated)
234
- y: measured data (1D np.float32 of length TN)
235
-
236
- Assumptions:
237
- - SMatrix.values_gpu and SMatrix.col_ind_gpu and SMatrix.row_ptr_gpu are device pointers
238
- - SMatrix.norm_factor_inv_gpu exists
239
- - SMatrix.ctx is the PyCUDA context for the target GPU.
242
+ Robust MLEM implementation for CSR SMatrix using PyCUDA kernels.
243
+ Expects SMatrix to be SparseSMatrix_CSR with attributes:
244
+ - values_gpu, col_ind_gpu, row_ptr_gpu (device pointers)
245
+ - norm_factor_inv_gpu (device pointer)
246
+ - sparse_mod (loaded module with kernels)
247
+ - ctx (PyCUDA context)
248
+ Returns (saved_theta_list, saved_indices) if isSavingEachIteration else (final_theta, None)
240
249
  """
241
-
242
- # We use a final_result placeholder to ensure it's defined outside the try block
243
250
  final_result = None
244
-
251
+
252
+ # Local holders to free in finally
253
+ y_gpu = q_flat_gpu = e_flat_gpu = c_flat_gpu = theta_flat_gpu = None
254
+
245
255
  try:
246
256
  if not isinstance(SMatrix, SparseSMatrix_CSR):
247
257
  raise TypeError("SMatrix must be a SparseSMatrix_CSR object")
248
258
 
249
- # --- CONTEXT FIX: Push the context associated with SMatrix ---
250
- # This ensures all subsequent PyCUDA operations use the correct GPU/context.
251
- if SMatrix.ctx:
259
+ # push context (if provided)
260
+ popped_ctx = False
261
+ if getattr(SMatrix, "ctx", None):
252
262
  SMatrix.ctx.push()
253
- # -----------------------------------------------------------
263
+ popped_ctx = True
254
264
 
255
265
  dtype = np.float32
256
- TN = SMatrix.N * SMatrix.T
257
- ZX = SMatrix.Z * SMatrix.X
258
- # Ensure Z and X are correctly defined for reshaping
259
- Z = SMatrix.Z
260
- X = SMatrix.X
261
-
262
- if show_logs:
263
- # We assume SMatrix was initialized using the correct device index.
264
- print(f"Executing on GPU device index: {SMatrix.device.primary_context.device.name()}")
265
- print(f"Dim X: {X}, Dim Z: {Z}, TN: {TN}, ZX: {ZX}")
266
-
267
- # streams
266
+ TN = int(SMatrix.N * SMatrix.T)
267
+ ZX = int(SMatrix.Z * SMatrix.X)
268
+ Z = int(SMatrix.Z)
269
+ X = int(SMatrix.X)
270
+
271
+ # Make sure required GPU pointers exist
272
+ if getattr(SMatrix, "values_gpu", None) is None or getattr(SMatrix, "col_ind_gpu", None) is None or getattr(SMatrix, "row_ptr_gpu", None) is None:
273
+ raise RuntimeError("SMatrix is missing GPU buffers (values_gpu / col_ind_gpu / row_ptr_gpu)")
274
+
275
+ if getattr(SMatrix, "norm_factor_inv_gpu", None) is None:
276
+ raise RuntimeError("SMatrix.norm_factor_inv_gpu not available on GPU")
277
+
278
+ # stream for async operations
268
279
  stream = drv.Stream()
269
280
 
270
- # allocate device buffers
271
- y = y.T.flatten().astype(np.float32)
272
- y_gpu = drv.mem_alloc(y.nbytes)
273
- drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
281
+ # prepare device buffers
282
+ y_arr = np.ascontiguousarray(y.T.flatten().astype(np.float32))
283
+ y_gpu = drv.mem_alloc(y_arr.nbytes)
284
+ drv.memcpy_htod_async(y_gpu, y_arr, stream)
274
285
 
275
286
  theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
276
287
  initial_theta = np.full(ZX, 0.1, dtype=dtype)
@@ -282,61 +293,111 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
282
293
  e_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
283
294
  c_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
284
295
 
285
- # Assuming the cubin file is found globally or managed by the caller
286
- projection_kernel = SMatrix.sparse_mod.get_function('projection_kernel__CSR')
287
- backprojection_kernel = SMatrix.sparse_mod.get_function('backprojection_kernel__CSR')
288
- ratio_kernel = SMatrix.sparse_mod.get_function('ratio_kernel')
289
- update_kernel = SMatrix.sparse_mod.get_function('update_theta_kernel')
296
+ # Ensure kernels exist
297
+ projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__CSR")
298
+ backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__CSR")
299
+ ratio_kernel = SMatrix.sparse_mod.get_function("ratio_kernel")
300
+ update_kernel = SMatrix.sparse_mod.get_function("update_theta_kernel")
290
301
  block_size = 256
291
302
 
292
- saved_theta, saved_indices = [], []
303
+ # prepare save indices once
293
304
  if numIterations <= max_saves:
294
305
  save_indices = list(range(numIterations))
295
306
  else:
296
- save_indices = list(range(0, numIterations, max(1, numIterations // max_saves)))
307
+ step = max(1, numIterations // max_saves)
308
+ save_indices = list(range(0, numIterations, step))
297
309
  if save_indices[-1] != numIterations - 1:
298
310
  save_indices.append(numIterations - 1)
299
311
 
312
+ saved_theta = []
313
+ saved_indices = []
314
+
300
315
  description = f"AOT-BioMaps -- ML-EM (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
301
316
  iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
317
+
318
+ # grid sizes
319
+ grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
320
+ grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
321
+
302
322
  for it in iterator:
303
323
  # projection: q = A * theta
304
- projection_kernel(q_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
305
- theta_flat_gpu, np.int32(TN),
306
- block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1),
307
- stream=stream)
324
+ projection_kernel(
325
+ q_flat_gpu,
326
+ SMatrix.values_gpu,
327
+ SMatrix.row_ptr_gpu,
328
+ SMatrix.col_ind_gpu,
329
+ theta_flat_gpu,
330
+ np.int32(TN),
331
+ block=(block_size, 1, 1),
332
+ grid=grid_rows,
333
+ stream=stream,
334
+ )
308
335
 
309
336
  # ratio: e = y / max(q, threshold)
310
- ratio_kernel(e_flat_gpu, y_gpu, q_flat_gpu, np.float32(denominator_threshold), np.int32(TN),
311
- block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
312
-
313
- # backprojection: c = A^T * e
337
+ ratio_kernel(
338
+ e_flat_gpu,
339
+ y_gpu,
340
+ q_flat_gpu,
341
+ np.float32(denominator_threshold),
342
+ np.int32(TN),
343
+ block=(block_size, 1, 1),
344
+ grid=grid_rows,
345
+ stream=stream,
346
+ )
347
+
348
+ # backprojection: c = A^T * e (zero c first)
314
349
  drv.memset_d32_async(c_flat_gpu, 0, ZX, stream)
315
- backprojection_kernel(c_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
316
- e_flat_gpu, np.int32(TN),
317
- block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
350
+ backprojection_kernel(
351
+ c_flat_gpu,
352
+ SMatrix.values_gpu,
353
+ SMatrix.row_ptr_gpu,
354
+ SMatrix.col_ind_gpu,
355
+ e_flat_gpu,
356
+ np.int32(TN),
357
+ block=(block_size, 1, 1),
358
+ grid=grid_rows,
359
+ stream=stream,
360
+ )
318
361
 
319
362
  # update: theta *= norm_factor_inv * c
320
- update_kernel(theta_flat_gpu, c_flat_gpu, norm_factor_inv_gpu, np.int32(ZX),
321
- block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
322
-
363
+ update_kernel(
364
+ theta_flat_gpu,
365
+ c_flat_gpu,
366
+ norm_factor_inv_gpu,
367
+ np.int32(ZX),
368
+ block=(block_size, 1, 1),
369
+ grid=grid_cols,
370
+ stream=stream,
371
+ )
372
+
373
+ # periodic synchronization for stability / logging
323
374
  if show_logs and (it % 10 == 0 or it == numIterations - 1):
324
- drv.Context.synchronize()
375
+ stream.synchronize()
325
376
 
377
+ # save snapshot if required
326
378
  if isSavingEachIteration and it in save_indices:
379
+ # ensure kernels finished
380
+ stream.synchronize()
327
381
  theta_host = np.empty(ZX, dtype=dtype)
328
382
  drv.memcpy_dtoh(theta_host, theta_flat_gpu)
329
383
  saved_theta.append(theta_host.reshape(Z, X))
330
- saved_indices.append(it)
331
-
332
- drv.Context.synchronize()
333
-
334
- final_result = np.empty(ZX, dtype=dtype)
335
- drv.memcpy_dtoh(final_result, theta_flat_gpu)
336
- final_result = final_result.reshape(Z, X)
337
-
338
- # free local allocations
339
- y_gpu.free(); q_flat_gpu.free(); e_flat_gpu.free(); c_flat_gpu.free(); theta_flat_gpu.free()
384
+ saved_indices.append(int(it))
385
+
386
+ # make sure everything finished
387
+ stream.synchronize()
388
+ final_theta_host = np.empty(ZX, dtype=dtype)
389
+ drv.memcpy_dtoh(final_theta_host, theta_flat_gpu)
390
+ final_result = final_theta_host.reshape(Z, X)
391
+
392
+ # free local allocations (will also be freed in finally if exception)
393
+ try:
394
+ y_gpu.free()
395
+ q_flat_gpu.free()
396
+ e_flat_gpu.free()
397
+ c_flat_gpu.free()
398
+ theta_flat_gpu.free()
399
+ except Exception:
400
+ pass
340
401
 
341
402
  return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
342
403
 
@@ -344,32 +405,50 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
344
405
  print(f"Error in MLEM_sparseCSR_pycuda: {type(e).__name__}: {e}")
345
406
  gc.collect()
346
407
  return None, None
347
-
348
- finally:
349
- # --- CONTEXT FIX: Pop the context ---
350
- if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
351
- SMatrix.ctx.pop()
352
- # ------------------------------------
353
408
 
354
- def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs=True):
409
+ finally:
410
+ # free buffers if still allocated
411
+ for buf in ("y_gpu", "q_flat_gpu", "e_flat_gpu", "c_flat_gpu", "theta_flat_gpu"):
412
+ try:
413
+ val = locals().get(buf, None)
414
+ if val is not None:
415
+ val.free()
416
+ except Exception:
417
+ pass
418
+ # pop context safely
419
+ try:
420
+ if SMatrix and hasattr(SMatrix, "ctx") and SMatrix.ctx and popped_ctx:
421
+ SMatrix.ctx.pop()
422
+ except Exception:
423
+ pass
424
+
425
+ def MLEM_sparseSELL_pycuda(
426
+ SMatrix,
427
+ y,
428
+ numIterations,
429
+ isSavingEachIteration,
430
+ tumor_str,
431
+ max_saves,
432
+ denominator_threshold,
433
+ show_logs=True,
434
+ ):
355
435
  """
356
436
  MLEM using SELL-C-σ kernels already present on device.
357
437
  y must be float32 length TN.
438
+
439
+ Version propre : diagnostics retirés.
358
440
  """
359
441
  final_result = None
360
442
 
361
443
  try:
362
- # check if SMatrix is SparseSMatrix_SELL object
363
444
  if not isinstance(SMatrix, SparseSMatrix_SELL):
364
445
  raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
365
446
  if SMatrix.sell_values_gpu is None:
366
447
  raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")
367
-
368
- # --- CONTEXT FIX: Push the context associated with SMatrix ---
369
- # This ensures all subsequent PyCUDA operations use the correct GPU/context.
448
+
449
+ # Context
370
450
  if SMatrix.ctx:
371
451
  SMatrix.ctx.push()
372
- # -----------------------------------------------------------
373
452
 
374
453
  TN = int(SMatrix.N * SMatrix.T)
375
454
  ZX = int(SMatrix.Z * SMatrix.X)
@@ -383,7 +462,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
383
462
 
384
463
  stream = drv.Stream()
385
464
 
386
- # device buffers
465
+ # Device buffers
387
466
  y = y.T.flatten().astype(np.float32)
388
467
  y_gpu = drv.mem_alloc(y.nbytes)
389
468
  drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
@@ -402,6 +481,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
402
481
  grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
403
482
  grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
404
483
 
484
+ # Prepare save indices
405
485
  saved_theta, saved_indices = [], []
406
486
  if numIterations <= max_saves:
407
487
  save_indices = list(range(numIterations))
@@ -412,52 +492,60 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
412
492
 
413
493
  description = f"AOT-BioMaps -- ML-EM (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
414
494
  iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
495
+
496
+ # --- MLEM Loop ---
415
497
  for it in iterator:
416
- # projection
417
- proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
418
- theta_gpu, np.int32(TN), slice_height,
419
- block=(block_size,1,1), grid=grid_rows, stream=stream)
420
498
 
421
- # ratio
499
+ proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
500
+ slice_ptr_gpu, slice_len_gpu,
501
+ theta_gpu, np.int32(TN), slice_height,
502
+ block=(block_size,1,1), grid=grid_rows, stream=stream)
503
+
422
504
  ratio(e_gpu, y_gpu, q_gpu, np.float32(denominator_threshold), np.int32(TN),
423
- block=(block_size,1,1), grid=grid_rows, stream=stream)
505
+ block=(block_size,1,1), grid=grid_rows, stream=stream)
424
506
 
425
- # zero c
426
507
  drv.memset_d32_async(c_gpu, 0, ZX, stream)
427
508
 
428
- # backprojection accumulate
429
- backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
430
- e_gpu, c_gpu, np.int32(TN), slice_height,
431
- block=(block_size,1,1), grid=grid_rows, stream=stream)
509
+ backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
510
+ slice_ptr_gpu, slice_len_gpu,
511
+ e_gpu, c_gpu, np.int32(TN), slice_height,
512
+ block=(block_size,1,1), grid=grid_rows, stream=stream)
432
513
 
433
- # update
434
514
  update(theta_gpu, c_gpu, SMatrix.norm_factor_inv_gpu, np.int32(ZX),
435
- block=(block_size,1,1), grid=grid_cols, stream=stream)
515
+ block=(block_size,1,1), grid=grid_cols, stream=stream)
436
516
 
437
- stream.synchronize()
438
517
  if isSavingEachIteration and it in save_indices:
439
518
  out = np.empty(ZX, dtype=np.float32)
440
519
  drv.memcpy_dtoh(out, theta_gpu)
441
520
  saved_theta.append(out.reshape((SMatrix.Z, SMatrix.X)))
442
521
  saved_indices.append(it)
443
522
 
444
- # final copy
523
+ stream.synchronize()
445
524
  res = np.empty(ZX, dtype=np.float32)
446
525
  drv.memcpy_dtoh(res, theta_gpu)
447
526
 
448
- # free temporaries
449
- y_gpu.free(); q_gpu.free(); e_gpu.free(); c_gpu.free(); theta_gpu.free()
450
-
527
+ # free
528
+ try:
529
+ y_gpu.free()
530
+ q_gpu.free()
531
+ e_gpu.free()
532
+ c_gpu.free()
533
+ theta_gpu.free()
534
+ except Exception:
535
+ pass
536
+
451
537
  final_result = res.reshape((SMatrix.Z, SMatrix.X))
452
538
  return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
453
-
539
+
454
540
  except Exception as e:
455
541
  print(f"Error in MLEM_sparseSELL_pycuda: {type(e).__name__}: {e}")
456
542
  gc.collect()
457
543
  return None, None
458
-
544
+
459
545
  finally:
460
- # --- CONTEXT FIX: Pop the context ---
461
546
  if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
462
- SMatrix.ctx.pop()
463
- # ------------------------------------
547
+ try:
548
+ SMatrix.ctx.pop()
549
+ except Exception:
550
+ pass
551
+
@@ -92,13 +92,11 @@ class SparseSMatrix_SELL:
92
92
  def allocate(self):
93
93
  """
94
94
  Build SELL-C-σ directly from manip AcousticFields in streaming blocks.
95
- NOTE: This is the logic of allocate_sell_c_sigma_direct from the working class.
95
+ Corrected: per-block row_nnz copy, zeroing of host block, proper sync.
96
96
  """
97
97
  if self.sparse_mod is None:
98
98
  raise RuntimeError("CUDA module not loaded. Check compilation.")
99
99
 
100
- # NOTE: Les noms de kernel (count_nnz_rows_kernel, fill_kernel__SELL) sont utilisés
101
- # car ils sont présents dans la classe fonctionnelle.
102
100
  count_kernel = self.sparse_mod.get_function("count_nnz_rows_kernel")
103
101
  fill_kernel = self.sparse_mod.get_function("fill_kernel__SELL")
104
102
 
@@ -106,34 +104,34 @@ class SparseSMatrix_SELL:
106
104
  num_cols = int(self.Z * self.X)
107
105
  C = int(self.slice_height)
108
106
 
109
- # host temporary block
110
107
  br = int(self.block_rows)
111
- bytes_per_elem = np.dtype(np.float32).itemsize
112
108
  dense_host = np.empty((br, num_cols), dtype=np.float32)
113
109
 
114
- # Allocation 1: Dense block GPU memory
110
+ # Allocation dense buffer on device (size = br * num_cols)
115
111
  dense_gpu_size = dense_host.nbytes
116
112
  dense_gpu = drv.mem_alloc(dense_gpu_size)
117
113
 
118
- # 1) count nnz per row (on host via small blocks with GPU kernel)
114
+ # 1) count nnz per row (per block)
119
115
  row_nnz = np.zeros(num_rows, dtype=np.int32)
120
116
  row_nnz_gpu_block_size = br * np.dtype(np.int32).itemsize
121
117
  row_nnz_gpu_block = drv.mem_alloc(row_nnz_gpu_block_size)
122
118
 
123
- block = 256
119
+ block = 128
124
120
  for b in trange(0, num_rows, br, desc="Count NNZ per row"):
125
121
  R = min(br, num_rows - b)
126
- # fill dense_host
122
+ # zero the host block to avoid garbage in tail when R < br
123
+ dense_host.fill(0.0)
127
124
  for i in range(R):
128
125
  rg = b + i
129
126
  n_idx = rg // self.T
130
127
  t_idx = rg % self.T
131
128
  dense_host[i, :] = self.manip.AcousticFields[n_idx].field[t_idx].flatten()
132
- # copy only R rows
129
+ # copy whole buffer (safe because we zeroed tail)
133
130
  drv.memcpy_htod(dense_gpu, dense_host)
134
131
  grid = ((R + block - 1) // block, 1, 1)
135
132
  count_kernel(dense_gpu, row_nnz_gpu_block, np.int32(R), np.int32(num_cols), np.float32(self.relative_threshold),
136
- block=(block,1,1), grid=grid)
133
+ block=(block,1,1), grid=grid)
134
+ drv.Context.synchronize()
137
135
  tmp = np.empty(R, dtype=np.int32)
138
136
  drv.memcpy_dtoh(tmp, row_nnz_gpu_block)
139
137
  row_nnz[b:b+R] = tmp
@@ -148,7 +146,6 @@ class SparseSMatrix_SELL:
148
146
  r0 = s * C
149
147
  r1 = min(num_rows, r0 + C)
150
148
  slice_len[s] = int(np.max(row_nnz[r0:r1])) if (r1>r0) else 0
151
- # slice_ptr (int64)
152
149
  slice_ptr = np.zeros(num_slices + 1, dtype=np.int64)
153
150
  for s in range(num_slices):
154
151
  slice_ptr[s+1] = slice_ptr[s] + (slice_len[s] * C)
@@ -160,9 +157,14 @@ class SparseSMatrix_SELL:
160
157
  self.sell_values_gpu_size = total_storage * np.dtype(np.float32).itemsize
161
158
  self.sell_colinds_gpu_size = total_storage * np.dtype(np.uint32).itemsize
162
159
 
160
+ # allocate and optionally zero them
163
161
  self.sell_values_gpu = drv.mem_alloc(self.sell_values_gpu_size)
162
+ # It's good practice to zero the values buffer to avoid leftover memory
163
+ drv.memset_d32(self.sell_values_gpu, 0, total_storage)
164
+
164
165
  self.sell_colinds_gpu = drv.mem_alloc(self.sell_colinds_gpu_size)
165
-
166
+ drv.memset_d32(self.sell_colinds_gpu, 0, total_storage)
167
+
166
168
  # allocate slice metadata on device
167
169
  self.slice_ptr = slice_ptr
168
170
  self.slice_len = slice_len
@@ -177,29 +179,28 @@ class SparseSMatrix_SELL:
177
179
  drv.memcpy_htod(self.slice_len_gpu, self.slice_len)
178
180
 
179
181
  # 3) fill SELL arrays by streaming blocks again (use GPU fill kernel)
180
- # reuse dense_host and allocate new dense_gpu
181
182
  dense_host = np.empty((br, num_cols), dtype=np.float32)
183
+ dense_gpu = drv.mem_alloc(dense_host.nbytes)
182
184
 
183
- dense_gpu_2_size = dense_host.nbytes
184
- dense_gpu = drv.mem_alloc(dense_gpu_2_size)
185
-
186
- # we also need row_nnz on device per-block; supply global row_nnz on host but the kernel recomputes threshold
187
- row_nnz_host_gpu_size = br * np.dtype(np.int32).itemsize
188
- row_nnz_host_gpu = drv.mem_alloc(row_nnz_host_gpu_size)
185
+ # For per-block row_nnz pointer we allocate a buffer of max block size once, then reuse
186
+ row_nnz_host_gpu = drv.mem_alloc(br * np.dtype(np.int32).itemsize)
189
187
 
190
188
  for b in trange(0, num_rows, br, desc="Fill SELL"):
191
189
  R = min(br, num_rows - b)
190
+ dense_host.fill(0.0)
192
191
  for i in range(R):
193
192
  rg = b + i
194
193
  n_idx = rg // self.T
195
194
  t_idx = rg % self.T
196
195
  dense_host[i, :] = self.manip.AcousticFields[n_idx].field[t_idx].flatten()
196
+ # copy host block
197
197
  drv.memcpy_htod(dense_gpu, dense_host)
198
- # We pass a dummy row_nnz pointer (not used in this kernel; left for API)
199
- # Kernel expects rows_in_block, rows_global_offset to know where to write.
198
+ # copy corresponding row_nnz slice (only R entries)
199
+ drv.memcpy_htod(row_nnz_host_gpu, row_nnz[b:b+R])
200
+
200
201
  grid = ((R + block - 1) // block, 1, 1)
201
202
  fill_kernel(dense_gpu,
202
- np.intp(0), # placeholder for row_nnz pointer (not used)
203
+ row_nnz_host_gpu,
203
204
  self.slice_ptr_gpu,
204
205
  self.slice_len_gpu,
205
206
  self.sell_colinds_gpu,
@@ -210,12 +211,14 @@ class SparseSMatrix_SELL:
210
211
  np.int32(C),
211
212
  np.float32(self.relative_threshold),
212
213
  block=(block,1,1), grid=grid)
214
+ drv.Context.synchronize()
215
+
213
216
  dense_gpu.free()
214
217
  row_nnz_host_gpu.free()
215
218
 
216
219
  # 4) compute norm_factor_inv via GPU accumulate (col sums)
217
220
  self.compute_norm_factor()
218
-
221
+
219
222
  def apply_apodization_gpu(self, window_vector_gpu):
220
223
  """
221
224
  Applique le fenêtrage directement sur self.sell_values_gpu
@@ -234,7 +237,7 @@ class SparseSMatrix_SELL:
234
237
  )
235
238
 
236
239
  # Le total_storage inclut les éléments non-nuls et le padding SELL.
237
- threads = 256
240
+ threads = 128
238
241
  blocks = (self.total_storage + threads - 1) // threads
239
242
 
240
243
  # Lancement du kernel. Il travaille sur total_storage éléments.
@@ -248,43 +251,72 @@ class SparseSMatrix_SELL:
248
251
  )
249
252
  drv.Context.synchronize()
250
253
  print("✅ Multiplication par le fenêtrage effectuée in-place sur GPU (SELL-C-σ).")
251
- # --- Ajout de la fonction de normalisation (qui fonctionne) ---
254
+
252
255
  def compute_norm_factor(self):
253
256
  """
254
- Accumulate column sums on GPU using accumulate_columns_atomic, then compute inverse.
257
+ Compute the TRUE MLEM normalization norm_factor_inv = 1 / (A^T * 1)
258
+ by performing a SELL backprojection of a vector of ones.
259
+ This is the ONLY correct normalization for MLEM.
255
260
  """
256
- if self.total_storage == 0:
257
- raise RuntimeError("sell not built")
258
261
  ZX = int(self.Z * self.X)
262
+ TN = int(self.T * self.N)
259
263
 
260
- # allocate col sum on device
261
- col_sum_gpu_size = ZX * np.dtype(np.float32).itemsize
262
- col_sum_gpu = drv.mem_alloc(col_sum_gpu_size)
263
- drv.memset_d32(col_sum_gpu, 0, ZX)
264
+ # Allocate device vector of ones (projections)
265
+ ones_gpu = drv.mem_alloc(TN * np.dtype(np.float32).itemsize)
266
+ drv.memset_d32(ones_gpu, 0x3f800000, TN) # 1.0f bit pattern
264
267
 
265
- # FIX: Kernel name is "accumulate_columns_atomic"
266
- acc_kernel = self.sparse_mod.get_function("accumulate_columns_atomic")
267
-
268
- threads = 256
269
- blocks = (self.total_storage + threads - 1) // threads
270
- acc_kernel(self.sell_values_gpu, self.sell_colinds_gpu, np.int64(self.total_storage), col_sum_gpu,
271
- block=(threads,1,1), grid=(blocks,1,1))
268
+ # Allocate output for backprojection (ZX pixels)
269
+ c_gpu = drv.mem_alloc(ZX * np.dtype(np.float32).itemsize)
270
+ drv.memset_d32(c_gpu, 0, ZX)
271
+
272
+ # Get SELL backprojection kernel
273
+ try:
274
+ bp_kernel = self.sparse_mod.get_function("backprojection_kernel__SELL")
275
+ except Exception as e:
276
+ raise RuntimeError("Missing kernel backprojection_kernel__SELL in the cubin") from e
277
+
278
+ threads = 256
279
+ blocks = (TN + threads - 1) // threads
280
+
281
+ # Launch GPU backprojection
282
+ bp_kernel(
283
+ self.sell_values_gpu,
284
+ self.sell_colinds_gpu,
285
+ self.slice_ptr_gpu,
286
+ self.slice_len_gpu,
287
+ ones_gpu,
288
+ c_gpu,
289
+ np.int32(TN),
290
+ # np.int32(ZX),
291
+ np.int32(self.slice_height),
292
+ # np.int64(self.total_storage),
293
+ block=(threads, 1, 1), # Utilise le nouveau nombre de threads
294
+ grid=(blocks, 1, 1)
295
+ )
272
296
  drv.Context.synchronize()
273
297
 
274
- # copy back
275
- norm_host = np.empty(ZX, dtype=np.float32)
276
- drv.memcpy_dtoh(norm_host, col_sum_gpu)
277
- col_sum_gpu.free()
298
+ # Copy back to host
299
+ c_host = np.empty(ZX, dtype=np.float32)
300
+ drv.memcpy_dtoh(c_host, c_gpu)
301
+ ones_gpu.free()
302
+ c_gpu.free()
303
+
304
+ # Avoid divide-by-zero
305
+ c_host = np.maximum(c_host, 1e-6)
306
+
307
+ # Compute inverse (stored for use in MLEM)
308
+ self.norm_factor_inv = (1.0 / c_host).astype(np.float32)
278
309
 
279
- norm = np.maximum(norm_host.astype(np.float64), 1e-6)
280
- self.norm_factor_inv = (1.0 / norm).astype(np.float32)
310
+ # Upload to GPU
281
311
  if self.norm_factor_inv_gpu is not None:
282
312
  self.norm_factor_inv_gpu.free()
283
-
313
+
284
314
  self.norm_factor_inv_gpu_size = self.norm_factor_inv.nbytes
285
315
  self.norm_factor_inv_gpu = drv.mem_alloc(self.norm_factor_inv_gpu_size)
286
316
  drv.memcpy_htod(self.norm_factor_inv_gpu, self.norm_factor_inv)
287
317
 
318
+ print("✓ Normalization (A^T*1) computed for MLEM.")
319
+
288
320
  def compute_density(self):
289
321
  """
290
322
  Returns only the density of the SELL-C-σ matrix.
@@ -221,7 +221,6 @@ def calculate_memory_requirement(SMatrix, y):
221
221
  # --- 3. Final Result ---
222
222
  return total_bytes / (1024 ** 3)
223
223
 
224
-
225
224
  def check_gpu_memory(device_index, required_memory, show_logs=True):
226
225
  """Check if enough memory is available on the specified GPU."""
227
226
  free_memory, _ = torch.cuda.mem_get_info(f"cuda:{device_index}")
@@ -252,7 +251,6 @@ def _backward_projection(SMatrix, e_p, c_p):
252
251
  total += SMatrix[_t, _z, _x, _n] * e_p[_t, _n]
253
252
  c_p[_z, _x] = total
254
253
 
255
-
256
254
  def _build_adjacency_sparse(Z, X, device, corner=(0.5 - np.sqrt(2) / 4) / np.sqrt(2), face=0.5 - np.sqrt(2) / 4,dtype=torch.float32):
257
255
  rows, cols, weights = [], [], []
258
256
  for z in range(Z):
@@ -273,7 +271,6 @@ def _build_adjacency_sparse(Z, X, device, corner=(0.5 - np.sqrt(2) / 4) / np.sqr
273
271
  index, values = coalesce(index, values, m=Z*X, n=Z*X)
274
272
  return index, values
275
273
 
276
-
277
274
  def power_method(P, PT, data, Z, X, n_it=10):
278
275
  x = torch.randn(Z * X, device=data.device)
279
276
  x = x / torch.norm(x)
AOT_biomaps/__init__.py CHANGED
@@ -85,7 +85,7 @@ from .AOT_Recon.AOT_PotentialFunctions.RelativeDifferences import *
85
85
  from .Config import config
86
86
  from .Settings import *
87
87
 
88
- __version__ = '2.9.294'
88
+ __version__ = '2.9.321'
89
89
  __process__ = config.get_process()
90
90
 
91
91
  def initialize(process=None):
@@ -138,6 +138,33 @@ def initialize(process=None):
138
138
 
139
139
 
140
140
 
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+
166
+
167
+
141
168
 
142
169
 
143
170
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: AOT_biomaps
3
- Version: 2.9.294
3
+ Version: 2.9.321
4
4
  Summary: Acousto-Optic Tomography
5
5
  Home-page: https://github.com/LucasDuclos/AcoustoOpticTomography
6
6
  Author: Lucas Duclos
@@ -1,16 +1,17 @@
1
1
  AOT_biomaps/Config.py,sha256=ghEOP1n8aO1pR-su13wMeAZAxZRfry5hH67NbtZ8SqI,3614
2
2
  AOT_biomaps/Settings.py,sha256=v8fPhnvvcfBJP29m1RLOTEr3jndGLGwbUiORXmsj2Bo,2853
3
- AOT_biomaps/__init__.py,sha256=86X2xRJNI72niePVEmyZMqO0Y4vQeag7ZSHPM3kc4Pk,4250
3
+ AOT_biomaps/__init__.py,sha256=6TSP5wI6TYb3icQQXws5eVgqxv0-F2F9HDYXj9uot6A,4304
4
4
  AOT_biomaps/AOT_Acoustic/AcousticEnums.py,sha256=s5kXa6jKzbS4btwbubrVcynLOr0yg5tth5vL_FGfbMk,1802
5
- AOT_biomaps/AOT_Acoustic/AcousticTools.py,sha256=al7xXKMY5e-qQQ7nrQVPVAmqYiB320OluNlY6ti8iKc,7539
5
+ AOT_biomaps/AOT_Acoustic/AcousticTools.py,sha256=h2sCtGVcDtyLtEF1q7sLZmuWivWmesVGUBPnW-ndQqc,7535
6
6
  AOT_biomaps/AOT_Acoustic/FocusedWave.py,sha256=3kGKKDx_3Msy5COYqIwzROPORGWvNjw8UsDanBfkMXE,11037
7
7
  AOT_biomaps/AOT_Acoustic/IrregularWave.py,sha256=yZhtxkR6zlciRcEpdTR0BAhvgQl40XHKFaF8f4VXarE,3035
8
8
  AOT_biomaps/AOT_Acoustic/PlaneWave.py,sha256=xza-rj5AUWDecLkGDxRcULrwZVWeBvGnEP2d51TyR04,1447
9
9
  AOT_biomaps/AOT_Acoustic/StructuredWave.py,sha256=jTLVlOhYLWJb5MxZPxhq3OFVlz2McoyMPBmfLvnekDU,18209
10
10
  AOT_biomaps/AOT_Acoustic/__init__.py,sha256=t9M2rRqa_L9pk7W2FeELTkHEMuP4DBr4gBRldMqsQbg,491
11
11
  AOT_biomaps/AOT_Acoustic/_mainAcoustic.py,sha256=RdmhRF1i0KAlpsP7_wnZ7F4J27br3eUc4XR91Qq7C64,44158
12
+ AOT_biomaps/AOT_Experiment/ExperimentTools.py,sha256=BkHSX_foyyj5UrHZWQH5F9DGeV8o2fkp3euEbcvE4vA,2399
12
13
  AOT_biomaps/AOT_Experiment/Focus.py,sha256=B2nBawmv-NG2AWJx9zgQ8GlN6aFB9FwTSqX-M-phKXg,3193
13
- AOT_biomaps/AOT_Experiment/Tomography.py,sha256=Ri83b4GMrxJO60qWsK9JInS9a7HU2Q8uqpjD3Xkl9OY,24488
14
+ AOT_biomaps/AOT_Experiment/Tomography.py,sha256=qH7QlAPp5Er1AhMbWDpbOT6VRJHgrYUdUxazuEMAc_A,34569
14
15
  AOT_biomaps/AOT_Experiment/__init__.py,sha256=H9zMLeBLA6uhbaHohAa-2u5mDDxqJi8oE5c6tShdQp8,308
15
16
  AOT_biomaps/AOT_Experiment/_mainExperiment.py,sha256=zSfuNrsz7nhiKrGIdK6CAXjlI2T6qYC5-JXHFgPNzhc,24674
16
17
  AOT_biomaps/AOT_Optic/Absorber.py,sha256=jEodzRy7gkEH-wbazVasRQiri0dU16BfapmR-qnTSvM,867
@@ -18,20 +19,20 @@ AOT_biomaps/AOT_Optic/Laser.py,sha256=uzQwxswjU0kZWix3CmZLoWmhsBa3VhN27STprNv-xB
18
19
  AOT_biomaps/AOT_Optic/OpticEnums.py,sha256=b349_JyjHqQohmjK4Wke-A_HLGaqb3_BKbyUqFC4jxY,499
19
20
  AOT_biomaps/AOT_Optic/__init__.py,sha256=HSUVhfz0NzwHHZZ9KP9Xyfu33IgP_rYJX86J-gEROlo,321
20
21
  AOT_biomaps/AOT_Optic/_mainOptic.py,sha256=Wk63CcgWbU-ygMfjNK80islaUbGGJpTXgZY3_C2KQNY,8179
21
- AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin,sha256=Li4HdWpr65ONf82FqFAjZ8w3ob9UuJucDWGAAvBF77Q,83680
22
+ AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin,sha256=JWy-bdtBTZdnNlDbJGZKwXyF-2u1wICtmlOC_YxEL6o,82528
22
23
  AOT_biomaps/AOT_Recon/AlgebraicRecon.py,sha256=CGBXZyYEZ3TOTFOKSt-h7NGuFbuI9PNr3YTWTbSLxDo,46832
23
24
  AOT_biomaps/AOT_Recon/AnalyticRecon.py,sha256=RaQ5AJ1HUmSct0BgjZ0GWSJg7SALCn3Q0laqj1yyhAE,7123
24
25
  AOT_biomaps/AOT_Recon/BayesianRecon.py,sha256=RnnPa-tTcvirwiNPnCRZnSM4NWeEEltYET-piBbp34g,12671
25
26
  AOT_biomaps/AOT_Recon/DeepLearningRecon.py,sha256=RfVcEsi4GeGqJn0_SPxwQPQx6IQjin79WKh2UarMRLI,1383
26
27
  AOT_biomaps/AOT_Recon/PrimalDualRecon.py,sha256=JbFhxiyUoSTnlJgHbOWIfUUwhwfZoi39RJMnfkagegY,16504
27
28
  AOT_biomaps/AOT_Recon/ReconEnums.py,sha256=KAf55RqHAr2ilt6pxFrUBGQOn-7HA8NP6TyL-1FNiXo,19714
28
- AOT_biomaps/AOT_Recon/ReconTools.py,sha256=A4IQV7IETu9MgYr7hjLNPTImzjf8CEU4cZ2e0EgJNWA,19878
29
+ AOT_biomaps/AOT_Recon/ReconTools.py,sha256=py1zKVEa0j7EfmcNZS2lpVQwzlkY6rRWsDQ8izWlme4,19872
29
30
  AOT_biomaps/AOT_Recon/__init__.py,sha256=xs_argJqXKFl76xP7-jiUc1ynOEEtY7XZ0gDxD5uVZc,246
30
31
  AOT_biomaps/AOT_Recon/_mainRecon.py,sha256=exoa2UBMfMHjemxAU9dW0mhEfsP6Oe1qjSfrTrgbIcY,13125
31
32
  AOT_biomaps/AOT_Recon/AOT_Optimizers/DEPIERRO.py,sha256=qA1n722GLQJH3V8HcLr5q_GxEwBS_NRlIT3E6JZk-Ag,9479
32
33
  AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py,sha256=bCu1rKzFXPbYQ7jV3L3E_jVQpb6LIEC5MIlN1-mCNdY,22814
33
34
  AOT_biomaps/AOT_Recon/AOT_Optimizers/MAPEM.py,sha256=vQLCB0L4FSXJKn2_6kdIdWrI6WZ82KuqUh7CSqBGVuo,25766
34
- AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py,sha256=v5wITKacemu_hY391-cZDSpw4R95XqyLGivQWa-gOOc,21254
35
+ AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py,sha256=4omsqzHEZJfv0mEfmxfK71IovDbRstVE4x3Flf4cR3o,22441
35
36
  AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py,sha256=oSojwug5mcZedKOWAV7YPMlCp0Qy_Aed0fjHRuyZWpo,28622
36
37
  AOT_biomaps/AOT_Recon/AOT_Optimizers/__init__.py,sha256=tNGVulINaqQZzcs5cvCMAT5ypGdoFWRnxtl9y7ePECk,106
37
38
  AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Huber.py,sha256=dRd1t5OBag_gVmfji3L0QrA1GJ_702LcCkLH32Bot0M,3285
@@ -39,9 +40,9 @@ AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Quadratic.py,sha256=wTbzcXxMdEl9ReE
39
40
  AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/RelativeDifferences.py,sha256=ZlWaKsNPCMfy4fWxYFT2pSoKMbysQkJH4N1WbbWncq4,2493
40
41
  AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/__init__.py,sha256=RwrJdLOFbAFBFnRxo5xdlOyeZgtQRDaRWDN9-uCGUiY,84
41
42
  AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py,sha256=RACc2P5oxmp0uPLAGnNj9mEtAxa_OlepNgCawKij3jI,12062
42
- AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py,sha256=WTqHBeglUxRx-jy6CcoETgqYSSHYTwi2zR5NrJcPXGU,14449
43
+ AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py,sha256=ti3dZQsb_Uu62C7Bn65Z-yf-R5NKCFsmnBT5GlLd_HY,15138
43
44
  AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/__init__.py,sha256=8nou-hqjQjuCTLhoL5qv4EM_lMPFviAZAZKSPhi84jE,67
44
- aot_biomaps-2.9.294.dist-info/METADATA,sha256=8c8tnkhnARdI6acSPSrB1FQloxgjuoNHKslkkgIq7xM,700
45
- aot_biomaps-2.9.294.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
- aot_biomaps-2.9.294.dist-info/top_level.txt,sha256=6STF-lT4kaAnBHJYCripmN5mZABoHjMuY689JdiDphk,12
47
- aot_biomaps-2.9.294.dist-info/RECORD,,
45
+ aot_biomaps-2.9.321.dist-info/METADATA,sha256=DMnk4rlTFW95pX0welgBZD3GdTkjeFbn3vwXNyUxSHs,700
46
+ aot_biomaps-2.9.321.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ aot_biomaps-2.9.321.dist-info/top_level.txt,sha256=6STF-lT4kaAnBHJYCripmN5mZABoHjMuY689JdiDphk,12
48
+ aot_biomaps-2.9.321.dist-info/RECORD,,