AOT-biomaps 2.9.281.tar.gz → 2.9.312.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AOT-biomaps might be problematic.

Files changed (53)
  1. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py +16 -19
  2. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py +189 -103
  3. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py +117 -20
  4. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py +8 -15
  5. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py +79 -47
  6. aot_biomaps-2.9.312/AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
  7. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AlgebraicRecon.py +2 -8
  8. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/PrimalDualRecon.py +94 -41
  9. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/__init__.py +32 -1
  10. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/PKG-INFO +1 -1
  11. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/PKG-INFO +1 -1
  12. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/setup.py +32 -1
  13. aot_biomaps-2.9.281/AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
  14. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/AcousticEnums.py +0 -0
  15. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/AcousticTools.py +0 -0
  16. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/FocusedWave.py +0 -0
  17. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/IrregularWave.py +0 -0
  18. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/PlaneWave.py +0 -0
  19. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/StructuredWave.py +0 -0
  20. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/__init__.py +0 -0
  21. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/_mainAcoustic.py +0 -0
  22. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/Focus.py +0 -0
  23. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/Tomography.py +0 -0
  24. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/__init__.py +0 -0
  25. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/_mainExperiment.py +0 -0
  26. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/Absorber.py +0 -0
  27. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/Laser.py +0 -0
  28. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/OpticEnums.py +0 -0
  29. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/__init__.py +0 -0
  30. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/_mainOptic.py +0 -0
  31. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/DEPIERRO.py +0 -0
  32. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/MAPEM.py +0 -0
  33. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/__init__.py +0 -0
  34. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Huber.py +0 -0
  35. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Quadratic.py +0 -0
  36. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/RelativeDifferences.py +0 -0
  37. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/__init__.py +0 -0
  38. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/__init__.py +0 -0
  39. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AnalyticRecon.py +0 -0
  40. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/BayesianRecon.py +0 -0
  41. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/DeepLearningRecon.py +0 -0
  42. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/ReconEnums.py +0 -0
  43. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/ReconTools.py +0 -0
  44. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/__init__.py +0 -0
  45. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/_mainRecon.py +0 -0
  46. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/Config.py +0 -0
  47. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/Settings.py +0 -0
  48. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/SOURCES.txt +0 -0
  49. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/dependency_links.txt +0 -0
  50. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/requires.txt +0 -0
  51. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/top_level.txt +0 -0
  52. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/README.md +0 -0
  53. {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/setup.cfg +0 -0
@@ -23,8 +23,7 @@ def LS(
  denominator_threshold=1e-6,
  max_saves=5000,
  show_logs=True,
- smatrixType=SMatrixType.SELL,
- Z=350,
+ smatrixType=SMatrixType.SELL
  ):
  """
  Least Squares reconstruction using Projected Gradient Descent (PGD) with non-negativity constraint.
@@ -44,7 +43,7 @@ def LS(
  # Dispatch to the appropriate implementation
  if use_gpu:
  if smatrixType == SMatrixType.CSR:
- return _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, Z, show_logs)
+ return _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
  elif smatrixType == SMatrixType.SELL:
  return _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
  elif smatrixType == SMatrixType.DENSE:
@@ -181,13 +180,12 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
  print(f"Dim X: {X}, Dim Z: {Z}, TN: {TN}, ZX: {ZX}")
 
  stream = drv.Stream()
- mod = drv.module_from_file('AOT_biomaps_kernels.cubin')
 
  # Retrieve the kernels
- projection_kernel = mod.get_function('projection_kernel__CSR')
- backprojection_kernel = mod.get_function('backprojection_kernel__CSR')
- axpby_kernel = mod.get_function("vector_axpby_kernel")
- minus_axpy_kernel = mod.get_function("vector_minus_axpy_kernel")
+ projection_kernel = SMatrix.sparse_mod.get_function('projection_kernel__CSR')
+ backprojection_kernel = SMatrix.sparse_mod.get_function('backprojection_kernel__CSR')
+ axpby_kernel = SMatrix.sparse_mod.get_function("vector_axpby_kernel")
+ minus_axpy_kernel = SMatrix.sparse_mod.get_function("vector_minus_axpy_kernel")
 
  # --- Buffer allocation (raw pointers) ---
  y = y.T.flatten().astype(dtype)
@@ -231,7 +229,7 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
  drv.memcpy_dtod(p_flat_gpu, r_flat_gpu, ZX * np.dtype(dtype).itemsize)
 
  # 6. rho_prev = ||r_0||^2
- rho_prev = _dot_product_gpu(mod, r_flat_gpu, r_flat_gpu, ZX, stream)
+ rho_prev = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
 
  # --- Iterative loop ---
  saved_theta, saved_indices = [], []
@@ -258,7 +256,7 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
  block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
 
  # c. alpha = rho_prev / <p, z>
- pAp = _dot_product_gpu(mod, p_flat_gpu, z_flat_gpu, ZX, stream)
+ pAp = _dot_product_gpu(SMatrix.sparse_mod, p_flat_gpu, z_flat_gpu, ZX, stream)
 
  if abs(pAp) < 1e-15: break
  alpha = rho_prev / pAp
@@ -273,7 +271,7 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
  block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
 
  # f. rho_curr = ||r||^2
- rho_curr = _dot_product_gpu(mod, r_flat_gpu, r_flat_gpu, ZX, stream)
+ rho_curr = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
 
  if rho_curr < tolerance: break
 
@@ -364,11 +362,10 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
  tolerance = 1e-12
 
  # Access the SELL parameters
- mod = SMatrix.sparse_mod
- projection_kernel = mod.get_function("projection_kernel__SELL")
- backprojection_kernel = mod.get_function("backprojection_kernel__SELL")
- axpby_kernel = mod.get_function("vector_axpby_kernel")
- minus_axpy_kernel = mod.get_function("vector_minus_axpy_kernel")
+ projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__SELL")
+ backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__SELL")
+ axpby_kernel = SMatrix.sparse_mod.get_function("vector_axpby_kernel")
+ minus_axpy_kernel = SMatrix.sparse_mod.get_function("vector_minus_axpy_kernel")
  slice_height = np.int32(SMatrix.slice_height)
  grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
 
@@ -416,7 +413,7 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
  drv.memcpy_dtod(p_flat_gpu, r_flat_gpu, ZX * np.dtype(dtype).itemsize)
 
  # 6. rho_prev = ||r_0||^2
- rho_prev = _dot_product_gpu(mod, r_flat_gpu, r_flat_gpu, ZX, stream)
+ rho_prev = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
 
  # --- Iterative loop ---
  saved_theta, saved_indices = [], []
@@ -443,7 +440,7 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
  block=(block_size, 1, 1), grid=grid_rows, stream=stream)
 
  # c. alpha = rho_prev / <p, z>
- pAp = _dot_product_gpu(mod, p_flat_gpu, z_flat_gpu, ZX, stream)
+ pAp = _dot_product_gpu(SMatrix.sparse_mod, p_flat_gpu, z_flat_gpu, ZX, stream)
 
  if abs(pAp) < 1e-15: break
  alpha = rho_prev / pAp
@@ -458,7 +455,7 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
  block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
 
  # f. rho_curr = ||r||^2
- rho_curr = _dot_product_gpu(mod, r_flat_gpu, r_flat_gpu, ZX, stream)
+ rho_curr = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
 
  if rho_curr < tolerance: break
 
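Both `_LS_CG_sparseCSR_pycuda` and `_LS_CG_sparseSELL_pycuda` run the same conjugate-gradient recurrence on the normal equations; only the sparse-matrix kernels differ. Below is a minimal NumPy sketch of that recurrence, assuming a dense system matrix `A` of shape `(TN, ZX)`; the function name and arguments are illustrative, not part of the package API.

import numpy as np

def cg_normal_equations_sketch(A, y, numIterations=100, tolerance=1e-12):
    # Solve A^T A x = A^T y by conjugate gradient, mirroring the GPU loop above.
    x = np.zeros(A.shape[1], dtype=np.float32)
    r = A.T @ y                      # r_0 = A^T y (with x_0 = 0)
    p = r.copy()
    rho_prev = float(r @ r)          # rho_prev = ||r_0||^2
    for _ in range(numIterations):
        z = A.T @ (A @ p)            # projection then backprojection: z = A^T A p
        pAp = float(p @ z)
        if abs(pAp) < 1e-15:
            break
        alpha = rho_prev / pAp       # alpha = rho_prev / <p, z>
        x += alpha * p               # axpby-style update of the iterate
        r -= alpha * z               # minus-axpy update of the residual
        rho_curr = float(r @ r)      # rho_curr = ||r||^2
        if rho_curr < tolerance:
            break
        p = r + (rho_curr / rho_prev) * p
        rho_prev = rho_curr
    return x

On the GPU the same steps are carried out by `projection_kernel`/`backprojection_kernel` for the matrix products and `vector_axpby_kernel`/`vector_minus_axpy_kernel` for the vector updates, with `_dot_product_gpu` supplying the inner products.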
@@ -26,7 +26,6 @@ def MLEM(
  max_saves=5000,
  show_logs=True,
  smatrixType=SMatrixType.SELL,
- Z=350,
  ):
  """
  Unified MLEM algorithm for Acousto-Optic Tomography.
@@ -59,11 +58,11 @@
  # Dispatch to the appropriate implementation
  if use_gpu:
  if smatrixType == SMatrixType.CSR:
- return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, Z, show_logs)
+ return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
  elif smatrixType == SMatrixType.SELL:
- return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
+ return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
  elif smatrixType == SMatrixType.DENSE:
- return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold,show_logs)
+ return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold,show_logs)
  else:
  raise ValueError("Unsupported SMatrixType for GPU MLEM.")
  else:
@@ -229,49 +228,60 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
  print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
  return None, None
 
- def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs=True):
+ def MLEM_sparseCSR_pycuda(
+ SMatrix,
+ y,
+ numIterations,
+ isSavingEachIteration,
+ tumor_str,
+ max_saves,
+ denominator_threshold,
+ show_logs=True,
+ ):
  """
- SMatrix: instance of SparseMatrixGPU (already allocated)
- y: measured data (1D np.float32 of length TN)
-
- Assumptions:
- - SMatrix.values_gpu and SMatrix.col_ind_gpu and SMatrix.row_ptr_gpu are device pointers
- - SMatrix.norm_factor_inv_gpu exists
- - SMatrix.ctx is the PyCUDA context for the target GPU.
+ Robust MLEM implementation for CSR SMatrix using PyCUDA kernels.
+ Expects SMatrix to be SparseSMatrix_CSR with attributes:
+ - values_gpu, col_ind_gpu, row_ptr_gpu (device pointers)
+ - norm_factor_inv_gpu (device pointer)
+ - sparse_mod (loaded module with kernels)
+ - ctx (PyCUDA context)
+ Returns (saved_theta_list, saved_indices) if isSavingEachIteration else (final_theta, None)
  """
-
- # We use a final_result placeholder to ensure it's defined outside the try block
  final_result = None
-
+
+ # Local holders to free in finally
+ y_gpu = q_flat_gpu = e_flat_gpu = c_flat_gpu = theta_flat_gpu = None
+
  try:
  if not isinstance(SMatrix, SparseSMatrix_CSR):
  raise TypeError("SMatrix must be a SparseSMatrix_CSR object")
 
- # --- CONTEXT FIX: Push the context associated with SMatrix ---
- # This ensures all subsequent PyCUDA operations use the correct GPU/context.
- if SMatrix.ctx:
+ # push context (if provided)
+ popped_ctx = False
+ if getattr(SMatrix, "ctx", None):
  SMatrix.ctx.push()
- # -----------------------------------------------------------
+ popped_ctx = True
 
  dtype = np.float32
- TN = SMatrix.N * SMatrix.T
- ZX = SMatrix.Z * SMatrix.X
- # Ensure Z and X are correctly defined for reshaping
- Z = SMatrix.Z
- X = SMatrix.X
-
- if show_logs:
- # We assume SMatrix was initialized using the correct device index.
- print(f"Executing on GPU device index: {SMatrix.device.primary_context.device.name()}")
- print(f"Dim X: {X}, Dim Z: {Z}, TN: {TN}, ZX: {ZX}")
-
- # streams
+ TN = int(SMatrix.N * SMatrix.T)
+ ZX = int(SMatrix.Z * SMatrix.X)
+ Z = int(SMatrix.Z)
+ X = int(SMatrix.X)
+
+ # Make sure required GPU pointers exist
+ if getattr(SMatrix, "values_gpu", None) is None or getattr(SMatrix, "col_ind_gpu", None) is None or getattr(SMatrix, "row_ptr_gpu", None) is None:
+ raise RuntimeError("SMatrix is missing GPU buffers (values_gpu / col_ind_gpu / row_ptr_gpu)")
+
+ if getattr(SMatrix, "norm_factor_inv_gpu", None) is None:
+ raise RuntimeError("SMatrix.norm_factor_inv_gpu not available on GPU")
+
+ # stream for async operations
  stream = drv.Stream()
 
- # allocate device buffers
- y = y.T.flatten().astype(np.float32)
- y_gpu = drv.mem_alloc(y.nbytes)
- drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
+ # prepare device buffers
+ y_arr = np.ascontiguousarray(y.T.flatten().astype(np.float32))
+ y_gpu = drv.mem_alloc(y_arr.nbytes)
+ drv.memcpy_htod_async(y_gpu, y_arr, stream)
 
  theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
  initial_theta = np.full(ZX, 0.1, dtype=dtype)
@@ -283,61 +293,111 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
  e_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
  c_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
 
- # Assuming the cubin file is found globally or managed by the caller
- projection_kernel = SMatrix.sparse_mod.get_function('projection_kernel__CSR')
- backprojection_kernel = SMatrix.sparse_mod.get_function('backprojection_kernel__CSR')
- ratio_kernel = SMatrix.sparse_mod.get_function('ratio_kernel')
- update_kernel = SMatrix.sparse_mod.get_function('update_theta_kernel')
+ # Ensure kernels exist
+ projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__CSR")
+ backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__CSR")
+ ratio_kernel = SMatrix.sparse_mod.get_function("ratio_kernel")
+ update_kernel = SMatrix.sparse_mod.get_function("update_theta_kernel")
  block_size = 256
 
- saved_theta, saved_indices = [], []
+ # prepare save indices once
  if numIterations <= max_saves:
  save_indices = list(range(numIterations))
  else:
- save_indices = list(range(0, numIterations, max(1, numIterations // max_saves)))
+ step = max(1, numIterations // max_saves)
+ save_indices = list(range(0, numIterations, step))
  if save_indices[-1] != numIterations - 1:
  save_indices.append(numIterations - 1)
 
+ saved_theta = []
+ saved_indices = []
+
  description = f"AOT-BioMaps -- ML-EM (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
  iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+ # grid sizes
+ grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
+ grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
+
  for it in iterator:
  # projection: q = A * theta
- projection_kernel(q_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
- theta_flat_gpu, np.int32(TN),
- block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1),
- stream=stream)
+ projection_kernel(
+ q_flat_gpu,
+ SMatrix.values_gpu,
+ SMatrix.row_ptr_gpu,
+ SMatrix.col_ind_gpu,
+ theta_flat_gpu,
+ np.int32(TN),
+ block=(block_size, 1, 1),
+ grid=grid_rows,
+ stream=stream,
+ )
 
  # ratio: e = y / max(q, threshold)
- ratio_kernel(e_flat_gpu, y_gpu, q_flat_gpu, np.float32(denominator_threshold), np.int32(TN),
- block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
-
- # backprojection: c = A^T * e
+ ratio_kernel(
+ e_flat_gpu,
+ y_gpu,
+ q_flat_gpu,
+ np.float32(denominator_threshold),
+ np.int32(TN),
+ block=(block_size, 1, 1),
+ grid=grid_rows,
+ stream=stream,
+ )
+
+ # backprojection: c = A^T * e (zero c first)
  drv.memset_d32_async(c_flat_gpu, 0, ZX, stream)
- backprojection_kernel(c_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
- e_flat_gpu, np.int32(TN),
- block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
+ backprojection_kernel(
+ c_flat_gpu,
+ SMatrix.values_gpu,
+ SMatrix.row_ptr_gpu,
+ SMatrix.col_ind_gpu,
+ e_flat_gpu,
+ np.int32(TN),
+ block=(block_size, 1, 1),
+ grid=grid_rows,
+ stream=stream,
+ )
 
  # update: theta *= norm_factor_inv * c
- update_kernel(theta_flat_gpu, c_flat_gpu, norm_factor_inv_gpu, np.int32(ZX),
- block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
-
+ update_kernel(
+ theta_flat_gpu,
+ c_flat_gpu,
+ norm_factor_inv_gpu,
+ np.int32(ZX),
+ block=(block_size, 1, 1),
+ grid=grid_cols,
+ stream=stream,
+ )
+
+ # periodic synchronization for stability / logging
  if show_logs and (it % 10 == 0 or it == numIterations - 1):
- drv.Context.synchronize()
+ stream.synchronize()
 
+ # save snapshot if required
  if isSavingEachIteration and it in save_indices:
+ # ensure kernels finished
+ stream.synchronize()
  theta_host = np.empty(ZX, dtype=dtype)
  drv.memcpy_dtoh(theta_host, theta_flat_gpu)
  saved_theta.append(theta_host.reshape(Z, X))
- saved_indices.append(it)
-
- drv.Context.synchronize()
-
- final_result = np.empty(ZX, dtype=dtype)
- drv.memcpy_dtoh(final_result, theta_flat_gpu)
- final_result = final_result.reshape(Z, X)
-
- # free local allocations
- y_gpu.free(); q_flat_gpu.free(); e_flat_gpu.free(); c_flat_gpu.free(); theta_flat_gpu.free()
+ saved_indices.append(int(it))
+
+ # make sure everything finished
+ stream.synchronize()
+ final_theta_host = np.empty(ZX, dtype=dtype)
+ drv.memcpy_dtoh(final_theta_host, theta_flat_gpu)
+ final_result = final_theta_host.reshape(Z, X)
+
+ # free local allocations (will also be freed in finally if exception)
+ try:
+ y_gpu.free()
+ q_flat_gpu.free()
+ e_flat_gpu.free()
+ c_flat_gpu.free()
+ theta_flat_gpu.free()
+ except Exception:
+ pass
 
  return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
 
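For orientation, the four kernels invoked in this loop realise the standard multiplicative ML-EM update theta <- theta * (A^T (y / max(A theta, tau))) / (A^T 1). Below is a minimal dense NumPy sketch of one iteration, for reference only; the dense array `A` and the helper name are assumptions, not the package API, and the GPU path precomputes the normalisation term once as `SMatrix.norm_factor_inv_gpu` rather than rebuilding it per iteration.

import numpy as np

def mlem_iteration_sketch(A, y, theta, denominator_threshold=1e-6):
    # One ML-EM iteration equivalent to the kernel sequence above, with dense A of shape (TN, ZX).
    q = A @ theta                                          # projection: q = A * theta
    e = y / np.maximum(q, denominator_threshold)           # ratio with thresholded denominator
    c = A.T @ e                                            # backprojection: c = A^T * e
    norm_factor_inv = 1.0 / np.maximum(A.sum(axis=0), denominator_threshold)  # 1 / (A^T 1)
    return theta * norm_factor_inv * c                     # update: theta *= norm_factor_inv * c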
@@ -345,32 +405,50 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
  print(f"Error in MLEM_sparseCSR_pycuda: {type(e).__name__}: {e}")
  gc.collect()
  return None, None
-
- finally:
- # --- CONTEXT FIX: Pop the context ---
- if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
- SMatrix.ctx.pop()
- # ------------------------------------
 
- def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs=True):
+ finally:
+ # free buffers if still allocated
+ for buf in ("y_gpu", "q_flat_gpu", "e_flat_gpu", "c_flat_gpu", "theta_flat_gpu"):
+ try:
+ val = locals().get(buf, None)
+ if val is not None:
+ val.free()
+ except Exception:
+ pass
+ # pop context safely
+ try:
+ if SMatrix and hasattr(SMatrix, "ctx") and SMatrix.ctx and popped_ctx:
+ SMatrix.ctx.pop()
+ except Exception:
+ pass
+
+ def MLEM_sparseSELL_pycuda(
+ SMatrix,
+ y,
+ numIterations,
+ isSavingEachIteration,
+ tumor_str,
+ max_saves,
+ denominator_threshold,
+ show_logs=True,
+ ):
  """
  MLEM using SELL-C-σ kernels already present on device.
  y must be float32 length TN.
+
+ Clean version: diagnostics removed.
  """
  final_result = None
 
  try:
- # check if SMatrix is SparseSMatrix_SELL object
  if not isinstance(SMatrix, SparseSMatrix_SELL):
  raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
  if SMatrix.sell_values_gpu is None:
  raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")
-
- # --- CONTEXT FIX: Push the context associated with SMatrix ---
- # This ensures all subsequent PyCUDA operations use the correct GPU/context.
+
+ # Context
  if SMatrix.ctx:
  SMatrix.ctx.push()
- # -----------------------------------------------------------
 
  TN = int(SMatrix.N * SMatrix.T)
  ZX = int(SMatrix.Z * SMatrix.X)
@@ -384,7 +462,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
 
  stream = drv.Stream()
 
- # device buffers
+ # Device buffers
  y = y.T.flatten().astype(np.float32)
  y_gpu = drv.mem_alloc(y.nbytes)
  drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
@@ -403,6 +481,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
  grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
  grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
 
+ # Prepare save indices
  saved_theta, saved_indices = [], []
  if numIterations <= max_saves:
  save_indices = list(range(numIterations))
@@ -413,52 +492,59 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
 
  description = f"AOT-BioMaps -- ML-EM (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
  iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+ # --- MLEM Loop ---
  for it in iterator:
- # projection
- proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
- theta_gpu, np.int32(TN), slice_height,
- block=(block_size,1,1), grid=grid_rows, stream=stream)
 
- # ratio
+ proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+ slice_ptr_gpu, slice_len_gpu,
+ theta_gpu, np.int32(TN), slice_height,
+ block=(block_size,1,1), grid=grid_rows, stream=stream)
+
  ratio(e_gpu, y_gpu, q_gpu, np.float32(denominator_threshold), np.int32(TN),
- block=(block_size,1,1), grid=grid_rows, stream=stream)
+ block=(block_size,1,1), grid=grid_rows, stream=stream)
 
- # zero c
  drv.memset_d32_async(c_gpu, 0, ZX, stream)
 
- # backprojection accumulate
- backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
- e_gpu, c_gpu, np.int32(TN), slice_height,
- block=(block_size,1,1), grid=grid_rows, stream=stream)
+ backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+ slice_ptr_gpu, slice_len_gpu,
+ e_gpu, c_gpu, np.int32(TN), slice_height,
+ block=(block_size,1,1), grid=grid_rows, stream=stream)
 
- # update
  update(theta_gpu, c_gpu, SMatrix.norm_factor_inv_gpu, np.int32(ZX),
- block=(block_size,1,1), grid=grid_cols, stream=stream)
+ block=(block_size,1,1), grid=grid_cols, stream=stream)
 
- stream.synchronize()
  if isSavingEachIteration and it in save_indices:
  out = np.empty(ZX, dtype=np.float32)
  drv.memcpy_dtoh(out, theta_gpu)
  saved_theta.append(out.reshape((SMatrix.Z, SMatrix.X)))
  saved_indices.append(it)
 
- # final copy
+ stream.synchronize()
  res = np.empty(ZX, dtype=np.float32)
  drv.memcpy_dtoh(res, theta_gpu)
 
- # free temporaries
- y_gpu.free(); q_gpu.free(); e_gpu.free(); c_gpu.free(); theta_gpu.free()
-
+ # free
+ try:
+ y_gpu.free()
+ q_gpu.free()
+ e_gpu.free()
+ c_gpu.free()
+ theta_gpu.free()
+ except Exception:
+ pass
+
  final_result = res.reshape((SMatrix.Z, SMatrix.X))
  return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
-
+
  except Exception as e:
  print(f"Error in MLEM_sparseSELL_pycuda: {type(e).__name__}: {e}")
  gc.collect()
  return None, None
-
+
  finally:
- # --- CONTEXT FIX: Pop the context ---
  if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
- SMatrix.ctx.pop()
- # ------------------------------------
+ try:
+ SMatrix.ctx.pop()
+ except Exception:
+ pass
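Both sparse code paths converge on the same resource-handling pattern: push the context attached to the SMatrix, run the kernels, and pop the context (and free device buffers) in a finally clause so an exception cannot leave the wrong context current. A minimal sketch of that pattern follows; the wrapper name and the `work` callable are illustrative, not part of the package.

def run_in_smatrix_context(SMatrix, work):
    # Push the PyCUDA context tied to SMatrix, run the GPU work, and always pop on the way out.
    pushed = False
    if getattr(SMatrix, "ctx", None):
        SMatrix.ctx.push()
        pushed = True
    try:
        return work()
    finally:
        if pushed:
            try:
                SMatrix.ctx.pop()
            except Exception:
                pass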