AOT-biomaps 2.9.261__py3-none-any.whl → 2.9.318__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,6 @@ def MLEM(
     max_saves=5000,
     show_logs=True,
     smatrixType=SMatrixType.SELL,
-    Z=350,
 ):
     """
     Unified MLEM algorithm for Acousto-Optic Tomography.
@@ -59,11 +58,11 @@ def MLEM(
     # Dispatch to the appropriate implementation
     if use_gpu:
         if smatrixType == SMatrixType.CSR:
-            return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, Z, show_logs)
+            return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.SELL:
-            return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
+            return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.DENSE:
-            return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
+            return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         else:
            raise ValueError("Unsupported SMatrixType for GPU MLEM.")
     else:
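
Net effect of the two hunks above: the Z=350 default disappears from the signature, and neither device nor Z is forwarded to the GPU back ends any more; both are now derived from the SMatrix object inside the per-format implementations. A minimal call sketch against the new dispatcher (the full parameter list of MLEM() is only partly visible in this diff, so every keyword below other than smatrixType, max_saves and show_logs is an assumption):

    # Hypothetical usage; SMatrix and y are assumed to be prepared elsewhere.
    theta, saved_idx = MLEM(
        SMatrix,                        # SparseSMatrix_SELL / SparseSMatrix_CSR
        y,                              # measured data, float32, length T*N
        numIterations=200,              # assumed keyword
        isSavingEachIteration=False,    # assumed keyword
        smatrixType=SMatrixType.SELL,   # dispatches to MLEM_sparseSELL_pycuda
        max_saves=5000,
        show_logs=True,
    )
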
@@ -229,35 +228,60 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
         print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
         return None, None
 
-def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs=True):
+def MLEM_sparseCSR_pycuda(
+    SMatrix,
+    y,
+    numIterations,
+    isSavingEachIteration,
+    tumor_str,
+    max_saves,
+    denominator_threshold,
+    show_logs=True,
+):
     """
-    SMatrix: instance of SparseMatrixGPU (already allocated)
-    y: measured data (1D np.float32 of length TN)
-
-    Assumptions:
-    - SMatrix.values_gpu and SMatrix.col_ind_gpu and SMatrix.row_ptr_gpu are device pointers
-    - SMatrix.norm_factor_inv_gpu exists
+    Robust MLEM implementation for CSR SMatrix using PyCUDA kernels.
+    Expects SMatrix to be SparseSMatrix_CSR with attributes:
+    - values_gpu, col_ind_gpu, row_ptr_gpu (device pointers)
+    - norm_factor_inv_gpu (device pointer)
+    - sparse_mod (loaded module with kernels)
+    - ctx (PyCUDA context)
+    Returns (saved_theta_list, saved_indices) if isSavingEachIteration else (final_theta, None)
     """
+    final_result = None
+
+    # Local holders to free in finally
+    y_gpu = q_flat_gpu = e_flat_gpu = c_flat_gpu = theta_flat_gpu = None
+
     try:
         if not isinstance(SMatrix, SparseSMatrix_CSR):
             raise TypeError("SMatrix must be a SparseSMatrix_CSR object")
+
+        # push context (if provided)
+        popped_ctx = False
+        if getattr(SMatrix, "ctx", None):
+            SMatrix.ctx.push()
+            popped_ctx = True
+
         dtype = np.float32
-        TN = SMatrix.N * SMatrix.T
-        ZX = SMatrix.Z * SMatrix.X
-        if Z is None:
-            Z = SMatrix.Z
-        X = SMatrix.X
+        TN = int(SMatrix.N * SMatrix.T)
+        ZX = int(SMatrix.Z * SMatrix.X)
+        Z = int(SMatrix.Z)
+        X = int(SMatrix.X)
 
-        if show_logs:
-            print(f"Dim X: {X}, Dim Z: {Z}, TN: {TN}, ZX: {ZX}")
+        # Make sure required GPU pointers exist
+        if getattr(SMatrix, "values_gpu", None) is None or getattr(SMatrix, "col_ind_gpu", None) is None or getattr(SMatrix, "row_ptr_gpu", None) is None:
+            raise RuntimeError("SMatrix is missing GPU buffers (values_gpu / col_ind_gpu / row_ptr_gpu)")
 
-        # Use existing context from SMatrix
-        # streams
+        if getattr(SMatrix, "norm_factor_inv_gpu", None) is None:
+            raise RuntimeError("SMatrix.norm_factor_inv_gpu not available on GPU")
+
+        # stream for async operations
         stream = drv.Stream()
 
-        # allocate device buffers
-        y_gpu = drv.mem_alloc(y.nbytes)
-        drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
+        # prepare device buffers
+        y_arr = np.ascontiguousarray(y.T.flatten().astype(np.float32))
+        y_gpu = drv.mem_alloc(y_arr.nbytes)
+        drv.memcpy_htod_async(y_gpu, y_arr, stream)
 
         theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
         initial_theta = np.full(ZX, 0.1, dtype=dtype)
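
For orientation before the kernel-launch hunk that follows: the four kernels implement one classical ML-EM step, exactly as the inline comments state (q = A * theta; e = y / max(q, threshold); c = A^T * e; theta *= norm_factor_inv * c). A dense NumPy analogue, under the assumption that norm_factor_inv holds the usual inverse sensitivity 1 / (A^T 1), whose construction is outside this diff:

    import numpy as np

    def mlem_step(A, y, theta, eps):
        # One ML-EM iteration mirroring the GPU kernel sequence below.
        q = A @ theta                        # projection_kernel: q = A * theta
        e = y / np.maximum(q, eps)           # ratio_kernel: e = y / max(q, eps)
        c = A.T @ e                          # backprojection_kernel: c = A^T * e
        norm_inv = 1.0 / np.maximum(A.sum(axis=0), eps)  # assumed meaning of norm_factor_inv
        return theta * norm_inv * c          # update_theta_kernel

Since A, y and the 0.1-initialized theta are all nonnegative, every iterate stays nonnegative without explicit clamping.
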
@@ -269,95 +293,177 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
         e_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
         c_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
 
-        mlem_mod = drv.module_from_file('AOT_biomaps_kernels.cubin')
-        projection_kernel = mlem_mod.get_function('projection_kernel__CSR')
-        backprojection_kernel = mlem_mod.get_function('backprojection_kernel__CSR')
-        ratio_kernel = mlem_mod.get_function('ratio_kernel')
-        update_kernel = mlem_mod.get_function('update_theta_kernel')
-
+        # Ensure kernels exist
+        projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__CSR")
+        backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__CSR")
+        ratio_kernel = SMatrix.sparse_mod.get_function("ratio_kernel")
+        update_kernel = SMatrix.sparse_mod.get_function("update_theta_kernel")
         block_size = 256
 
-        saved_theta, saved_indices = [], []
+        # prepare save indices once
         if numIterations <= max_saves:
             save_indices = list(range(numIterations))
         else:
-            save_indices = list(range(0, numIterations, max(1, numIterations // max_saves)))
+            step = max(1, numIterations // max_saves)
+            save_indices = list(range(0, numIterations, step))
         if save_indices[-1] != numIterations - 1:
             save_indices.append(numIterations - 1)
 
+        saved_theta = []
+        saved_indices = []
+
         description = f"AOT-BioMaps -- ML-EM (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
         iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        # grid sizes
+        grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
+        grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
+
         for it in iterator:
             # projection: q = A * theta
-            projection_kernel(q_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
-                              theta_flat_gpu, np.int32(TN),
-                              block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1),
-                              stream=stream)
+            projection_kernel(
+                q_flat_gpu,
+                SMatrix.values_gpu,
+                SMatrix.row_ptr_gpu,
+                SMatrix.col_ind_gpu,
+                theta_flat_gpu,
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )
 
             # ratio: e = y / max(q, threshold)
-            ratio_kernel(e_flat_gpu, y_gpu, q_flat_gpu, np.float32(denominator_threshold), np.int32(TN),
-                         block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
-
-            # backprojection: c = A^T * e
+            ratio_kernel(
+                e_flat_gpu,
+                y_gpu,
+                q_flat_gpu,
+                np.float32(denominator_threshold),
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )
+
+            # backprojection: c = A^T * e (zero c first)
             drv.memset_d32_async(c_flat_gpu, 0, ZX, stream)
-            backprojection_kernel(c_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
-                                  e_flat_gpu, np.int32(TN),
-                                  block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
+            backprojection_kernel(
+                c_flat_gpu,
+                SMatrix.values_gpu,
+                SMatrix.row_ptr_gpu,
+                SMatrix.col_ind_gpu,
+                e_flat_gpu,
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )
 
             # update: theta *= norm_factor_inv * c
-            update_kernel(theta_flat_gpu, c_flat_gpu, norm_factor_inv_gpu, np.int32(ZX),
-                          block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
-
+            update_kernel(
+                theta_flat_gpu,
+                c_flat_gpu,
+                norm_factor_inv_gpu,
+                np.int32(ZX),
+                block=(block_size, 1, 1),
+                grid=grid_cols,
+                stream=stream,
+            )
+
+            # periodic synchronization for stability / logging
             if show_logs and (it % 10 == 0 or it == numIterations - 1):
-                drv.Context.synchronize()
+                stream.synchronize()
 
+            # save snapshot if required
             if isSavingEachIteration and it in save_indices:
+                # ensure kernels finished
+                stream.synchronize()
                 theta_host = np.empty(ZX, dtype=dtype)
                 drv.memcpy_dtoh(theta_host, theta_flat_gpu)
                 saved_theta.append(theta_host.reshape(Z, X))
-                saved_indices.append(it)
-
-        drv.Context.synchronize()
-
-        result = np.empty(ZX, dtype=dtype)
-        drv.memcpy_dtoh(result, theta_flat_gpu)
-        result = result.reshape(Z, X)
-
-        # free local allocations
-        y_gpu.free(); q_flat_gpu.free(); e_flat_gpu.free(); c_flat_gpu.free(); theta_flat_gpu.free()
-
-        return (saved_theta, saved_indices) if isSavingEachIteration else (result, None)
+                saved_indices.append(int(it))
+
+        # make sure everything finished
+        stream.synchronize()
+        final_theta_host = np.empty(ZX, dtype=dtype)
+        drv.memcpy_dtoh(final_theta_host, theta_flat_gpu)
+        final_result = final_theta_host.reshape(Z, X)
+
+        # free local allocations (will also be freed in finally if exception)
+        try:
+            y_gpu.free()
+            q_flat_gpu.free()
+            e_flat_gpu.free()
+            c_flat_gpu.free()
+            theta_flat_gpu.free()
+        except Exception:
+            pass
+
+        return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
 
     except Exception as e:
         print(f"Error in MLEM_sparseCSR_pycuda: {type(e).__name__}: {e}")
         gc.collect()
         return None, None
 
-def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs=True):
+    finally:
+        # free buffers if still allocated
+        for buf in ("y_gpu", "q_flat_gpu", "e_flat_gpu", "c_flat_gpu", "theta_flat_gpu"):
+            try:
+                val = locals().get(buf, None)
+                if val is not None:
+                    val.free()
+            except Exception:
+                pass
+        # pop context safely
+        try:
+            if SMatrix and hasattr(SMatrix, "ctx") and SMatrix.ctx and popped_ctx:
+                SMatrix.ctx.pop()
+        except Exception:
+            pass
+
+def MLEM_sparseSELL_pycuda(
+    SMatrix,
+    y,
+    numIterations,
+    isSavingEachIteration,
+    tumor_str,
+    max_saves,
+    denominator_threshold,
+    show_logs=True,
+):
     """
     MLEM using SELL-C-σ kernels already present on device.
     y must be float32 length TN.
+
+    Clean version: diagnostics removed.
     """
+    final_result = None
+
     try:
-        # check if SMatrix is SparseSMatrix_SELL object
         if not isinstance(SMatrix, SparseSMatrix_SELL):
             raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
         if SMatrix.sell_values_gpu is None:
             raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")
+
+        # Context
+        if SMatrix.ctx:
+            SMatrix.ctx.push()
+
         TN = int(SMatrix.N * SMatrix.T)
         ZX = int(SMatrix.Z * SMatrix.X)
         dtype = np.float32
         block_size = 256
 
-        mod = SMatrix.sparse_mod
-        proj = mod.get_function("projection_kernel__SELL")
-        backproj = mod.get_function("backprojection_kernel__SELL")
-        ratio = mod.get_function("ratio_kernel")
-        update = mod.get_function("update_theta_kernel")
+        proj = SMatrix.sparse_mod.get_function("projection_kernel__SELL")
+        backproj = SMatrix.sparse_mod.get_function("backprojection_kernel__SELL")
+        ratio = SMatrix.sparse_mod.get_function("ratio_kernel")
+        update = SMatrix.sparse_mod.get_function("update_theta_kernel")
 
         stream = drv.Stream()
 
-        # device buffers
+        # Device buffers
+        y = y.T.flatten().astype(np.float32)
         y_gpu = drv.mem_alloc(y.nbytes)
         drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
 
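
The SELL path below launches its kernels over sell_values_gpu / sell_colinds_gpu together with slice_ptr_gpu, slice_len_gpu and slice_height. In the SELL-C-σ format those names suggest, rows are grouped into slices of slice_height rows and each slice is padded to the length of its longest row. A host-side sketch of the slice bookkeeping, offered as an assumption since the authoritative layout lives in allocate_sell_c_sigma_direct, outside this diff:

    import numpy as np

    def sell_slice_layout(row_lens, C):
        # row_lens: nonzeros per matrix row; C: slice height (slice_height).
        n_slices = (len(row_lens) + C - 1) // C
        slice_len = np.empty(n_slices, dtype=np.int32)      # padded width of each slice
        slice_ptr = np.zeros(n_slices + 1, dtype=np.int32)  # element offset of each slice
        for s in range(n_slices):
            rows = row_lens[s * C:(s + 1) * C]
            slice_len[s] = max(rows)                        # pad to the longest row in the slice
            slice_ptr[s + 1] = slice_ptr[s] + C * slice_len[s]
        return slice_ptr, slice_len                         # total storage = slice_ptr[-1]
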
@@ -375,6 +481,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
         grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
         grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
 
+        # Prepare save indices
         saved_theta, saved_indices = [], []
         if numIterations <= max_saves:
             save_indices = list(range(numIterations))
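
The snapshot schedule set up here is shared with the CSR path: it strides through the iterations and always keeps the final iterate, so the number of saves can slightly exceed max_saves. A worked example:

    numIterations, max_saves = 12000, 5000
    step = max(1, numIterations // max_saves)            # 12000 // 5000 = 2
    save_indices = list(range(0, numIterations, step))   # 0, 2, ..., 11998
    if save_indices[-1] != numIterations - 1:
        save_indices.append(numIterations - 1)           # keep the last iterate, 11999
    print(len(save_indices))                             # 6001, slightly above max_saves
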
@@ -385,44 +492,60 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
 
         description = f"AOT-BioMaps -- ML-EM (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
         iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        # --- MLEM Loop ---
         for it in iterator:
-            # projection
-            proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
-                 theta_gpu, np.int32(TN), slice_height,
-                 block=(block_size,1,1), grid=grid_rows, stream=stream)
 
-            # ratio
+            proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+                 slice_ptr_gpu, slice_len_gpu,
+                 theta_gpu, np.int32(TN), slice_height,
+                 block=(block_size,1,1), grid=grid_rows, stream=stream)
+
             ratio(e_gpu, y_gpu, q_gpu, np.float32(denominator_threshold), np.int32(TN),
-                block=(block_size,1,1), grid=grid_rows, stream=stream)
+                  block=(block_size,1,1), grid=grid_rows, stream=stream)
 
-            # zero c
             drv.memset_d32_async(c_gpu, 0, ZX, stream)
 
-            # backprojection accumulate
-            backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
-                     e_gpu, c_gpu, np.int32(TN), slice_height,
-                     block=(block_size,1,1), grid=grid_rows, stream=stream)
+            backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+                     slice_ptr_gpu, slice_len_gpu,
+                     e_gpu, c_gpu, np.int32(TN), slice_height,
+                     block=(block_size,1,1), grid=grid_rows, stream=stream)
 
-            # update
             update(theta_gpu, c_gpu, SMatrix.norm_factor_inv_gpu, np.int32(ZX),
-                block=(block_size,1,1), grid=grid_cols, stream=stream)
+                   block=(block_size,1,1), grid=grid_cols, stream=stream)
 
-            stream.synchronize()
             if isSavingEachIteration and it in save_indices:
                 out = np.empty(ZX, dtype=np.float32)
                 drv.memcpy_dtoh(out, theta_gpu)
                 saved_theta.append(out.reshape((SMatrix.Z, SMatrix.X)))
                 saved_indices.append(it)
 
-        # final copy
+        stream.synchronize()
         res = np.empty(ZX, dtype=np.float32)
         drv.memcpy_dtoh(res, theta_gpu)
 
-        # free temporaries
-        y_gpu.free(); q_gpu.free(); e_gpu.free(); c_gpu.free(); theta_gpu.free()
-        return (saved_theta, saved_indices) if isSavingEachIteration else (res.reshape((SMatrix.Z, SMatrix.X)), None)
+        # free
+        try:
+            y_gpu.free()
+            q_gpu.free()
+            e_gpu.free()
+            c_gpu.free()
+            theta_gpu.free()
+        except Exception:
+            pass
+
+        final_result = res.reshape((SMatrix.Z, SMatrix.X))
+        return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
+
 
     except Exception as e:
         print(f"Error in MLEM_sparseSELL_pycuda: {type(e).__name__}: {e}")
         gc.collect()
         return None, None
+    finally:
+        if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
+            try:
+                SMatrix.ctx.pop()
+            except Exception:
+                pass
+
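
One behavioral difference between the two new implementations is visible above: the CSR path records popped_ctx and pops only a context it actually pushed, while this SELL finally block pops whenever SMatrix.ctx exists, even if an early raise skipped the push. A minimal sketch of the guarded pattern used by the CSR path (smatrix.ctx is assumed to be a pycuda.driver.Context created elsewhere):

    def with_smatrix_context(smatrix, work):
        # Push/pop guarded so the pop happens only if this function pushed.
        popped_ctx = False
        try:
            if getattr(smatrix, "ctx", None):
                smatrix.ctx.push()
                popped_ctx = True
            return work()
        finally:
            if popped_ctx:
                smatrix.ctx.pop()
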