AOT-biomaps 2.9.261__py3-none-any.whl → 2.9.318__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AOT_biomaps/AOT_Experiment/Tomography.py +124 -0
- AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py +400 -10
- AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py +207 -84
- AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py +442 -11
- AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py +48 -26
- AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py +172 -134
- AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
- AOT_biomaps/AOT_Recon/AlgebraicRecon.py +27 -20
- AOT_biomaps/AOT_Recon/PrimalDualRecon.py +94 -41
- AOT_biomaps/AOT_Recon/ReconTools.py +164 -18
- AOT_biomaps/__init__.py +58 -1
- {aot_biomaps-2.9.261.dist-info → aot_biomaps-2.9.318.dist-info}/METADATA +1 -1
- {aot_biomaps-2.9.261.dist-info → aot_biomaps-2.9.318.dist-info}/RECORD +15 -14
- {aot_biomaps-2.9.261.dist-info → aot_biomaps-2.9.318.dist-info}/WHEEL +0 -0
- {aot_biomaps-2.9.261.dist-info → aot_biomaps-2.9.318.dist-info}/top_level.txt +0 -0
|
@@ -26,7 +26,6 @@ def MLEM(
|
|
|
26
26
|
max_saves=5000,
|
|
27
27
|
show_logs=True,
|
|
28
28
|
smatrixType=SMatrixType.SELL,
|
|
29
|
-
Z=350,
|
|
30
29
|
):
|
|
31
30
|
"""
|
|
32
31
|
Unified MLEM algorithm for Acousto-Optic Tomography.
|
|
@@ -59,11 +58,11 @@ def MLEM(
|
|
|
59
58
|
# Dispatch to the appropriate implementation
|
|
60
59
|
if use_gpu:
|
|
61
60
|
if smatrixType == SMatrixType.CSR:
|
|
62
|
-
return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
61
|
+
return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
|
|
63
62
|
elif smatrixType == SMatrixType.SELL:
|
|
64
|
-
return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
63
|
+
return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
|
|
65
64
|
elif smatrixType == SMatrixType.DENSE:
|
|
66
|
-
return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
65
|
+
return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold,show_logs)
|
|
67
66
|
else:
|
|
68
67
|
raise ValueError("Unsupported SMatrixType for GPU MLEM.")
|
|
69
68
|
else:
|
|
@@ -229,35 +228,60 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
|
229
228
|
print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
|
|
230
229
|
return None, None
|
|
231
230
|
|
|
232
|
-
def MLEM_sparseCSR_pycuda(
|
|
231
|
+
def MLEM_sparseCSR_pycuda(
|
|
232
|
+
SMatrix,
|
|
233
|
+
y,
|
|
234
|
+
numIterations,
|
|
235
|
+
isSavingEachIteration,
|
|
236
|
+
tumor_str,
|
|
237
|
+
max_saves,
|
|
238
|
+
denominator_threshold,
|
|
239
|
+
show_logs=True,
|
|
240
|
+
):
|
|
233
241
|
"""
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
242
|
+
Robust MLEM implementation for CSR SMatrix using PyCUDA kernels.
|
|
243
|
+
Expects SMatrix to be SparseSMatrix_CSR with attributes:
|
|
244
|
+
- values_gpu, col_ind_gpu, row_ptr_gpu (device pointers)
|
|
245
|
+
- norm_factor_inv_gpu (device pointer)
|
|
246
|
+
- sparse_mod (loaded module with kernels)
|
|
247
|
+
- ctx (PyCUDA context)
|
|
248
|
+
Returns (saved_theta_list, saved_indices) if isSavingEachIteration else (final_theta, None)
|
|
240
249
|
"""
|
|
250
|
+
final_result = None
|
|
251
|
+
|
|
252
|
+
# Local holders to free in finally
|
|
253
|
+
y_gpu = q_flat_gpu = e_flat_gpu = c_flat_gpu = theta_flat_gpu = None
|
|
254
|
+
|
|
241
255
|
try:
|
|
242
256
|
if not isinstance(SMatrix, SparseSMatrix_CSR):
|
|
243
257
|
raise TypeError("SMatrix must be a SparseSMatrix_CSR object")
|
|
258
|
+
|
|
259
|
+
# push context (if provided)
|
|
260
|
+
popped_ctx = False
|
|
261
|
+
if getattr(SMatrix, "ctx", None):
|
|
262
|
+
SMatrix.ctx.push()
|
|
263
|
+
popped_ctx = True
|
|
264
|
+
|
|
244
265
|
dtype = np.float32
|
|
245
|
-
TN = SMatrix.N * SMatrix.T
|
|
246
|
-
ZX = SMatrix.Z * SMatrix.X
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
X = SMatrix.X
|
|
266
|
+
TN = int(SMatrix.N * SMatrix.T)
|
|
267
|
+
ZX = int(SMatrix.Z * SMatrix.X)
|
|
268
|
+
Z = int(SMatrix.Z)
|
|
269
|
+
X = int(SMatrix.X)
|
|
250
270
|
|
|
251
|
-
|
|
252
|
-
|
|
271
|
+
# Make sure required GPU pointers exist
|
|
272
|
+
if getattr(SMatrix, "values_gpu", None) is None or getattr(SMatrix, "col_ind_gpu", None) is None or getattr(SMatrix, "row_ptr_gpu", None) is None:
|
|
273
|
+
raise RuntimeError("SMatrix is missing GPU buffers (values_gpu / col_ind_gpu / row_ptr_gpu)")
|
|
253
274
|
|
|
254
|
-
|
|
255
|
-
|
|
275
|
+
if getattr(SMatrix, "norm_factor_inv_gpu", None) is None:
|
|
276
|
+
raise RuntimeError("SMatrix.norm_factor_inv_gpu not available on GPU")
|
|
277
|
+
|
|
278
|
+
# stream for async operations
|
|
256
279
|
stream = drv.Stream()
|
|
257
280
|
|
|
258
|
-
#
|
|
259
|
-
|
|
260
|
-
drv.
|
|
281
|
+
# prepare device buffers
|
|
282
|
+
y_arr = np.ascontiguousarray(y.T.flatten().astype(np.float32))
|
|
283
|
+
y_gpu = drv.mem_alloc(y_arr.nbytes)
|
|
284
|
+
drv.memcpy_htod_async(y_gpu, y_arr, stream)
|
|
261
285
|
|
|
262
286
|
theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
|
|
263
287
|
initial_theta = np.full(ZX, 0.1, dtype=dtype)
|
|
@@ -269,95 +293,177 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
|
|
|
269
293
|
e_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
|
|
270
294
|
c_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
|
|
271
295
|
|
|
272
|
-
|
|
273
|
-
projection_kernel =
|
|
274
|
-
backprojection_kernel =
|
|
275
|
-
ratio_kernel =
|
|
276
|
-
update_kernel =
|
|
277
|
-
|
|
296
|
+
# Ensure kernels exist
|
|
297
|
+
projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__CSR")
|
|
298
|
+
backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__CSR")
|
|
299
|
+
ratio_kernel = SMatrix.sparse_mod.get_function("ratio_kernel")
|
|
300
|
+
update_kernel = SMatrix.sparse_mod.get_function("update_theta_kernel")
|
|
278
301
|
block_size = 256
|
|
279
302
|
|
|
280
|
-
|
|
303
|
+
# prepare save indices once
|
|
281
304
|
if numIterations <= max_saves:
|
|
282
305
|
save_indices = list(range(numIterations))
|
|
283
306
|
else:
|
|
284
|
-
|
|
307
|
+
step = max(1, numIterations // max_saves)
|
|
308
|
+
save_indices = list(range(0, numIterations, step))
|
|
285
309
|
if save_indices[-1] != numIterations - 1:
|
|
286
310
|
save_indices.append(numIterations - 1)
|
|
287
311
|
|
|
312
|
+
saved_theta = []
|
|
313
|
+
saved_indices = []
|
|
314
|
+
|
|
288
315
|
description = f"AOT-BioMaps -- ML-EM (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
|
|
289
316
|
iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
|
|
317
|
+
|
|
318
|
+
# grid sizes
|
|
319
|
+
grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
|
|
320
|
+
grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
|
|
321
|
+
|
|
290
322
|
for it in iterator:
|
|
291
323
|
# projection: q = A * theta
|
|
292
|
-
projection_kernel(
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
324
|
+
projection_kernel(
|
|
325
|
+
q_flat_gpu,
|
|
326
|
+
SMatrix.values_gpu,
|
|
327
|
+
SMatrix.row_ptr_gpu,
|
|
328
|
+
SMatrix.col_ind_gpu,
|
|
329
|
+
theta_flat_gpu,
|
|
330
|
+
np.int32(TN),
|
|
331
|
+
block=(block_size, 1, 1),
|
|
332
|
+
grid=grid_rows,
|
|
333
|
+
stream=stream,
|
|
334
|
+
)
|
|
296
335
|
|
|
297
336
|
# ratio: e = y / max(q, threshold)
|
|
298
|
-
ratio_kernel(
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
337
|
+
ratio_kernel(
|
|
338
|
+
e_flat_gpu,
|
|
339
|
+
y_gpu,
|
|
340
|
+
q_flat_gpu,
|
|
341
|
+
np.float32(denominator_threshold),
|
|
342
|
+
np.int32(TN),
|
|
343
|
+
block=(block_size, 1, 1),
|
|
344
|
+
grid=grid_rows,
|
|
345
|
+
stream=stream,
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
# backprojection: c = A^T * e (zero c first)
|
|
302
349
|
drv.memset_d32_async(c_flat_gpu, 0, ZX, stream)
|
|
303
|
-
backprojection_kernel(
|
|
304
|
-
|
|
305
|
-
|
|
350
|
+
backprojection_kernel(
|
|
351
|
+
c_flat_gpu,
|
|
352
|
+
SMatrix.values_gpu,
|
|
353
|
+
SMatrix.row_ptr_gpu,
|
|
354
|
+
SMatrix.col_ind_gpu,
|
|
355
|
+
e_flat_gpu,
|
|
356
|
+
np.int32(TN),
|
|
357
|
+
block=(block_size, 1, 1),
|
|
358
|
+
grid=grid_rows,
|
|
359
|
+
stream=stream,
|
|
360
|
+
)
|
|
306
361
|
|
|
307
362
|
# update: theta *= norm_factor_inv * c
|
|
308
|
-
update_kernel(
|
|
309
|
-
|
|
310
|
-
|
|
363
|
+
update_kernel(
|
|
364
|
+
theta_flat_gpu,
|
|
365
|
+
c_flat_gpu,
|
|
366
|
+
norm_factor_inv_gpu,
|
|
367
|
+
np.int32(ZX),
|
|
368
|
+
block=(block_size, 1, 1),
|
|
369
|
+
grid=grid_cols,
|
|
370
|
+
stream=stream,
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
# periodic synchronization for stability / logging
|
|
311
374
|
if show_logs and (it % 10 == 0 or it == numIterations - 1):
|
|
312
|
-
|
|
375
|
+
stream.synchronize()
|
|
313
376
|
|
|
377
|
+
# save snapshot if required
|
|
314
378
|
if isSavingEachIteration and it in save_indices:
|
|
379
|
+
# ensure kernels finished
|
|
380
|
+
stream.synchronize()
|
|
315
381
|
theta_host = np.empty(ZX, dtype=dtype)
|
|
316
382
|
drv.memcpy_dtoh(theta_host, theta_flat_gpu)
|
|
317
383
|
saved_theta.append(theta_host.reshape(Z, X))
|
|
318
|
-
saved_indices.append(it)
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
drv.memcpy_dtoh(
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
# free local allocations
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
384
|
+
saved_indices.append(int(it))
|
|
385
|
+
|
|
386
|
+
# make sure everything finished
|
|
387
|
+
stream.synchronize()
|
|
388
|
+
final_theta_host = np.empty(ZX, dtype=dtype)
|
|
389
|
+
drv.memcpy_dtoh(final_theta_host, theta_flat_gpu)
|
|
390
|
+
final_result = final_theta_host.reshape(Z, X)
|
|
391
|
+
|
|
392
|
+
# free local allocations (will also be freed in finally if exception)
|
|
393
|
+
try:
|
|
394
|
+
y_gpu.free()
|
|
395
|
+
q_flat_gpu.free()
|
|
396
|
+
e_flat_gpu.free()
|
|
397
|
+
c_flat_gpu.free()
|
|
398
|
+
theta_flat_gpu.free()
|
|
399
|
+
except Exception:
|
|
400
|
+
pass
|
|
401
|
+
|
|
402
|
+
return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
|
|
330
403
|
|
|
331
404
|
except Exception as e:
|
|
332
405
|
print(f"Error in MLEM_sparseCSR_pycuda: {type(e).__name__}: {e}")
|
|
333
406
|
gc.collect()
|
|
334
407
|
return None, None
|
|
335
408
|
|
|
336
|
-
|
|
409
|
+
finally:
|
|
410
|
+
# free buffers if still allocated
|
|
411
|
+
for buf in ("y_gpu", "q_flat_gpu", "e_flat_gpu", "c_flat_gpu", "theta_flat_gpu"):
|
|
412
|
+
try:
|
|
413
|
+
val = locals().get(buf, None)
|
|
414
|
+
if val is not None:
|
|
415
|
+
val.free()
|
|
416
|
+
except Exception:
|
|
417
|
+
pass
|
|
418
|
+
# pop context safely
|
|
419
|
+
try:
|
|
420
|
+
if SMatrix and hasattr(SMatrix, "ctx") and SMatrix.ctx and popped_ctx:
|
|
421
|
+
SMatrix.ctx.pop()
|
|
422
|
+
except Exception:
|
|
423
|
+
pass
|
|
424
|
+
|
|
425
|
+
def MLEM_sparseSELL_pycuda(
|
|
426
|
+
SMatrix,
|
|
427
|
+
y,
|
|
428
|
+
numIterations,
|
|
429
|
+
isSavingEachIteration,
|
|
430
|
+
tumor_str,
|
|
431
|
+
max_saves,
|
|
432
|
+
denominator_threshold,
|
|
433
|
+
show_logs=True,
|
|
434
|
+
):
|
|
337
435
|
"""
|
|
338
436
|
MLEM using SELL-C-σ kernels already present on device.
|
|
339
437
|
y must be float32 length TN.
|
|
438
|
+
|
|
439
|
+
Version propre : diagnostics retirés.
|
|
340
440
|
"""
|
|
441
|
+
final_result = None
|
|
442
|
+
|
|
341
443
|
try:
|
|
342
|
-
# check if SMatrix is SparseSMatrix_SELL object
|
|
343
444
|
if not isinstance(SMatrix, SparseSMatrix_SELL):
|
|
344
445
|
raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
|
|
345
446
|
if SMatrix.sell_values_gpu is None:
|
|
346
447
|
raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")
|
|
448
|
+
|
|
449
|
+
# Context
|
|
450
|
+
if SMatrix.ctx:
|
|
451
|
+
SMatrix.ctx.push()
|
|
452
|
+
|
|
347
453
|
TN = int(SMatrix.N * SMatrix.T)
|
|
348
454
|
ZX = int(SMatrix.Z * SMatrix.X)
|
|
349
455
|
dtype = np.float32
|
|
350
456
|
block_size = 256
|
|
351
457
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
update = mod.get_function("update_theta_kernel")
|
|
458
|
+
proj = SMatrix.sparse_mod.get_function("projection_kernel__SELL")
|
|
459
|
+
backproj = SMatrix.sparse_mod.get_function("backprojection_kernel__SELL")
|
|
460
|
+
ratio = SMatrix.sparse_mod.get_function("ratio_kernel")
|
|
461
|
+
update = SMatrix.sparse_mod.get_function("update_theta_kernel")
|
|
357
462
|
|
|
358
463
|
stream = drv.Stream()
|
|
359
464
|
|
|
360
|
-
#
|
|
465
|
+
# Device buffers
|
|
466
|
+
y = y.T.flatten().astype(np.float32)
|
|
361
467
|
y_gpu = drv.mem_alloc(y.nbytes)
|
|
362
468
|
drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
|
|
363
469
|
|
|
@@ -375,6 +481,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
|
|
|
375
481
|
grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
|
|
376
482
|
grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
|
|
377
483
|
|
|
484
|
+
# Prepare save indices
|
|
378
485
|
saved_theta, saved_indices = [], []
|
|
379
486
|
if numIterations <= max_saves:
|
|
380
487
|
save_indices = list(range(numIterations))
|
|
@@ -385,44 +492,60 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
|
|
|
385
492
|
|
|
386
493
|
description = f"AOT-BioMaps -- ML-EM (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
|
|
387
494
|
iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
|
|
495
|
+
|
|
496
|
+
# --- MLEM Loop ---
|
|
388
497
|
for it in iterator:
|
|
389
|
-
# projection
|
|
390
|
-
proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
|
|
391
|
-
theta_gpu, np.int32(TN), slice_height,
|
|
392
|
-
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
393
498
|
|
|
394
|
-
|
|
499
|
+
proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
|
|
500
|
+
slice_ptr_gpu, slice_len_gpu,
|
|
501
|
+
theta_gpu, np.int32(TN), slice_height,
|
|
502
|
+
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
503
|
+
|
|
395
504
|
ratio(e_gpu, y_gpu, q_gpu, np.float32(denominator_threshold), np.int32(TN),
|
|
396
|
-
|
|
505
|
+
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
397
506
|
|
|
398
|
-
# zero c
|
|
399
507
|
drv.memset_d32_async(c_gpu, 0, ZX, stream)
|
|
400
508
|
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
509
|
+
backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
|
|
510
|
+
slice_ptr_gpu, slice_len_gpu,
|
|
511
|
+
e_gpu, c_gpu, np.int32(TN), slice_height,
|
|
512
|
+
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
405
513
|
|
|
406
|
-
# update
|
|
407
514
|
update(theta_gpu, c_gpu, SMatrix.norm_factor_inv_gpu, np.int32(ZX),
|
|
408
|
-
|
|
515
|
+
block=(block_size,1,1), grid=grid_cols, stream=stream)
|
|
409
516
|
|
|
410
|
-
stream.synchronize()
|
|
411
517
|
if isSavingEachIteration and it in save_indices:
|
|
412
518
|
out = np.empty(ZX, dtype=np.float32)
|
|
413
519
|
drv.memcpy_dtoh(out, theta_gpu)
|
|
414
520
|
saved_theta.append(out.reshape((SMatrix.Z, SMatrix.X)))
|
|
415
521
|
saved_indices.append(it)
|
|
416
522
|
|
|
417
|
-
|
|
523
|
+
stream.synchronize()
|
|
418
524
|
res = np.empty(ZX, dtype=np.float32)
|
|
419
525
|
drv.memcpy_dtoh(res, theta_gpu)
|
|
420
526
|
|
|
421
|
-
# free
|
|
422
|
-
|
|
423
|
-
|
|
527
|
+
# free
|
|
528
|
+
try:
|
|
529
|
+
y_gpu.free()
|
|
530
|
+
q_gpu.free()
|
|
531
|
+
e_gpu.free()
|
|
532
|
+
c_gpu.free()
|
|
533
|
+
theta_gpu.free()
|
|
534
|
+
except Exception:
|
|
535
|
+
pass
|
|
536
|
+
|
|
537
|
+
final_result = res.reshape((SMatrix.Z, SMatrix.X))
|
|
538
|
+
return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
|
|
539
|
+
|
|
424
540
|
except Exception as e:
|
|
425
541
|
print(f"Error in MLEM_sparseSELL_pycuda: {type(e).__name__}: {e}")
|
|
426
542
|
gc.collect()
|
|
427
543
|
return None, None
|
|
428
544
|
|
|
545
|
+
finally:
|
|
546
|
+
if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
|
|
547
|
+
try:
|
|
548
|
+
SMatrix.ctx.pop()
|
|
549
|
+
except Exception:
|
|
550
|
+
pass
|
|
551
|
+
|