AOT-biomaps 2.9.279-py3-none-any.whl → 2.9.300-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AOT-biomaps might be problematic.
- AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py +16 -19
- AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py +193 -109
- AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py +442 -11
- AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py +8 -15
- AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py +26 -23
- AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
- AOT_biomaps/AOT_Recon/AlgebraicRecon.py +2 -8
- AOT_biomaps/AOT_Recon/PrimalDualRecon.py +94 -41
- AOT_biomaps/AOT_Recon/ReconTools.py +78 -1
- AOT_biomaps/__init__.py +22 -1
- {aot_biomaps-2.9.279.dist-info → aot_biomaps-2.9.300.dist-info}/METADATA +1 -1
- {aot_biomaps-2.9.279.dist-info → aot_biomaps-2.9.300.dist-info}/RECORD +14 -14
- {aot_biomaps-2.9.279.dist-info → aot_biomaps-2.9.300.dist-info}/WHEEL +0 -0
- {aot_biomaps-2.9.279.dist-info → aot_biomaps-2.9.300.dist-info}/top_level.txt +0 -0
AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py

@@ -23,8 +23,7 @@ def LS(
     denominator_threshold=1e-6,
     max_saves=5000,
     show_logs=True,
-    smatrixType=SMatrixType.SELL
-    Z=350,
+    smatrixType=SMatrixType.SELL
 ):
     """
     Least Squares reconstruction using Projected Gradient Descent (PGD) with non-negativity constraint.
@@ -44,7 +43,7 @@ def LS(
     # Dispatch to the appropriate implementation
     if use_gpu:
         if smatrixType == SMatrixType.CSR:
-            return _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold,
+            return _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.SELL:
             return _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.DENSE:
@@ -181,13 +180,12 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
     print(f"Dim X: {X}, Dim Z: {Z}, TN: {TN}, ZX: {ZX}")

     stream = drv.Stream()
-    mod = drv.module_from_file('AOT_biomaps_kernels.cubin')

     # Retrieve the kernels
-    projection_kernel =
-    backprojection_kernel =
-    axpby_kernel =
-    minus_axpy_kernel =
+    projection_kernel = SMatrix.sparse_mod.get_function('projection_kernel__CSR')
+    backprojection_kernel = SMatrix.sparse_mod.get_function('backprojection_kernel__CSR')
+    axpby_kernel = SMatrix.sparse_mod.get_function("vector_axpby_kernel")
+    minus_axpy_kernel = SMatrix.sparse_mod.get_function("vector_minus_axpy_kernel")

     # --- Buffer allocation (raw pointers) ---
     y = y.T.flatten().astype(dtype)
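The old code above reloaded the compiled module from disk on every call (`drv.module_from_file('AOT_biomaps_kernels.cubin')`); the new code fetches kernels from a module cached on the SMatrix object (`SMatrix.sparse_mod`). A minimal sketch of the underlying PyCUDA pattern, assuming a compiled `AOT_biomaps_kernels.cubin` is present (the kernel name is taken from the diff):

```python
import pycuda.autoinit  # noqa: F401 -- creates a context on the default device
import pycuda.driver as drv

# Load the pre-compiled module once and reuse it; module_from_file() and
# get_function() are standard PyCUDA driver-API calls.
mod = drv.module_from_file("AOT_biomaps_kernels.cubin")
projection_kernel = mod.get_function("projection_kernel__CSR")
```

Caching the module on the object avoids repeated file I/O and keeps the kernels bound to the same context that owns the device buffers.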
@@ -231,7 +229,7 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
     drv.memcpy_dtod(p_flat_gpu, r_flat_gpu, ZX * np.dtype(dtype).itemsize)

     # 6. rho_prev = ||r_0||^2
-    rho_prev = _dot_product_gpu(
+    rho_prev = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)

     # --- Iterative loop ---
     saved_theta, saved_indices = [], []
@@ -258,7 +256,7 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
             block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)

         # c. alpha = rho_prev / <p, z>
-        pAp = _dot_product_gpu(
+        pAp = _dot_product_gpu(SMatrix.sparse_mod, p_flat_gpu, z_flat_gpu, ZX, stream)

         if abs(pAp) < 1e-15: break
         alpha = rho_prev / pAp
@@ -273,7 +271,7 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
             block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)

         # f. rho_curr = ||r||^2
-        rho_curr = _dot_product_gpu(
+        rho_curr = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)

         if rho_curr < tolerance: break

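The three `_dot_product_gpu` hunks above are the scalar bookkeeping of a standard conjugate-gradient iteration on the normal equations; only the module handle passed to the helper changed. For reference, a dense NumPy sketch of the same recurrence (illustrative only; the package runs it with sparse GPU kernels, and the LS docstring adds a non-negativity constraint on top):

```python
import numpy as np

def cg_normal_equations(A, y, iters=100, tol=1e-12):
    """Plain CG on A^T A x = A^T y, mirroring the scalars in LS.py."""
    x = np.zeros(A.shape[1])
    r = A.T @ y                        # residual for x = 0
    p = r.copy()
    rho_prev = r @ r                   # rho_prev = ||r_0||^2
    for _ in range(iters):
        z = A.T @ (A @ p)              # projection followed by backprojection
        pAp = p @ z                    # alpha = rho_prev / <p, z>
        if abs(pAp) < 1e-15:
            break
        alpha = rho_prev / pAp
        x += alpha * p                 # vector_axpby_kernel on the GPU
        r -= alpha * z                 # vector_minus_axpy_kernel on the GPU
        rho_curr = r @ r               # rho_curr = ||r||^2
        if rho_curr < tol:
            break
        p = r + (rho_curr / rho_prev) * p
        rho_prev = rho_curr
    return x
```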
@@ -364,11 +362,10 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
     tolerance = 1e-12

     # Access the SELL parameters
-
-
-
-
-    minus_axpy_kernel = mod.get_function("vector_minus_axpy_kernel")
+    projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__SELL")
+    backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__SELL")
+    axpby_kernel = SMatrix.sparse_mod.get_function("vector_axpby_kernel")
+    minus_axpy_kernel = SMatrix.sparse_mod.get_function("vector_minus_axpy_kernel")
     slice_height = np.int32(SMatrix.slice_height)
     grid_rows = ((TN + block_size - 1) // block_size, 1, 1)

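The SELL path fetches kernels driven by `slice_ptr`, `slice_len` and `slice_height`: SELL-C-σ groups rows into slices of C rows, pads each slice to its widest row, and stores values column-major within a slice so consecutive threads read consecutive memory. A toy construction from a dense matrix, with σ = 1 (no row sorting); this helper is hypothetical and not part of the package, which builds these arrays in `allocate_sell_c_sigma_direct()`:

```python
import numpy as np

def dense_to_sell(A, C=32):
    """Toy SELL-C builder (sigma = 1): one padded, column-major block per slice."""
    n_rows = A.shape[0]
    values, colinds, slice_ptr, slice_len = [], [], [0], []
    for s in range((n_rows + C - 1) // C):
        rows = A[s * C : min((s + 1) * C, n_rows)]
        nnz = [np.flatnonzero(r) for r in rows]          # column indices per row
        width = max((len(ix) for ix in nnz), default=0)  # widest row in the slice
        slice_len.append(width)
        for j in range(width):                           # column-major within slice
            for ix, r in zip(nnz, rows):
                if j < len(ix):
                    values.append(r[ix[j]]); colinds.append(ix[j])
                else:
                    values.append(0.0); colinds.append(0)  # padding entry
        slice_ptr.append(len(values))
    return (np.asarray(values, np.float32), np.asarray(colinds, np.int32),
            np.asarray(slice_ptr, np.int32), np.asarray(slice_len, np.int32))
```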
@@ -416,7 +413,7 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
     drv.memcpy_dtod(p_flat_gpu, r_flat_gpu, ZX * np.dtype(dtype).itemsize)

     # 6. rho_prev = ||r_0||^2
-    rho_prev = _dot_product_gpu(
+    rho_prev = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)

     # --- Iterative loop ---
     saved_theta, saved_indices = [], []
@@ -443,7 +440,7 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
             block=(block_size, 1, 1), grid=grid_rows, stream=stream)

         # c. alpha = rho_prev / <p, z>
-        pAp = _dot_product_gpu(
+        pAp = _dot_product_gpu(SMatrix.sparse_mod, p_flat_gpu, z_flat_gpu, ZX, stream)

         if abs(pAp) < 1e-15: break
         alpha = rho_prev / pAp
@@ -458,7 +455,7 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
             block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)

         # f. rho_curr = ||r||^2
-        rho_curr = _dot_product_gpu(
+        rho_curr = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)

         if rho_curr < tolerance: break

AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py

@@ -26,7 +26,6 @@ def MLEM(
     max_saves=5000,
     show_logs=True,
     smatrixType=SMatrixType.SELL,
-    Z=350,
 ):
     """
     Unified MLEM algorithm for Acousto-Optic Tomography.
@@ -59,11 +58,11 @@ def MLEM(
     # Dispatch to the appropriate implementation
     if use_gpu:
         if smatrixType == SMatrixType.CSR:
-            return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
+            return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.SELL:
-            return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
+            return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.DENSE:
-            return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
+            return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold,show_logs)
         else:
             raise ValueError("Unsupported SMatrixType for GPU MLEM.")
     else:
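Whatever the storage format, all three GPU targets iterate the same fixed point: theta <- theta * (A^T (y / max(A theta, eps))) / (A^T 1), where the `norm_factor_inv` buffer caches 1 / (A^T 1). A dense NumPy reference matching the kernel names used later in this diff (an illustrative sketch, not the package's code path):

```python
import numpy as np

def mlem_dense(A, y, n_iter=50, eps=1e-6):
    """Reference MLEM: theta *= norm_factor_inv * A^T (y / max(A theta, eps))."""
    theta = np.full(A.shape[1], 0.1, dtype=np.float32)   # initial_theta in the diff
    norm_factor_inv = (1.0 / np.maximum(A.sum(axis=0), eps)).astype(np.float32)
    for _ in range(n_iter):
        q = A @ theta                  # projection_kernel
        e = y / np.maximum(q, eps)     # ratio_kernel, eps = denominator_threshold
        c = A.T @ e                    # backprojection_kernel
        theta *= norm_factor_inv * c   # update_theta_kernel
    return theta
```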
@@ -229,49 +228,60 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
         print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
         return None, None

-def MLEM_sparseCSR_pycuda(
+def MLEM_sparseCSR_pycuda(
+    SMatrix,
+    y,
+    numIterations,
+    isSavingEachIteration,
+    tumor_str,
+    max_saves,
+    denominator_threshold,
+    show_logs=True,
+):
     """
-
-
-
-
-
-
-
+    Robust MLEM implementation for CSR SMatrix using PyCUDA kernels.
+    Expects SMatrix to be SparseSMatrix_CSR with attributes:
+      - values_gpu, col_ind_gpu, row_ptr_gpu (device pointers)
+      - norm_factor_inv_gpu (device pointer)
+      - sparse_mod (loaded module with kernels)
+      - ctx (PyCUDA context)
+    Returns (saved_theta_list, saved_indices) if isSavingEachIteration else (final_theta, None)
     """
-
-    # We use a final_result placeholder to ensure it's defined outside the try block
     final_result = None
-
+
+    # Local holders to free in finally
+    y_gpu = q_flat_gpu = e_flat_gpu = c_flat_gpu = theta_flat_gpu = None
+
     try:
         if not isinstance(SMatrix, SparseSMatrix_CSR):
             raise TypeError("SMatrix must be a SparseSMatrix_CSR object")

-        #
-
-        if SMatrix
+        # push context (if provided)
+        popped_ctx = False
+        if getattr(SMatrix, "ctx", None):
             SMatrix.ctx.push()
-
+            popped_ctx = True

         dtype = np.float32
-        TN = SMatrix.N * SMatrix.T
-        ZX = SMatrix.Z * SMatrix.X
-
-
-
-
-        if
-
-
-
-
-
+        TN = int(SMatrix.N * SMatrix.T)
+        ZX = int(SMatrix.Z * SMatrix.X)
+        Z = int(SMatrix.Z)
+        X = int(SMatrix.X)
+
+        # Make sure required GPU pointers exist
+        if getattr(SMatrix, "values_gpu", None) is None or getattr(SMatrix, "col_ind_gpu", None) is None or getattr(SMatrix, "row_ptr_gpu", None) is None:
+            raise RuntimeError("SMatrix is missing GPU buffers (values_gpu / col_ind_gpu / row_ptr_gpu)")
+
+        if getattr(SMatrix, "norm_factor_inv_gpu", None) is None:
+            raise RuntimeError("SMatrix.norm_factor_inv_gpu not available on GPU")
+
+        # stream for async operations
         stream = drv.Stream()

-        #
-
-        y_gpu = drv.mem_alloc(
-        drv.memcpy_htod_async(y_gpu,
+        # prepare device buffers
+        y_arr = np.ascontiguousarray(y.T.flatten().astype(np.float32))
+        y_gpu = drv.mem_alloc(y_arr.nbytes)
+        drv.memcpy_htod_async(y_gpu, y_arr, stream)

         theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
         initial_theta = np.full(ZX, 0.1, dtype=dtype)
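The rewritten setup above allocates raw device pointers and uploads `y` asynchronously on a stream. A self-contained sketch of that PyCUDA idiom (the array size is a placeholder; for a genuinely overlapping copy the host array would need page-locked memory, e.g. `drv.pagelocked_empty`):

```python
import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.driver as drv

stream = drv.Stream()
y_arr = np.ascontiguousarray(np.random.rand(1024).astype(np.float32))

y_gpu = drv.mem_alloc(y_arr.nbytes)              # raw device allocation
drv.memcpy_htod_async(y_gpu, y_arr, stream)      # async host-to-device copy

theta_gpu = drv.mem_alloc(y_arr.nbytes)
drv.memset_d32_async(theta_gpu, 0, y_arr.size, stream)  # zero 32-bit words

stream.synchronize()                             # wait before touching results
y_gpu.free()
theta_gpu.free()
```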
@@ -283,62 +293,111 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
         e_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
         c_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)

-        #
-
-
-
-
-        update_kernel = mod.get_function('update_theta_kernel')
+        # Ensure kernels exist
+        projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__CSR")
+        backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__CSR")
+        ratio_kernel = SMatrix.sparse_mod.get_function("ratio_kernel")
+        update_kernel = SMatrix.sparse_mod.get_function("update_theta_kernel")
         block_size = 256

-
+        # prepare save indices once
         if numIterations <= max_saves:
             save_indices = list(range(numIterations))
         else:
-
+            step = max(1, numIterations // max_saves)
+            save_indices = list(range(0, numIterations, step))
             if save_indices[-1] != numIterations - 1:
                 save_indices.append(numIterations - 1)

+        saved_theta = []
+        saved_indices = []
+
         description = f"AOT-BioMaps -- ML-EM (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
         iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        # grid sizes
+        grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
+        grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
+
         for it in iterator:
             # projection: q = A * theta
-            projection_kernel(
-
-
-
+            projection_kernel(
+                q_flat_gpu,
+                SMatrix.values_gpu,
+                SMatrix.row_ptr_gpu,
+                SMatrix.col_ind_gpu,
+                theta_flat_gpu,
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )

             # ratio: e = y / max(q, threshold)
-            ratio_kernel(
-
-
-
+            ratio_kernel(
+                e_flat_gpu,
+                y_gpu,
+                q_flat_gpu,
+                np.float32(denominator_threshold),
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )
+
+            # backprojection: c = A^T * e (zero c first)
             drv.memset_d32_async(c_flat_gpu, 0, ZX, stream)
-            backprojection_kernel(
-
-
+            backprojection_kernel(
+                c_flat_gpu,
+                SMatrix.values_gpu,
+                SMatrix.row_ptr_gpu,
+                SMatrix.col_ind_gpu,
+                e_flat_gpu,
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )

             # update: theta *= norm_factor_inv * c
-            update_kernel(
-
-
+            update_kernel(
+                theta_flat_gpu,
+                c_flat_gpu,
+                norm_factor_inv_gpu,
+                np.int32(ZX),
+                block=(block_size, 1, 1),
+                grid=grid_cols,
+                stream=stream,
+            )
+
+            # periodic synchronization for stability / logging
             if show_logs and (it % 10 == 0 or it == numIterations - 1):
-
+                stream.synchronize()

+            # save snapshot if required
             if isSavingEachIteration and it in save_indices:
+                # ensure kernels finished
+                stream.synchronize()
                 theta_host = np.empty(ZX, dtype=dtype)
                 drv.memcpy_dtoh(theta_host, theta_flat_gpu)
                 saved_theta.append(theta_host.reshape(Z, X))
-                saved_indices.append(it)
-
-
-
-
-        drv.memcpy_dtoh(
-        final_result =
-
-        # free local allocations
-
+                saved_indices.append(int(it))
+
+        # make sure everything finished
+        stream.synchronize()
+        final_theta_host = np.empty(ZX, dtype=dtype)
+        drv.memcpy_dtoh(final_theta_host, theta_flat_gpu)
+        final_result = final_theta_host.reshape(Z, X)
+
+        # free local allocations (will also be freed in finally if exception)
+        try:
+            y_gpu.free()
+            q_flat_gpu.free()
+            e_flat_gpu.free()
+            c_flat_gpu.free()
+            theta_flat_gpu.free()
+        except Exception:
+            pass

         return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)

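For reference, the CSR triplet (`values_gpu`, `row_ptr_gpu`, `col_ind_gpu`) passed to the kernels above encodes one matrix row per `row_ptr` interval, and `projection_kernel__CSR` computes a row-parallel sparse matrix-vector product. The same operation in plain Python (one GPU thread per row in the real kernel), with a SciPy one-liner for cross-checking:

```python
import numpy as np
from scipy.sparse import csr_matrix

def projection_csr(values, row_ptr, col_ind, theta):
    """q[i] = sum_k values[k] * theta[col_ind[k]] over row i's CSR range."""
    TN = len(row_ptr) - 1
    q = np.zeros(TN, dtype=np.float32)
    for i in range(TN):
        for k in range(row_ptr[i], row_ptr[i + 1]):
            q[i] += values[k] * theta[col_ind[k]]
    return q

# Equivalent check: csr_matrix((values, col_ind, row_ptr)).dot(theta)
```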
@@ -346,47 +405,64 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
         print(f"Error in MLEM_sparseCSR_pycuda: {type(e).__name__}: {e}")
         gc.collect()
         return None, None
-
-    finally:
-        # --- CONTEXT FIX: Pop the context ---
-        if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
-            SMatrix.ctx.pop()
-        # ------------------------------------

-
+    finally:
+        # free buffers if still allocated
+        for buf in ("y_gpu", "q_flat_gpu", "e_flat_gpu", "c_flat_gpu", "theta_flat_gpu"):
+            try:
+                val = locals().get(buf, None)
+                if val is not None:
+                    val.free()
+            except Exception:
+                pass
+        # pop context safely
+        try:
+            if SMatrix and hasattr(SMatrix, "ctx") and SMatrix.ctx and popped_ctx:
+                SMatrix.ctx.pop()
+        except Exception:
+            pass
+
+def MLEM_sparseSELL_pycuda(
+    SMatrix,
+    y,
+    numIterations,
+    isSavingEachIteration,
+    tumor_str,
+    max_saves,
+    denominator_threshold,
+    show_logs=True,
+):
     """
     MLEM using SELL-C-σ kernels already present on device.
     y must be float32 length TN.
+
+    Clean version: diagnostics removed.
     """
     final_result = None

     try:
-        # check if SMatrix is SparseSMatrix_SELL object
         if not isinstance(SMatrix, SparseSMatrix_SELL):
             raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
         if SMatrix.sell_values_gpu is None:
             raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")
-
-        #
-        # This ensures all subsequent PyCUDA operations use the correct GPU/context.
+
+        # Context
         if SMatrix.ctx:
             SMatrix.ctx.push()
-        # -----------------------------------------------------------

         TN = int(SMatrix.N * SMatrix.T)
         ZX = int(SMatrix.Z * SMatrix.X)
         dtype = np.float32
         block_size = 256

-
-
-
-
-        update = mod.get_function("update_theta_kernel")
+        proj = SMatrix.sparse_mod.get_function("projection_kernel__SELL")
+        backproj = SMatrix.sparse_mod.get_function("backprojection_kernel__SELL")
+        ratio = SMatrix.sparse_mod.get_function("ratio_kernel")
+        update = SMatrix.sparse_mod.get_function("update_theta_kernel")

         stream = drv.Stream()

-        #
+        # Device buffers
         y = y.T.flatten().astype(np.float32)
         y_gpu = drv.mem_alloc(y.nbytes)
         drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
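The push/finally-pop discipline adopted in this release is the usual PyCUDA pattern when kernels must run against a context created elsewhere (here, the one stored on the SMatrix). A minimal sketch with a placeholder device index and buffer size:

```python
import pycuda.driver as drv

drv.init()
ctx = drv.Device(0).make_context()   # context that owns the device buffers
ctx.pop()                            # detach so other code/threads can proceed

def run_on(ctx):
    ctx.push()                       # make the context current for this thread
    try:
        buf = drv.mem_alloc(1024)    # allocations now belong to ctx
        # ... launch kernels here ...
        buf.free()
    finally:
        ctx.pop()                    # every push gets a matching pop
```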
@@ -405,6 +481,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
         grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
         grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)

+        # Prepare save indices
         saved_theta, saved_indices = [], []
         if numIterations <= max_saves:
             save_indices = list(range(numIterations))
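A small worked example of the snapshot schedule set up here: with `numIterations = 12` and `max_saves = 5`, `step = max(1, 12 // 5) = 2`, and appending the final iteration means the schedule can slightly exceed `max_saves`:

```python
numIterations, max_saves = 12, 5
step = max(1, numIterations // max_saves)            # -> 2
save_indices = list(range(0, numIterations, step))   # [0, 2, 4, 6, 8, 10]
if save_indices[-1] != numIterations - 1:
    save_indices.append(numIterations - 1)           # [0, 2, 4, 6, 8, 10, 11]
```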
@@ -415,52 +492,59 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum

         description = f"AOT-BioMaps -- ML-EM (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
         iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        # --- MLEM Loop ---
         for it in iterator:
-            # projection
-            proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
-                 theta_gpu, np.int32(TN), slice_height,
-                 block=(block_size,1,1), grid=grid_rows, stream=stream)

-
+            proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+                 slice_ptr_gpu, slice_len_gpu,
+                 theta_gpu, np.int32(TN), slice_height,
+                 block=(block_size,1,1), grid=grid_rows, stream=stream)
+
             ratio(e_gpu, y_gpu, q_gpu, np.float32(denominator_threshold), np.int32(TN),
-
+                  block=(block_size,1,1), grid=grid_rows, stream=stream)

-            # zero c
             drv.memset_d32_async(c_gpu, 0, ZX, stream)

-
-
-
-
+            backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+                     slice_ptr_gpu, slice_len_gpu,
+                     e_gpu, c_gpu, np.int32(TN), slice_height,
+                     block=(block_size,1,1), grid=grid_rows, stream=stream)

-            # update
             update(theta_gpu, c_gpu, SMatrix.norm_factor_inv_gpu, np.int32(ZX),
-
+                   block=(block_size,1,1), grid=grid_cols, stream=stream)

-            stream.synchronize()
             if isSavingEachIteration and it in save_indices:
                 out = np.empty(ZX, dtype=np.float32)
                 drv.memcpy_dtoh(out, theta_gpu)
                 saved_theta.append(out.reshape((SMatrix.Z, SMatrix.X)))
                 saved_indices.append(it)

-
+        stream.synchronize()
         res = np.empty(ZX, dtype=np.float32)
         drv.memcpy_dtoh(res, theta_gpu)

-        # free
-
-
+        # free
+        try:
+            y_gpu.free()
+            q_gpu.free()
+            e_gpu.free()
+            c_gpu.free()
+            theta_gpu.free()
+        except Exception:
+            pass
+
         final_result = res.reshape((SMatrix.Z, SMatrix.X))
         return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
-
+
     except Exception as e:
         print(f"Error in MLEM_sparseSELL_pycuda: {type(e).__name__}: {e}")
         gc.collect()
         return None, None
-
+
     finally:
-        # --- CONTEXT FIX: Pop the context ---
         if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
-
-
+            try:
+                SMatrix.ctx.pop()
+            except Exception:
+                pass
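With the explicit argument list introduced in this release, the SELL path is called as below. This is a hypothetical usage sketch; the `S` instance and the argument values are assumptions, not taken from the diff:

```python
# S: a SparseSMatrix_SELL built via allocate_sell_c_sigma_direct(),
# y: float32 measurements of length S.N * S.T.
theta, _ = MLEM_sparseSELL_pycuda(
    S, y,
    numIterations=200,
    isSavingEachIteration=False,
    tumor_str="NO",                 # hypothetical label shown in the progress bar
    max_saves=5000,
    denominator_threshold=1e-6,
    show_logs=True,
)
# theta is a (S.Z, S.X) float32 image, or None if the reconstruction failed.
```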