AOT-biomaps 2.9.281__tar.gz → 2.9.312__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AOT-biomaps might be problematic. Click here for more details.
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py +16 -19
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py +189 -103
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py +117 -20
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py +8 -15
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py +79 -47
- aot_biomaps-2.9.312/AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AlgebraicRecon.py +2 -8
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/PrimalDualRecon.py +94 -41
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/__init__.py +32 -1
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/PKG-INFO +1 -1
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/PKG-INFO +1 -1
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/setup.py +32 -1
- aot_biomaps-2.9.281/AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/AcousticEnums.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/AcousticTools.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/FocusedWave.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/IrregularWave.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/PlaneWave.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/StructuredWave.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/__init__.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/_mainAcoustic.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/Focus.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/Tomography.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/__init__.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/_mainExperiment.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/Absorber.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/Laser.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/OpticEnums.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/__init__.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/_mainOptic.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/DEPIERRO.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/MAPEM.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/__init__.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Huber.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Quadratic.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/RelativeDifferences.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/__init__.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/__init__.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AnalyticRecon.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/BayesianRecon.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/DeepLearningRecon.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/ReconEnums.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/ReconTools.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/__init__.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/_mainRecon.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/Config.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps/Settings.py +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/SOURCES.txt +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/dependency_links.txt +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/requires.txt +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/top_level.txt +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/README.md +0 -0
- {aot_biomaps-2.9.281 → aot_biomaps-2.9.312}/setup.cfg +0 -0
|
@@ -23,8 +23,7 @@ def LS(
|
|
|
23
23
|
denominator_threshold=1e-6,
|
|
24
24
|
max_saves=5000,
|
|
25
25
|
show_logs=True,
|
|
26
|
-
smatrixType=SMatrixType.SELL
|
|
27
|
-
Z=350,
|
|
26
|
+
smatrixType=SMatrixType.SELL
|
|
28
27
|
):
|
|
29
28
|
"""
|
|
30
29
|
Least Squares reconstruction using Projected Gradient Descent (PGD) with non-negativity constraint.
|
|
@@ -44,7 +43,7 @@ def LS(
|
|
|
44
43
|
# Dispatch to the appropriate implementation
|
|
45
44
|
if use_gpu:
|
|
46
45
|
if smatrixType == SMatrixType.CSR:
|
|
47
|
-
return _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold,
|
|
46
|
+
return _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
|
|
48
47
|
elif smatrixType == SMatrixType.SELL:
|
|
49
48
|
return _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
|
|
50
49
|
elif smatrixType == SMatrixType.DENSE:
|
|
@@ -181,13 +180,12 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
|
|
|
181
180
|
print(f"Dim X: {X}, Dim Z: {Z}, TN: {TN}, ZX: {ZX}")
|
|
182
181
|
|
|
183
182
|
stream = drv.Stream()
|
|
184
|
-
mod = drv.module_from_file('AOT_biomaps_kernels.cubin')
|
|
185
183
|
|
|
186
184
|
# Récupération des Kernels
|
|
187
|
-
projection_kernel =
|
|
188
|
-
backprojection_kernel =
|
|
189
|
-
axpby_kernel =
|
|
190
|
-
minus_axpy_kernel =
|
|
185
|
+
projection_kernel = SMatrix.sparse_mod.get_function('projection_kernel__CSR')
|
|
186
|
+
backprojection_kernel = SMatrix.sparse_mod.get_function('backprojection_kernel__CSR')
|
|
187
|
+
axpby_kernel = SMatrix.sparse_mod.get_function("vector_axpby_kernel")
|
|
188
|
+
minus_axpy_kernel = SMatrix.sparse_mod.get_function("vector_minus_axpy_kernel")
|
|
191
189
|
|
|
192
190
|
# --- Allocation des buffers (Pointeurs Bruts) ---
|
|
193
191
|
y = y.T.flatten().astype(dtype)
|
|
@@ -231,7 +229,7 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
|
|
|
231
229
|
drv.memcpy_dtod(p_flat_gpu, r_flat_gpu, ZX * np.dtype(dtype).itemsize)
|
|
232
230
|
|
|
233
231
|
# 6. rho_prev = ||r_0||^2
|
|
234
|
-
rho_prev = _dot_product_gpu(
|
|
232
|
+
rho_prev = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
|
|
235
233
|
|
|
236
234
|
# --- Boucle itérative ---
|
|
237
235
|
saved_theta, saved_indices = [], []
|
|
@@ -258,7 +256,7 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
|
|
|
258
256
|
block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
|
|
259
257
|
|
|
260
258
|
# c. alpha = rho_prev / <p, z>
|
|
261
|
-
pAp = _dot_product_gpu(
|
|
259
|
+
pAp = _dot_product_gpu(SMatrix.sparse_mod, p_flat_gpu, z_flat_gpu, ZX, stream)
|
|
262
260
|
|
|
263
261
|
if abs(pAp) < 1e-15: break
|
|
264
262
|
alpha = rho_prev / pAp
|
|
@@ -273,7 +271,7 @@ def _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tu
|
|
|
273
271
|
block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
|
|
274
272
|
|
|
275
273
|
# f. rho_curr = ||r||^2
|
|
276
|
-
rho_curr = _dot_product_gpu(
|
|
274
|
+
rho_curr = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
|
|
277
275
|
|
|
278
276
|
if rho_curr < tolerance: break
|
|
279
277
|
|
|
@@ -364,11 +362,10 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
|
|
|
364
362
|
tolerance = 1e-12
|
|
365
363
|
|
|
366
364
|
# Accès aux paramètres SELL
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
minus_axpy_kernel = mod.get_function("vector_minus_axpy_kernel")
|
|
365
|
+
projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__SELL")
|
|
366
|
+
backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__SELL")
|
|
367
|
+
axpby_kernel = SMatrix.sparse_mod.get_function("vector_axpby_kernel")
|
|
368
|
+
minus_axpy_kernel = SMatrix.sparse_mod.get_function("vector_minus_axpy_kernel")
|
|
372
369
|
slice_height = np.int32(SMatrix.slice_height)
|
|
373
370
|
grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
|
|
374
371
|
|
|
@@ -416,7 +413,7 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
|
|
|
416
413
|
drv.memcpy_dtod(p_flat_gpu, r_flat_gpu, ZX * np.dtype(dtype).itemsize)
|
|
417
414
|
|
|
418
415
|
# 6. rho_prev = ||r_0||^2
|
|
419
|
-
rho_prev = _dot_product_gpu(
|
|
416
|
+
rho_prev = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
|
|
420
417
|
|
|
421
418
|
# --- Boucle itérative ---
|
|
422
419
|
saved_theta, saved_indices = [], []
|
|
@@ -443,7 +440,7 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
|
|
|
443
440
|
block=(block_size, 1, 1), grid=grid_rows, stream=stream)
|
|
444
441
|
|
|
445
442
|
# c. alpha = rho_prev / <p, z>
|
|
446
|
-
pAp = _dot_product_gpu(
|
|
443
|
+
pAp = _dot_product_gpu(SMatrix.sparse_mod, p_flat_gpu, z_flat_gpu, ZX, stream)
|
|
447
444
|
|
|
448
445
|
if abs(pAp) < 1e-15: break
|
|
449
446
|
alpha = rho_prev / pAp
|
|
@@ -458,7 +455,7 @@ def _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, t
|
|
|
458
455
|
block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
|
|
459
456
|
|
|
460
457
|
# f. rho_curr = ||r||^2
|
|
461
|
-
rho_curr = _dot_product_gpu(
|
|
458
|
+
rho_curr = _dot_product_gpu(SMatrix.sparse_mod, r_flat_gpu, r_flat_gpu, ZX, stream)
|
|
462
459
|
|
|
463
460
|
if rho_curr < tolerance: break
|
|
464
461
|
|
|
@@ -26,7 +26,6 @@ def MLEM(
|
|
|
26
26
|
max_saves=5000,
|
|
27
27
|
show_logs=True,
|
|
28
28
|
smatrixType=SMatrixType.SELL,
|
|
29
|
-
Z=350,
|
|
30
29
|
):
|
|
31
30
|
"""
|
|
32
31
|
Unified MLEM algorithm for Acousto-Optic Tomography.
|
|
@@ -59,11 +58,11 @@ def MLEM(
|
|
|
59
58
|
# Dispatch to the appropriate implementation
|
|
60
59
|
if use_gpu:
|
|
61
60
|
if smatrixType == SMatrixType.CSR:
|
|
62
|
-
return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
61
|
+
return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
|
|
63
62
|
elif smatrixType == SMatrixType.SELL:
|
|
64
|
-
return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
63
|
+
return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
|
|
65
64
|
elif smatrixType == SMatrixType.DENSE:
|
|
66
|
-
return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
65
|
+
return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold,show_logs)
|
|
67
66
|
else:
|
|
68
67
|
raise ValueError("Unsupported SMatrixType for GPU MLEM.")
|
|
69
68
|
else:
|
|
@@ -229,49 +228,60 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
|
|
|
229
228
|
print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
|
|
230
229
|
return None, None
|
|
231
230
|
|
|
232
|
-
def MLEM_sparseCSR_pycuda(
|
|
231
|
+
def MLEM_sparseCSR_pycuda(
|
|
232
|
+
SMatrix,
|
|
233
|
+
y,
|
|
234
|
+
numIterations,
|
|
235
|
+
isSavingEachIteration,
|
|
236
|
+
tumor_str,
|
|
237
|
+
max_saves,
|
|
238
|
+
denominator_threshold,
|
|
239
|
+
show_logs=True,
|
|
240
|
+
):
|
|
233
241
|
"""
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
242
|
+
Robust MLEM implementation for CSR SMatrix using PyCUDA kernels.
|
|
243
|
+
Expects SMatrix to be SparseSMatrix_CSR with attributes:
|
|
244
|
+
- values_gpu, col_ind_gpu, row_ptr_gpu (device pointers)
|
|
245
|
+
- norm_factor_inv_gpu (device pointer)
|
|
246
|
+
- sparse_mod (loaded module with kernels)
|
|
247
|
+
- ctx (PyCUDA context)
|
|
248
|
+
Returns (saved_theta_list, saved_indices) if isSavingEachIteration else (final_theta, None)
|
|
241
249
|
"""
|
|
242
|
-
|
|
243
|
-
# We use a final_result placeholder to ensure it's defined outside the try block
|
|
244
250
|
final_result = None
|
|
245
|
-
|
|
251
|
+
|
|
252
|
+
# Local holders to free in finally
|
|
253
|
+
y_gpu = q_flat_gpu = e_flat_gpu = c_flat_gpu = theta_flat_gpu = None
|
|
254
|
+
|
|
246
255
|
try:
|
|
247
256
|
if not isinstance(SMatrix, SparseSMatrix_CSR):
|
|
248
257
|
raise TypeError("SMatrix must be a SparseSMatrix_CSR object")
|
|
249
258
|
|
|
250
|
-
#
|
|
251
|
-
|
|
252
|
-
if SMatrix
|
|
259
|
+
# push context (if provided)
|
|
260
|
+
popped_ctx = False
|
|
261
|
+
if getattr(SMatrix, "ctx", None):
|
|
253
262
|
SMatrix.ctx.push()
|
|
254
|
-
|
|
263
|
+
popped_ctx = True
|
|
255
264
|
|
|
256
265
|
dtype = np.float32
|
|
257
|
-
TN = SMatrix.N * SMatrix.T
|
|
258
|
-
ZX = SMatrix.Z * SMatrix.X
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
if
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
266
|
+
TN = int(SMatrix.N * SMatrix.T)
|
|
267
|
+
ZX = int(SMatrix.Z * SMatrix.X)
|
|
268
|
+
Z = int(SMatrix.Z)
|
|
269
|
+
X = int(SMatrix.X)
|
|
270
|
+
|
|
271
|
+
# Make sure required GPU pointers exist
|
|
272
|
+
if getattr(SMatrix, "values_gpu", None) is None or getattr(SMatrix, "col_ind_gpu", None) is None or getattr(SMatrix, "row_ptr_gpu", None) is None:
|
|
273
|
+
raise RuntimeError("SMatrix is missing GPU buffers (values_gpu / col_ind_gpu / row_ptr_gpu)")
|
|
274
|
+
|
|
275
|
+
if getattr(SMatrix, "norm_factor_inv_gpu", None) is None:
|
|
276
|
+
raise RuntimeError("SMatrix.norm_factor_inv_gpu not available on GPU")
|
|
277
|
+
|
|
278
|
+
# stream for async operations
|
|
269
279
|
stream = drv.Stream()
|
|
270
280
|
|
|
271
|
-
#
|
|
272
|
-
|
|
273
|
-
y_gpu = drv.mem_alloc(
|
|
274
|
-
drv.memcpy_htod_async(y_gpu,
|
|
281
|
+
# prepare device buffers
|
|
282
|
+
y_arr = np.ascontiguousarray(y.T.flatten().astype(np.float32))
|
|
283
|
+
y_gpu = drv.mem_alloc(y_arr.nbytes)
|
|
284
|
+
drv.memcpy_htod_async(y_gpu, y_arr, stream)
|
|
275
285
|
|
|
276
286
|
theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
|
|
277
287
|
initial_theta = np.full(ZX, 0.1, dtype=dtype)
|
|
@@ -283,61 +293,111 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
|
|
|
283
293
|
e_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
|
|
284
294
|
c_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
|
|
285
295
|
|
|
286
|
-
#
|
|
287
|
-
projection_kernel = SMatrix.sparse_mod.get_function(
|
|
288
|
-
backprojection_kernel = SMatrix.sparse_mod.get_function(
|
|
289
|
-
ratio_kernel = SMatrix.sparse_mod.get_function(
|
|
290
|
-
update_kernel = SMatrix.sparse_mod.get_function(
|
|
296
|
+
# Ensure kernels exist
|
|
297
|
+
projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__CSR")
|
|
298
|
+
backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__CSR")
|
|
299
|
+
ratio_kernel = SMatrix.sparse_mod.get_function("ratio_kernel")
|
|
300
|
+
update_kernel = SMatrix.sparse_mod.get_function("update_theta_kernel")
|
|
291
301
|
block_size = 256
|
|
292
302
|
|
|
293
|
-
|
|
303
|
+
# prepare save indices once
|
|
294
304
|
if numIterations <= max_saves:
|
|
295
305
|
save_indices = list(range(numIterations))
|
|
296
306
|
else:
|
|
297
|
-
|
|
307
|
+
step = max(1, numIterations // max_saves)
|
|
308
|
+
save_indices = list(range(0, numIterations, step))
|
|
298
309
|
if save_indices[-1] != numIterations - 1:
|
|
299
310
|
save_indices.append(numIterations - 1)
|
|
300
311
|
|
|
312
|
+
saved_theta = []
|
|
313
|
+
saved_indices = []
|
|
314
|
+
|
|
301
315
|
description = f"AOT-BioMaps -- ML-EM (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
|
|
302
316
|
iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
|
|
317
|
+
|
|
318
|
+
# grid sizes
|
|
319
|
+
grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
|
|
320
|
+
grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
|
|
321
|
+
|
|
303
322
|
for it in iterator:
|
|
304
323
|
# projection: q = A * theta
|
|
305
|
-
projection_kernel(
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
324
|
+
projection_kernel(
|
|
325
|
+
q_flat_gpu,
|
|
326
|
+
SMatrix.values_gpu,
|
|
327
|
+
SMatrix.row_ptr_gpu,
|
|
328
|
+
SMatrix.col_ind_gpu,
|
|
329
|
+
theta_flat_gpu,
|
|
330
|
+
np.int32(TN),
|
|
331
|
+
block=(block_size, 1, 1),
|
|
332
|
+
grid=grid_rows,
|
|
333
|
+
stream=stream,
|
|
334
|
+
)
|
|
309
335
|
|
|
310
336
|
# ratio: e = y / max(q, threshold)
|
|
311
|
-
ratio_kernel(
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
337
|
+
ratio_kernel(
|
|
338
|
+
e_flat_gpu,
|
|
339
|
+
y_gpu,
|
|
340
|
+
q_flat_gpu,
|
|
341
|
+
np.float32(denominator_threshold),
|
|
342
|
+
np.int32(TN),
|
|
343
|
+
block=(block_size, 1, 1),
|
|
344
|
+
grid=grid_rows,
|
|
345
|
+
stream=stream,
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
# backprojection: c = A^T * e (zero c first)
|
|
315
349
|
drv.memset_d32_async(c_flat_gpu, 0, ZX, stream)
|
|
316
|
-
backprojection_kernel(
|
|
317
|
-
|
|
318
|
-
|
|
350
|
+
backprojection_kernel(
|
|
351
|
+
c_flat_gpu,
|
|
352
|
+
SMatrix.values_gpu,
|
|
353
|
+
SMatrix.row_ptr_gpu,
|
|
354
|
+
SMatrix.col_ind_gpu,
|
|
355
|
+
e_flat_gpu,
|
|
356
|
+
np.int32(TN),
|
|
357
|
+
block=(block_size, 1, 1),
|
|
358
|
+
grid=grid_rows,
|
|
359
|
+
stream=stream,
|
|
360
|
+
)
|
|
319
361
|
|
|
320
362
|
# update: theta *= norm_factor_inv * c
|
|
321
|
-
update_kernel(
|
|
322
|
-
|
|
323
|
-
|
|
363
|
+
update_kernel(
|
|
364
|
+
theta_flat_gpu,
|
|
365
|
+
c_flat_gpu,
|
|
366
|
+
norm_factor_inv_gpu,
|
|
367
|
+
np.int32(ZX),
|
|
368
|
+
block=(block_size, 1, 1),
|
|
369
|
+
grid=grid_cols,
|
|
370
|
+
stream=stream,
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
# periodic synchronization for stability / logging
|
|
324
374
|
if show_logs and (it % 10 == 0 or it == numIterations - 1):
|
|
325
|
-
|
|
375
|
+
stream.synchronize()
|
|
326
376
|
|
|
377
|
+
# save snapshot if required
|
|
327
378
|
if isSavingEachIteration and it in save_indices:
|
|
379
|
+
# ensure kernels finished
|
|
380
|
+
stream.synchronize()
|
|
328
381
|
theta_host = np.empty(ZX, dtype=dtype)
|
|
329
382
|
drv.memcpy_dtoh(theta_host, theta_flat_gpu)
|
|
330
383
|
saved_theta.append(theta_host.reshape(Z, X))
|
|
331
|
-
saved_indices.append(it)
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
drv.memcpy_dtoh(
|
|
337
|
-
final_result =
|
|
338
|
-
|
|
339
|
-
# free local allocations
|
|
340
|
-
|
|
384
|
+
saved_indices.append(int(it))
|
|
385
|
+
|
|
386
|
+
# make sure everything finished
|
|
387
|
+
stream.synchronize()
|
|
388
|
+
final_theta_host = np.empty(ZX, dtype=dtype)
|
|
389
|
+
drv.memcpy_dtoh(final_theta_host, theta_flat_gpu)
|
|
390
|
+
final_result = final_theta_host.reshape(Z, X)
|
|
391
|
+
|
|
392
|
+
# free local allocations (will also be freed in finally if exception)
|
|
393
|
+
try:
|
|
394
|
+
y_gpu.free()
|
|
395
|
+
q_flat_gpu.free()
|
|
396
|
+
e_flat_gpu.free()
|
|
397
|
+
c_flat_gpu.free()
|
|
398
|
+
theta_flat_gpu.free()
|
|
399
|
+
except Exception:
|
|
400
|
+
pass
|
|
341
401
|
|
|
342
402
|
return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
|
|
343
403
|
|
|
@@ -345,32 +405,50 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
|
|
|
345
405
|
print(f"Error in MLEM_sparseCSR_pycuda: {type(e).__name__}: {e}")
|
|
346
406
|
gc.collect()
|
|
347
407
|
return None, None
|
|
348
|
-
|
|
349
|
-
finally:
|
|
350
|
-
# --- CONTEXT FIX: Pop the context ---
|
|
351
|
-
if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
|
|
352
|
-
SMatrix.ctx.pop()
|
|
353
|
-
# ------------------------------------
|
|
354
408
|
|
|
355
|
-
|
|
409
|
+
finally:
|
|
410
|
+
# free buffers if still allocated
|
|
411
|
+
for buf in ("y_gpu", "q_flat_gpu", "e_flat_gpu", "c_flat_gpu", "theta_flat_gpu"):
|
|
412
|
+
try:
|
|
413
|
+
val = locals().get(buf, None)
|
|
414
|
+
if val is not None:
|
|
415
|
+
val.free()
|
|
416
|
+
except Exception:
|
|
417
|
+
pass
|
|
418
|
+
# pop context safely
|
|
419
|
+
try:
|
|
420
|
+
if SMatrix and hasattr(SMatrix, "ctx") and SMatrix.ctx and popped_ctx:
|
|
421
|
+
SMatrix.ctx.pop()
|
|
422
|
+
except Exception:
|
|
423
|
+
pass
|
|
424
|
+
|
|
425
|
+
def MLEM_sparseSELL_pycuda(
|
|
426
|
+
SMatrix,
|
|
427
|
+
y,
|
|
428
|
+
numIterations,
|
|
429
|
+
isSavingEachIteration,
|
|
430
|
+
tumor_str,
|
|
431
|
+
max_saves,
|
|
432
|
+
denominator_threshold,
|
|
433
|
+
show_logs=True,
|
|
434
|
+
):
|
|
356
435
|
"""
|
|
357
436
|
MLEM using SELL-C-σ kernels already present on device.
|
|
358
437
|
y must be float32 length TN.
|
|
438
|
+
|
|
439
|
+
Version propre : diagnostics retirés.
|
|
359
440
|
"""
|
|
360
441
|
final_result = None
|
|
361
442
|
|
|
362
443
|
try:
|
|
363
|
-
# check if SMatrix is SparseSMatrix_SELL object
|
|
364
444
|
if not isinstance(SMatrix, SparseSMatrix_SELL):
|
|
365
445
|
raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
|
|
366
446
|
if SMatrix.sell_values_gpu is None:
|
|
367
447
|
raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")
|
|
368
|
-
|
|
369
|
-
#
|
|
370
|
-
# This ensures all subsequent PyCUDA operations use the correct GPU/context.
|
|
448
|
+
|
|
449
|
+
# Context
|
|
371
450
|
if SMatrix.ctx:
|
|
372
451
|
SMatrix.ctx.push()
|
|
373
|
-
# -----------------------------------------------------------
|
|
374
452
|
|
|
375
453
|
TN = int(SMatrix.N * SMatrix.T)
|
|
376
454
|
ZX = int(SMatrix.Z * SMatrix.X)
|
|
@@ -384,7 +462,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
|
|
|
384
462
|
|
|
385
463
|
stream = drv.Stream()
|
|
386
464
|
|
|
387
|
-
#
|
|
465
|
+
# Device buffers
|
|
388
466
|
y = y.T.flatten().astype(np.float32)
|
|
389
467
|
y_gpu = drv.mem_alloc(y.nbytes)
|
|
390
468
|
drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
|
|
@@ -403,6 +481,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
|
|
|
403
481
|
grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
|
|
404
482
|
grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
|
|
405
483
|
|
|
484
|
+
# Prepare save indices
|
|
406
485
|
saved_theta, saved_indices = [], []
|
|
407
486
|
if numIterations <= max_saves:
|
|
408
487
|
save_indices = list(range(numIterations))
|
|
@@ -413,52 +492,59 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
|
|
|
413
492
|
|
|
414
493
|
description = f"AOT-BioMaps -- ML-EM (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
|
|
415
494
|
iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
|
|
495
|
+
|
|
496
|
+
# --- MLEM Loop ---
|
|
416
497
|
for it in iterator:
|
|
417
|
-
# projection
|
|
418
|
-
proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
|
|
419
|
-
theta_gpu, np.int32(TN), slice_height,
|
|
420
|
-
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
421
498
|
|
|
422
|
-
|
|
499
|
+
proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
|
|
500
|
+
slice_ptr_gpu, slice_len_gpu,
|
|
501
|
+
theta_gpu, np.int32(TN), slice_height,
|
|
502
|
+
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
503
|
+
|
|
423
504
|
ratio(e_gpu, y_gpu, q_gpu, np.float32(denominator_threshold), np.int32(TN),
|
|
424
|
-
|
|
505
|
+
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
425
506
|
|
|
426
|
-
# zero c
|
|
427
507
|
drv.memset_d32_async(c_gpu, 0, ZX, stream)
|
|
428
508
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
509
|
+
backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
|
|
510
|
+
slice_ptr_gpu, slice_len_gpu,
|
|
511
|
+
e_gpu, c_gpu, np.int32(TN), slice_height,
|
|
512
|
+
block=(block_size,1,1), grid=grid_rows, stream=stream)
|
|
433
513
|
|
|
434
|
-
# update
|
|
435
514
|
update(theta_gpu, c_gpu, SMatrix.norm_factor_inv_gpu, np.int32(ZX),
|
|
436
|
-
|
|
515
|
+
block=(block_size,1,1), grid=grid_cols, stream=stream)
|
|
437
516
|
|
|
438
|
-
stream.synchronize()
|
|
439
517
|
if isSavingEachIteration and it in save_indices:
|
|
440
518
|
out = np.empty(ZX, dtype=np.float32)
|
|
441
519
|
drv.memcpy_dtoh(out, theta_gpu)
|
|
442
520
|
saved_theta.append(out.reshape((SMatrix.Z, SMatrix.X)))
|
|
443
521
|
saved_indices.append(it)
|
|
444
522
|
|
|
445
|
-
|
|
523
|
+
stream.synchronize()
|
|
446
524
|
res = np.empty(ZX, dtype=np.float32)
|
|
447
525
|
drv.memcpy_dtoh(res, theta_gpu)
|
|
448
526
|
|
|
449
|
-
# free
|
|
450
|
-
|
|
451
|
-
|
|
527
|
+
# free
|
|
528
|
+
try:
|
|
529
|
+
y_gpu.free()
|
|
530
|
+
q_gpu.free()
|
|
531
|
+
e_gpu.free()
|
|
532
|
+
c_gpu.free()
|
|
533
|
+
theta_gpu.free()
|
|
534
|
+
except Exception:
|
|
535
|
+
pass
|
|
536
|
+
|
|
452
537
|
final_result = res.reshape((SMatrix.Z, SMatrix.X))
|
|
453
538
|
return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
|
|
454
|
-
|
|
539
|
+
|
|
455
540
|
except Exception as e:
|
|
456
541
|
print(f"Error in MLEM_sparseSELL_pycuda: {type(e).__name__}: {e}")
|
|
457
542
|
gc.collect()
|
|
458
543
|
return None, None
|
|
459
|
-
|
|
544
|
+
|
|
460
545
|
finally:
|
|
461
|
-
# --- CONTEXT FIX: Pop the context ---
|
|
462
546
|
if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
|
|
463
|
-
|
|
464
|
-
|
|
547
|
+
try:
|
|
548
|
+
SMatrix.ctx.pop()
|
|
549
|
+
except Exception:
|
|
550
|
+
pass
|