AOT-biomaps 2.9.291.tar.gz → 2.9.312.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of AOT-biomaps might be problematic.
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py +2 -3
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py +189 -103
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py +2 -2
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py +8 -15
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py +79 -47
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AlgebraicRecon.py +2 -8
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/__init__.py +22 -1
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/PKG-INFO +1 -1
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/PKG-INFO +1 -1
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/setup.py +22 -1
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/AcousticEnums.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/AcousticTools.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/FocusedWave.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/IrregularWave.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/PlaneWave.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/StructuredWave.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/__init__.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Acoustic/_mainAcoustic.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/Focus.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/Tomography.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/__init__.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Experiment/_mainExperiment.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/Absorber.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/Laser.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/OpticEnums.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/__init__.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Optic/_mainOptic.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/DEPIERRO.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/MAPEM.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_Optimizers/__init__.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Huber.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/Quadratic.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/RelativeDifferences.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/__init__.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/__init__.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/AnalyticRecon.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/BayesianRecon.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/DeepLearningRecon.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/PrimalDualRecon.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/ReconEnums.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/ReconTools.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/__init__.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/AOT_Recon/_mainRecon.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/Config.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps/Settings.py +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/SOURCES.txt +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/dependency_links.txt +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/requires.txt +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/AOT_biomaps.egg-info/top_level.txt +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/README.md +0 -0
- {aot_biomaps-2.9.291 → aot_biomaps-2.9.312}/setup.cfg +0 -0
AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py

@@ -23,8 +23,7 @@ def LS(
     denominator_threshold=1e-6,
     max_saves=5000,
     show_logs=True,
-    smatrixType=SMatrixType.SELL
-    Z=350,
+    smatrixType=SMatrixType.SELL
 ):
     """
     Least Squares reconstruction using Projected Gradient Descent (PGD) with non-negativity constraint.

@@ -44,7 +43,7 @@ def LS(
     # Dispatch to the appropriate implementation
     if use_gpu:
         if smatrixType == SMatrixType.CSR:
-            return _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold,
+            return _LS_CG_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.SELL:
            return _LS_CG_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.DENSE:
AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py

@@ -26,7 +26,6 @@ def MLEM(
     max_saves=5000,
     show_logs=True,
     smatrixType=SMatrixType.SELL,
-    Z=350,
 ):
     """
     Unified MLEM algorithm for Acousto-Optic Tomography.

@@ -59,11 +58,11 @@ def MLEM(
     # Dispatch to the appropriate implementation
     if use_gpu:
         if smatrixType == SMatrixType.CSR:
-            return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
+            return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.SELL:
-            return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
+            return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.DENSE:
-            return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
+            return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         else:
             raise ValueError("Unsupported SMatrixType for GPU MLEM.")
     else:
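For orientation, every branch of the dispatcher above implements the same multiplicative MLEM update, theta ← theta · (Aᵀ(y / max(A·theta, eps))) / (Aᵀ·1); only the storage format of A (dense, CSR, SELL-C-σ) and the execution backend differ. A minimal dense NumPy sketch of that iteration follows; it is not taken from the package, and the shapes, names and values are purely illustrative:

# Reference MLEM iteration (dense NumPy sketch, illustrative only -- not package code).
import numpy as np

rng = np.random.default_rng(0)
A = rng.random((500, 64)).astype(np.float32)            # system matrix, shape (TN, ZX)
theta_true = rng.random(64).astype(np.float32)
y = A @ theta_true                                       # noiseless measurements

eps = 1e-6                                               # plays the role of denominator_threshold
norm_factor_inv = 1.0 / np.maximum(A.sum(axis=0), eps)   # 1 / (A^T * 1), the sensitivity image

theta = np.full(64, 0.1, dtype=np.float32)               # same 0.1 initialization as the GPU paths
for _ in range(200):
    q = A @ theta                                        # projection
    e = y / np.maximum(q, eps)                           # ratio with denominator clamp
    c = A.T @ e                                          # backprojection
    theta = theta * norm_factor_inv * c                  # multiplicative update

print(float(np.linalg.norm(theta - theta_true) / np.linalg.norm(theta_true)))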
@@ -229,49 +228,60 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
         print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
         return None, None

-def MLEM_sparseCSR_pycuda(
+def MLEM_sparseCSR_pycuda(
+    SMatrix,
+    y,
+    numIterations,
+    isSavingEachIteration,
+    tumor_str,
+    max_saves,
+    denominator_threshold,
+    show_logs=True,
+):
     """
-    (old docstring, truncated in the diff view)
+    Robust MLEM implementation for CSR SMatrix using PyCUDA kernels.
+    Expects SMatrix to be SparseSMatrix_CSR with attributes:
+    - values_gpu, col_ind_gpu, row_ptr_gpu (device pointers)
+    - norm_factor_inv_gpu (device pointer)
+    - sparse_mod (loaded module with kernels)
+    - ctx (PyCUDA context)
+    Returns (saved_theta_list, saved_indices) if isSavingEachIteration else (final_theta, None)
     """
-    # We use a final_result placeholder to ensure it's defined outside the try block
     final_result = None
+
+    # Local holders to free in finally
+    y_gpu = q_flat_gpu = e_flat_gpu = c_flat_gpu = theta_flat_gpu = None
+
     try:
         if not isinstance(SMatrix, SparseSMatrix_CSR):
             raise TypeError("SMatrix must be a SparseSMatrix_CSR object")

-        if SMatrix
+        # push context (if provided)
+        popped_ctx = False
+        if getattr(SMatrix, "ctx", None):
             SMatrix.ctx.push()
+            popped_ctx = True

         dtype = np.float32
-        TN = SMatrix.N * SMatrix.T
-        ZX = SMatrix.Z * SMatrix.X
-        if
+        TN = int(SMatrix.N * SMatrix.T)
+        ZX = int(SMatrix.Z * SMatrix.X)
+        Z = int(SMatrix.Z)
+        X = int(SMatrix.X)
+
+        # Make sure required GPU pointers exist
+        if getattr(SMatrix, "values_gpu", None) is None or getattr(SMatrix, "col_ind_gpu", None) is None or getattr(SMatrix, "row_ptr_gpu", None) is None:
+            raise RuntimeError("SMatrix is missing GPU buffers (values_gpu / col_ind_gpu / row_ptr_gpu)")
+
+        if getattr(SMatrix, "norm_factor_inv_gpu", None) is None:
+            raise RuntimeError("SMatrix.norm_factor_inv_gpu not available on GPU")
+
+        # stream for async operations
         stream = drv.Stream()

-        y_gpu = drv.mem_alloc(
-        drv.memcpy_htod_async(y_gpu,
+        # prepare device buffers
+        y_arr = np.ascontiguousarray(y.T.flatten().astype(np.float32))
+        y_gpu = drv.mem_alloc(y_arr.nbytes)
+        drv.memcpy_htod_async(y_gpu, y_arr, stream)

         theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
         initial_theta = np.full(ZX, 0.1, dtype=dtype)

@@ -283,61 +293,111 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
         e_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
         c_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)

-        projection_kernel = SMatrix.sparse_mod.get_function(
-        backprojection_kernel = SMatrix.sparse_mod.get_function(
-        ratio_kernel = SMatrix.sparse_mod.get_function(
-        update_kernel = SMatrix.sparse_mod.get_function(
+        # Ensure kernels exist
+        projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__CSR")
+        backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__CSR")
+        ratio_kernel = SMatrix.sparse_mod.get_function("ratio_kernel")
+        update_kernel = SMatrix.sparse_mod.get_function("update_theta_kernel")
         block_size = 256

+        # prepare save indices once
         if numIterations <= max_saves:
             save_indices = list(range(numIterations))
         else:
+            step = max(1, numIterations // max_saves)
+            save_indices = list(range(0, numIterations, step))
             if save_indices[-1] != numIterations - 1:
                 save_indices.append(numIterations - 1)

+        saved_theta = []
+        saved_indices = []
+
         description = f"AOT-BioMaps -- ML-EM (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
         iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        # grid sizes
+        grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
+        grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
+
         for it in iterator:
             # projection: q = A * theta
-            projection_kernel(
+            projection_kernel(
+                q_flat_gpu,
+                SMatrix.values_gpu,
+                SMatrix.row_ptr_gpu,
+                SMatrix.col_ind_gpu,
+                theta_flat_gpu,
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )

             # ratio: e = y / max(q, threshold)
-            ratio_kernel(
+            ratio_kernel(
+                e_flat_gpu,
+                y_gpu,
+                q_flat_gpu,
+                np.float32(denominator_threshold),
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )
+
+            # backprojection: c = A^T * e (zero c first)
             drv.memset_d32_async(c_flat_gpu, 0, ZX, stream)
-            backprojection_kernel(
+            backprojection_kernel(
+                c_flat_gpu,
+                SMatrix.values_gpu,
+                SMatrix.row_ptr_gpu,
+                SMatrix.col_ind_gpu,
+                e_flat_gpu,
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )

             # update: theta *= norm_factor_inv * c
-            update_kernel(
+            update_kernel(
+                theta_flat_gpu,
+                c_flat_gpu,
+                norm_factor_inv_gpu,
+                np.int32(ZX),
+                block=(block_size, 1, 1),
+                grid=grid_cols,
+                stream=stream,
+            )
+
+            # periodic synchronization for stability / logging
             if show_logs and (it % 10 == 0 or it == numIterations - 1):
+                stream.synchronize()

+            # save snapshot if required
             if isSavingEachIteration and it in save_indices:
+                # ensure kernels finished
+                stream.synchronize()
                 theta_host = np.empty(ZX, dtype=dtype)
                 drv.memcpy_dtoh(theta_host, theta_flat_gpu)
                 saved_theta.append(theta_host.reshape(Z, X))
-                saved_indices.append(it)
-        drv.memcpy_dtoh(
-        final_result =
-        # free local allocations
+                saved_indices.append(int(it))
+
+        # make sure everything finished
+        stream.synchronize()
+        final_theta_host = np.empty(ZX, dtype=dtype)
+        drv.memcpy_dtoh(final_theta_host, theta_flat_gpu)
+        final_result = final_theta_host.reshape(Z, X)
+
+        # free local allocations (will also be freed in finally if exception)
+        try:
+            y_gpu.free()
+            q_flat_gpu.free()
+            e_flat_gpu.free()
+            c_flat_gpu.free()
+            theta_flat_gpu.free()
+        except Exception:
+            pass

         return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)

@@ -345,32 +405,50 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
         print(f"Error in MLEM_sparseCSR_pycuda: {type(e).__name__}: {e}")
         gc.collect()
         return None, None

-    finally:
-        # --- CONTEXT FIX: Pop the context ---
-        if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
-            SMatrix.ctx.pop()
-        # ------------------------------------
+    finally:
+        # free buffers if still allocated
+        for buf in ("y_gpu", "q_flat_gpu", "e_flat_gpu", "c_flat_gpu", "theta_flat_gpu"):
+            try:
+                val = locals().get(buf, None)
+                if val is not None:
+                    val.free()
+            except Exception:
+                pass
+        # pop context safely
+        try:
+            if SMatrix and hasattr(SMatrix, "ctx") and SMatrix.ctx and popped_ctx:
+                SMatrix.ctx.pop()
+        except Exception:
+            pass
+
+def MLEM_sparseSELL_pycuda(
+    SMatrix,
+    y,
+    numIterations,
+    isSavingEachIteration,
+    tumor_str,
+    max_saves,
+    denominator_threshold,
+    show_logs=True,
+):
     """
     MLEM using SELL-C-σ kernels already present on device.
     y must be float32 length TN.
+
+    Clean version: diagnostics removed.
     """
     final_result = None

     try:
-        # check if SMatrix is SparseSMatrix_SELL object
         if not isinstance(SMatrix, SparseSMatrix_SELL):
             raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
         if SMatrix.sell_values_gpu is None:
             raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")

-        # This ensures all subsequent PyCUDA operations use the correct GPU/context.
+        # Context
         if SMatrix.ctx:
             SMatrix.ctx.push()
-        # -----------------------------------------------------------

         TN = int(SMatrix.N * SMatrix.T)
         ZX = int(SMatrix.Z * SMatrix.X)

@@ -384,7 +462,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
         stream = drv.Stream()

-        #
+        # Device buffers
         y = y.T.flatten().astype(np.float32)
         y_gpu = drv.mem_alloc(y.nbytes)
         drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)

@@ -403,6 +481,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
         grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
         grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)

+        # Prepare save indices
         saved_theta, saved_indices = [], []
         if numIterations <= max_saves:
             save_indices = list(range(numIterations))

@@ -413,52 +492,59 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
         description = f"AOT-BioMaps -- ML-EM (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
         iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        # --- MLEM Loop ---
         for it in iterator:
-            # projection
-            proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
-                theta_gpu, np.int32(TN), slice_height,
-                block=(block_size,1,1), grid=grid_rows, stream=stream)
+            proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+                 slice_ptr_gpu, slice_len_gpu,
+                 theta_gpu, np.int32(TN), slice_height,
+                 block=(block_size,1,1), grid=grid_rows, stream=stream)

             ratio(e_gpu, y_gpu, q_gpu, np.float32(denominator_threshold), np.int32(TN),
-
+                  block=(block_size,1,1), grid=grid_rows, stream=stream)

-            # zero c
             drv.memset_d32_async(c_gpu, 0, ZX, stream)

+            backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+                     slice_ptr_gpu, slice_len_gpu,
+                     e_gpu, c_gpu, np.int32(TN), slice_height,
+                     block=(block_size,1,1), grid=grid_rows, stream=stream)

-            # update
             update(theta_gpu, c_gpu, SMatrix.norm_factor_inv_gpu, np.int32(ZX),
-
+                   block=(block_size,1,1), grid=grid_cols, stream=stream)

-            stream.synchronize()
             if isSavingEachIteration and it in save_indices:
                 out = np.empty(ZX, dtype=np.float32)
                 drv.memcpy_dtoh(out, theta_gpu)
                 saved_theta.append(out.reshape((SMatrix.Z, SMatrix.X)))
                 saved_indices.append(it)

+        stream.synchronize()
         res = np.empty(ZX, dtype=np.float32)
         drv.memcpy_dtoh(res, theta_gpu)

         # free
+        try:
+            y_gpu.free()
+            q_gpu.free()
+            e_gpu.free()
+            c_gpu.free()
+            theta_gpu.free()
+        except Exception:
+            pass

         final_result = res.reshape((SMatrix.Z, SMatrix.X))
         return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)

     except Exception as e:
         print(f"Error in MLEM_sparseSELL_pycuda: {type(e).__name__}: {e}")
         gc.collect()
         return None, None

     finally:
-        # --- CONTEXT FIX: Pop the context ---
         if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
-            SMatrix.ctx.pop()
+            try:
+                SMatrix.ctx.pop()
+            except Exception:
+                pass
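Both GPU paths above cap how many intermediate reconstructions are kept by subsampling the iteration indices once before the loop. The selection logic is reproduced here standalone (same arithmetic as in the hunks; the example values are illustrative):

# Illustration of the save-index subsampling used by the MLEM GPU paths.
numIterations, max_saves = 1000, 7

if numIterations <= max_saves:
    save_indices = list(range(numIterations))
else:
    step = max(1, numIterations // max_saves)
    save_indices = list(range(0, numIterations, step))
    if save_indices[-1] != numIterations - 1:
        save_indices.append(numIterations - 1)   # always keep the final iterate

print(save_indices)   # [0, 142, 284, 426, 568, 710, 852, 994, 999]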
AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py

@@ -51,7 +51,7 @@ def CP_TV(
         if smatrixType == SMatrixType.CSR:
             raise NotImplementedError("GPU Chambolle Pock (LS-TV) with CSR not implemented.")
         elif smatrixType == SMatrixType.SELL:
-            return
+            return CP_TV_Tikhonov_sparseSELL_pycuda(SMatrix, y, alpha, beta, theta, numIterations, isSavingEachIteration, L, tumor_str, device, max_saves, show_logs, k_security, use_power_method, auto_alpha_gamma, apply_positivity_clamp, tikhonov_as_gradient, use_laplacian, laplacian_beta_scale)
         elif smatrixType == SMatrixType.DENSE:
             return CP_TV_dense(SMatrix, y, alpha, theta, numIterations, isSavingEachIteration, L, tumor_str, device, max_saves, show_logs)
         else:

@@ -223,7 +223,7 @@ def CP_TV_dense(
     else:
         return (x.reshape(Z, X) * (norm_y / norm_A)).cpu().numpy(), None

-def
+def CP_TV_Tikhonov_sparseSELL_pycuda(
     SMatrix,
     y,
     alpha=None,  # TV regularization parameter (if None, alpha is auto-scaled)
AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py

@@ -224,27 +224,20 @@ class SparseSMatrix_CSR:
     def getMatrixSize(self):
         """
         Returns the total size of the CSR matrix in GB (summing the GPU memory).
+        Uses the stored size attributes to work around the AttributeError raised by DeviceAllocation.
         """
+        # Note: the caller must ensure that self.row_ptr exists before this call.
         if self.row_ptr is None:
             return {"error": "La matrice sparse n'est pas encore allouée."}

         total_bytes = 0
-
-        # GPU memory (row_ptr_gpu, col_ind_gpu, values_gpu, norm_factor_inv_gpu)
-        if hasattr(self, 'row_ptr_gpu') and self.row_ptr_gpu:
-            total_bytes += self.row_ptr_gpu.size
-        if hasattr(self, 'col_ind_gpu') and self.col_ind_gpu:
-            total_bytes += self.col_ind_gpu.size
-        if hasattr(self, 'values_gpu') and self.values_gpu:
-            total_bytes += self.values_gpu.size
-        if hasattr(self, 'norm_factor_inv_gpu') and self.norm_factor_inv_gpu:
-            total_bytes += self.norm_factor_inv_gpu.size
-
-        # NOTE: previous versions used the .size of the DeviceAllocation object,
-        # which was problematic. If the error shows up again here, the size in
-        # bytes will have to be stored, as was done for SELL.
-        # For now, we keep the original CSR getMatrixSize method.

+        # Sum of the stored sizes (computed and assigned in allocate and compute_norm_factor_from_csr)
+        total_bytes += getattr(self, 'row_ptr_gpu_size', 0)
+        total_bytes += getattr(self, 'col_ind_gpu_size', 0)
+        total_bytes += getattr(self, 'values_gpu_size', 0)
+        total_bytes += getattr(self, 'norm_factor_inv_gpu_size', 0)
+
         return total_bytes / (1024**3)

     def free(self):
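The rewritten getMatrixSize() relies on byte counts recorded at allocation time because pycuda's DeviceAllocation handle does not expose the size of the allocation. A minimal sketch of that bookkeeping pattern follows; it assumes a CUDA-capable device and pycuda, and the variable names are illustrative rather than the class's actual attributes:

# Sketch: record allocation sizes alongside the DeviceAllocation handles,
# since pycuda.driver.DeviceAllocation does not carry a .size attribute.
import numpy as np
import pycuda.autoinit  # noqa: F401  (creates a context; requires a CUDA GPU)
import pycuda.driver as drv

values = np.random.rand(1_000_000).astype(np.float32)

values_gpu_size = values.nbytes            # remember the size in bytes ourselves
values_gpu = drv.mem_alloc(values_gpu_size)
drv.memcpy_htod(values_gpu, values)

total_bytes = values_gpu_size              # sum the recorded sizes, not handle attributes
print(f"GPU footprint: {total_bytes / 1024**3:.6f} GB")
values_gpu.free()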
AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py

@@ -92,13 +92,11 @@ class SparseSMatrix_SELL:
     def allocate(self):
         """
         Build SELL-C-σ directly from manip AcousticFields in streaming blocks.
-
+        Corrected: per-block row_nnz copy, zeroing of host block, proper sync.
         """
         if self.sparse_mod is None:
             raise RuntimeError("CUDA module not loaded. Check compilation.")

-        # NOTE: the kernel names (count_nnz_rows_kernel, fill_kernel__SELL) are used
-        # because they are the ones present in the working class.
         count_kernel = self.sparse_mod.get_function("count_nnz_rows_kernel")
         fill_kernel = self.sparse_mod.get_function("fill_kernel__SELL")

@@ -106,34 +104,34 @@ class SparseSMatrix_SELL:
         num_cols = int(self.Z * self.X)
         C = int(self.slice_height)

-        # host temporary block
         br = int(self.block_rows)
-        bytes_per_elem = np.dtype(np.float32).itemsize
         dense_host = np.empty((br, num_cols), dtype=np.float32)

-        # Allocation
+        # Allocation dense buffer on device (size = br * num_cols)
         dense_gpu_size = dense_host.nbytes
         dense_gpu = drv.mem_alloc(dense_gpu_size)

-        # 1) count nnz per row (
+        # 1) count nnz per row (per block)
         row_nnz = np.zeros(num_rows, dtype=np.int32)
         row_nnz_gpu_block_size = br * np.dtype(np.int32).itemsize
         row_nnz_gpu_block = drv.mem_alloc(row_nnz_gpu_block_size)

-        block =
+        block = 128
         for b in trange(0, num_rows, br, desc="Count NNZ per row"):
             R = min(br, num_rows - b)
-            #
+            # zero the host block to avoid garbage in tail when R < br
+            dense_host.fill(0.0)
             for i in range(R):
                 rg = b + i
                 n_idx = rg // self.T
                 t_idx = rg % self.T
                 dense_host[i, :] = self.manip.AcousticFields[n_idx].field[t_idx].flatten()
-            # copy
+            # copy whole buffer (safe because we zeroed tail)
             drv.memcpy_htod(dense_gpu, dense_host)
             grid = ((R + block - 1) // block, 1, 1)
             count_kernel(dense_gpu, row_nnz_gpu_block, np.int32(R), np.int32(num_cols), np.float32(self.relative_threshold),
-
+                         block=(block,1,1), grid=grid)
+            drv.Context.synchronize()
             tmp = np.empty(R, dtype=np.int32)
             drv.memcpy_dtoh(tmp, row_nnz_gpu_block)
             row_nnz[b:b+R] = tmp

@@ -148,7 +146,6 @@ class SparseSMatrix_SELL:
             r0 = s * C
             r1 = min(num_rows, r0 + C)
             slice_len[s] = int(np.max(row_nnz[r0:r1])) if (r1>r0) else 0
-        # slice_ptr (int64)
         slice_ptr = np.zeros(num_slices + 1, dtype=np.int64)
         for s in range(num_slices):
             slice_ptr[s+1] = slice_ptr[s] + (slice_len[s] * C)

@@ -160,9 +157,14 @@ class SparseSMatrix_SELL:
         self.sell_values_gpu_size = total_storage * np.dtype(np.float32).itemsize
         self.sell_colinds_gpu_size = total_storage * np.dtype(np.uint32).itemsize

+        # allocate and optionally zero them
         self.sell_values_gpu = drv.mem_alloc(self.sell_values_gpu_size)
+        # It's good practice to zero the values buffer to avoid leftover memory
+        drv.memset_d32(self.sell_values_gpu, 0, total_storage)
+
         self.sell_colinds_gpu = drv.mem_alloc(self.sell_colinds_gpu_size)
-
+        drv.memset_d32(self.sell_colinds_gpu, 0, total_storage)
+
         # allocate slice metadata on device
         self.slice_ptr = slice_ptr
         self.slice_len = slice_len

@@ -177,29 +179,28 @@ class SparseSMatrix_SELL:
         drv.memcpy_htod(self.slice_len_gpu, self.slice_len)

         # 3) fill SELL arrays by streaming blocks again (use GPU fill kernel)
-        # reuse dense_host and allocate new dense_gpu
         dense_host = np.empty((br, num_cols), dtype=np.float32)
+        dense_gpu = drv.mem_alloc(dense_host.nbytes)

-        # we also need row_nnz on device per-block; supply global row_nnz on host but the kernel recomputes threshold
-        row_nnz_host_gpu_size = br * np.dtype(np.int32).itemsize
-        row_nnz_host_gpu = drv.mem_alloc(row_nnz_host_gpu_size)
+        # For per-block row_nnz pointer we allocate a buffer of max block size once, then reuse
+        row_nnz_host_gpu = drv.mem_alloc(br * np.dtype(np.int32).itemsize)

         for b in trange(0, num_rows, br, desc="Fill SELL"):
             R = min(br, num_rows - b)
+            dense_host.fill(0.0)
             for i in range(R):
                 rg = b + i
                 n_idx = rg // self.T
                 t_idx = rg % self.T
                 dense_host[i, :] = self.manip.AcousticFields[n_idx].field[t_idx].flatten()
+            # copy host block
             drv.memcpy_htod(dense_gpu, dense_host)
-            #
+            # copy corresponding row_nnz slice (only R entries)
+            drv.memcpy_htod(row_nnz_host_gpu, row_nnz[b:b+R])
+
             grid = ((R + block - 1) // block, 1, 1)
             fill_kernel(dense_gpu,
-
+                        row_nnz_host_gpu,
                         self.slice_ptr_gpu,
                         self.slice_len_gpu,
                         self.sell_colinds_gpu,

@@ -210,12 +211,14 @@ class SparseSMatrix_SELL:
                         np.int32(C),
                         np.float32(self.relative_threshold),
                         block=(block,1,1), grid=grid)
+            drv.Context.synchronize()
+
         dense_gpu.free()
         row_nnz_host_gpu.free()

         # 4) compute norm_factor_inv via GPU accumulate (col sums)
         self.compute_norm_factor()

     def apply_apodization_gpu(self, window_vector_gpu):
         """
         Applies the apodization window directly on self.sell_values_gpu
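allocate() therefore produces, for each slice of C consecutive rows, a width slice_len[s] equal to the largest row non-zero count in the slice and an offset slice_ptr[s+1] = slice_ptr[s] + slice_len[s] * C, with the values/colinds arrays padded to that width. A small self-contained NumPy sketch of the same layout follows (CPU only; the column-major ordering inside each slice is the usual SELL-C-σ convention and is an assumption here, since the actual packing is done by fill_kernel__SELL on the device):

# SELL-C-sigma layout sketch (host-side, NumPy only; illustrative).
import numpy as np

dense = np.array([[5, 0, 0, 1],
                  [0, 2, 0, 0],
                  [7, 0, 3, 0],
                  [0, 0, 0, 4]], dtype=np.float32)
num_rows, num_cols = dense.shape
C = 2                                                    # slice height (slice_height)
num_slices = (num_rows + C - 1) // C

row_nnz = (np.abs(dense) > 0).sum(axis=1).astype(np.int32)

slice_len = np.zeros(num_slices, dtype=np.int32)         # widest row of each slice
for s in range(num_slices):
    r0, r1 = s * C, min(num_rows, s * C + C)
    slice_len[s] = row_nnz[r0:r1].max() if r1 > r0 else 0

slice_ptr = np.zeros(num_slices + 1, dtype=np.int64)      # start offset of each slice
for s in range(num_slices):
    slice_ptr[s + 1] = slice_ptr[s] + slice_len[s] * C

total_storage = int(slice_ptr[-1])                        # includes zero padding
values = np.zeros(total_storage, dtype=np.float32)
colinds = np.zeros(total_storage, dtype=np.uint32)

for r in range(num_rows):
    s, r_in_slice = r // C, r % C
    cols = np.nonzero(dense[r])[0]
    for j, c in enumerate(cols):
        pos = slice_ptr[s] + j * C + r_in_slice            # column-major within the slice
        values[pos] = dense[r, c]
        colinds[pos] = c

# quick check: a SELL-format SpMV reproduces dense @ x
x = np.arange(1, num_cols + 1, dtype=np.float32)
y = np.zeros(num_rows, dtype=np.float32)
for r in range(num_rows):
    s, r_in_slice = r // C, r % C
    for j in range(slice_len[s]):
        pos = slice_ptr[s] + j * C + r_in_slice
        y[r] += values[pos] * x[colinds[pos]]
assert np.allclose(y, dense @ x)

print(slice_len, slice_ptr, total_storage)                 # [2 2] [0 4 8] 8
print(values)                                              # zeros mark the SELL padding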
@@ -234,7 +237,7 @@ class SparseSMatrix_SELL:
         )

         # total_storage includes the non-zero elements and the SELL padding.
-        threads =
+        threads = 128
         blocks = (self.total_storage + threads - 1) // threads

         # Kernel launch: it works on total_storage elements.
@@ -248,43 +251,72 @@ class SparseSMatrix_SELL:
         )
         drv.Context.synchronize()
         print("✅ Multiplication par le fenêtrage effectuée in-place sur GPU (SELL-C-σ).")

     def compute_norm_factor(self):
         """
-        (old docstring, truncated in the diff view)
+        Compute the TRUE MLEM normalization norm_factor_inv = 1 / (A^T * 1)
+        by performing a SELL backprojection of a vector of ones.
+        This is the ONLY correct normalization for MLEM.
         """
-        if self.total_storage == 0:
-            raise RuntimeError("sell not built")
         ZX = int(self.Z * self.X)
+        TN = int(self.T * self.N)

-        drv.memset_d32(col_sum_gpu, 0, ZX)
+        # Allocate device vector of ones (projections)
+        ones_gpu = drv.mem_alloc(TN * np.dtype(np.float32).itemsize)
+        drv.memset_d32(ones_gpu, 0x3f800000, TN)  # 1.0f bit pattern

+        # Allocate output for backprojection (ZX pixels)
+        c_gpu = drv.mem_alloc(ZX * np.dtype(np.float32).itemsize)
+        drv.memset_d32(c_gpu, 0, ZX)
+
+        # Get SELL backprojection kernel
+        try:
+            bp_kernel = self.sparse_mod.get_function("backprojection_kernel__SELL")
+        except Exception as e:
+            raise RuntimeError("Missing kernel backprojection_kernel__SELL in the cubin") from e
+
+        threads = 256
+        blocks = (TN + threads - 1) // threads
+
+        # Launch GPU backprojection
+        bp_kernel(
+            self.sell_values_gpu,
+            self.sell_colinds_gpu,
+            self.slice_ptr_gpu,
+            self.slice_len_gpu,
+            ones_gpu,
+            c_gpu,
+            np.int32(TN),
+            # np.int32(ZX),
+            np.int32(self.slice_height),
+            # np.int64(self.total_storage),
+            block=(threads, 1, 1),  # uses the new thread count
+            grid=(blocks, 1, 1)
+        )
         drv.Context.synchronize()

-        drv.memcpy_dtoh(
+        # Copy back to host
+        c_host = np.empty(ZX, dtype=np.float32)
+        drv.memcpy_dtoh(c_host, c_gpu)
+        ones_gpu.free()
+        c_gpu.free()
+
+        # Avoid divide-by-zero
+        c_host = np.maximum(c_host, 1e-6)
+
+        # Compute inverse (stored for use in MLEM)
+        self.norm_factor_inv = (1.0 / c_host).astype(np.float32)

-        self.norm_factor_inv = (1.0 / norm).astype(np.float32)
+        # Upload to GPU
         if self.norm_factor_inv_gpu is not None:
             self.norm_factor_inv_gpu.free()

         self.norm_factor_inv_gpu_size = self.norm_factor_inv.nbytes
         self.norm_factor_inv_gpu = drv.mem_alloc(self.norm_factor_inv_gpu_size)
         drv.memcpy_htod(self.norm_factor_inv_gpu, self.norm_factor_inv)

+        print("✓ Normalization (A^T*1) computed for MLEM.")
+
     def compute_density(self):
         """
         Returns only the density of the SELL-C-σ matrix.
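The new compute_norm_factor() obtains the MLEM sensitivity image Aᵀ·1 by backprojecting a vector of ones and stores its inverse; the memset pattern 0x3f800000 is simply the IEEE-754 bit pattern of float32 1.0. A CPU cross-check of the same quantity with SciPy on a CSR matrix (illustrative, not package code; matrix sizes are arbitrary):

# CPU cross-check of norm_factor_inv = 1 / (A^T * 1) using SciPy.
import numpy as np
from scipy.sparse import random as sparse_random

TN, ZX = 2000, 400
A = sparse_random(TN, ZX, density=0.05, format="csr", dtype=np.float32, random_state=0)

ones = np.ones(TN, dtype=np.float32)        # what memset_d32(ones_gpu, 0x3f800000, TN) builds on device
col_sums = A.T @ ones                       # equivalently np.asarray(A.sum(axis=0)).ravel()

col_sums = np.maximum(col_sums, 1e-6)       # same divide-by-zero guard as the GPU path
norm_factor_inv = (1.0 / col_sums).astype(np.float32)

print(norm_factor_inv.shape, norm_factor_inv.dtype)   # (400,) float32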
AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin

Binary file (contents not shown).
AOT_biomaps/AOT_Recon/AlgebraicRecon.py

@@ -45,8 +45,6 @@ class AlgebraicRecon(Recon):

         self.sparseThreshold = sparseThreshold

-        self.Z_dim = None # Used for sparse matrix reconstruction
-
         if self.numIterations <= 0:
             raise ValueError("Number of iterations must be greater than 0.")
         if self.numSubsets <= 0:

@@ -760,7 +758,6 @@ class AlgebraicRecon(Recon):
                 max_saves=self.maxSaves,
                 show_logs=show_logs,
                 smatrixType=self.smatrixType,
-                Z=self.Z_dim
             )
         else:
             self.reconLaser, self.indices = MLEM(SMatrix=self.SMatrix,

@@ -774,7 +771,6 @@ class AlgebraicRecon(Recon):
                 max_saves=self.maxSaves,
                 show_logs=show_logs,
                 smatrixType=self.smatrixType,
-                Z=self.Z_dim
             )
         elif self.optimizer.value == OptimizerType.LS.value:
             if self.alpha is None:

@@ -790,8 +786,7 @@ class AlgebraicRecon(Recon):
                 denominator_threshold=self.denominatorThreshold,
                 max_saves=self.maxSaves,
                 show_logs=show_logs,
-                smatrixType=self.smatrixType
-                Z=self.Z_dim
+                smatrixType=self.smatrixType
             )
         else:
             self.reconLaser, self.indices = LS(SMatrix=self.SMatrix,

@@ -805,8 +800,7 @@ class AlgebraicRecon(Recon):
                 denominator_threshold=self.denominatorThreshold,
                 max_saves=self.maxSaves,
                 show_logs=show_logs,
-                smatrixType=self.smatrixType
-                Z=self.Z_dim
+                smatrixType=self.smatrixType
             )
         else:
             raise ValueError(f"Only MLEM and LS are supported for simple algebraic reconstruction. {self.optimizer.value} need Bayesian reconstruction")
AOT_biomaps/__init__.py

@@ -85,7 +85,7 @@ from .AOT_Recon.AOT_PotentialFunctions.RelativeDifferences import *
 from .Config import config
 from .Settings import *

-__version__ = '2.9.291'
+__version__ = '2.9.312'
 __process__ = config.get_process()

 def initialize(process=None):
@@ -135,6 +135,27 @@ def initialize(process=None):
 (this hunk only adds 21 blank lines inside initialize(); no functional change)
setup.py

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

 setup(
     name='AOT_biomaps',
-    version='2.9.291',
+    version='2.9.312',
     packages=find_packages(),
     include_package_data=True,
@@ -293,6 +293,27 @@ setup(
 (this hunk only adds 21 blank lines inside the setup() block; no functional change)

All remaining files listed above are renamed only (path prefix aot_biomaps-2.9.291 → aot_biomaps-2.9.312) with no content changes.