AOT-biomaps 2.9.261__py3-none-any.whl → 2.9.318__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,6 @@ def MLEM(
     max_saves=5000,
     show_logs=True,
     smatrixType=SMatrixType.SELL,
-    Z=350,
 ):
     """
     Unified MLEM algorithm for Acousto-Optic Tomography.
@@ -59,11 +58,11 @@ def MLEM(
     # Dispatch to the appropriate implementation
     if use_gpu:
         if smatrixType == SMatrixType.CSR:
-            return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, Z, show_logs)
+            return MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.SELL:
-            return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
+            return MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         elif smatrixType == SMatrixType.DENSE:
-            return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
+            return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
         else:
            raise ValueError("Unsupported SMatrixType for GPU MLEM.")
     else:
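
Net effect of the two hunks above: the Z=350 default disappears from the signature, and neither device nor Z is forwarded to the GPU back ends any more; both are now derived from the SMatrix object inside the per-format implementations. A minimal call sketch against the new dispatcher (the full parameter list of MLEM() is only partly visible in this diff, so every keyword below other than smatrixType, max_saves and show_logs is an assumption):

    # Hypothetical usage; SMatrix and y are assumed to be prepared elsewhere.
    theta, saved_idx = MLEM(
        SMatrix,                        # SparseSMatrix_SELL / SparseSMatrix_CSR
        y,                              # measured data, float32, length T*N
        numIterations=200,              # assumed keyword
        isSavingEachIteration=False,    # assumed keyword
        smatrixType=SMatrixType.SELL,   # dispatches to MLEM_sparseSELL_pycuda
        max_saves=5000,
        show_logs=True,
    )
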
@@ -229,35 +228,60 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
         print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
         return None, None
 
-def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs=True):
+def MLEM_sparseCSR_pycuda(
+    SMatrix,
+    y,
+    numIterations,
+    isSavingEachIteration,
+    tumor_str,
+    max_saves,
+    denominator_threshold,
+    show_logs=True,
+):
     """
-    SMatrix: instance of SparseMatrixGPU (already allocated)
-    y: measured data (1D np.float32 of length TN)
-
-    Assumptions:
-    - SMatrix.values_gpu and SMatrix.col_ind_gpu and SMatrix.row_ptr_gpu are device pointers
-    - SMatrix.norm_factor_inv_gpu exists
+    Robust MLEM implementation for CSR SMatrix using PyCUDA kernels.
+    Expects SMatrix to be SparseSMatrix_CSR with attributes:
+    - values_gpu, col_ind_gpu, row_ptr_gpu (device pointers)
+    - norm_factor_inv_gpu (device pointer)
+    - sparse_mod (loaded module with kernels)
+    - ctx (PyCUDA context)
+    Returns (saved_theta_list, saved_indices) if isSavingEachIteration else (final_theta, None)
     """
+    final_result = None
+
+    # Local holders to free in finally
+    y_gpu = q_flat_gpu = e_flat_gpu = c_flat_gpu = theta_flat_gpu = None
+
     try:
         if not isinstance(SMatrix, SparseSMatrix_CSR):
             raise TypeError("SMatrix must be a SparseSMatrix_CSR object")
+
+        # push context (if provided)
+        popped_ctx = False
+        if getattr(SMatrix, "ctx", None):
+            SMatrix.ctx.push()
+            popped_ctx = True
+
         dtype = np.float32
-        TN = SMatrix.N * SMatrix.T
-        ZX = SMatrix.Z * SMatrix.X
-        if Z is None:
-            Z = SMatrix.Z
-        X = SMatrix.X
+        TN = int(SMatrix.N * SMatrix.T)
+        ZX = int(SMatrix.Z * SMatrix.X)
+        Z = int(SMatrix.Z)
+        X = int(SMatrix.X)
 
-        if show_logs:
-            print(f"Dim X: {X}, Dim Z: {Z}, TN: {TN}, ZX: {ZX}")
+        # Make sure required GPU pointers exist
+        if getattr(SMatrix, "values_gpu", None) is None or getattr(SMatrix, "col_ind_gpu", None) is None or getattr(SMatrix, "row_ptr_gpu", None) is None:
+            raise RuntimeError("SMatrix is missing GPU buffers (values_gpu / col_ind_gpu / row_ptr_gpu)")
 
-        # Use existing context from SMatrix
-        # streams
+        if getattr(SMatrix, "norm_factor_inv_gpu", None) is None:
+            raise RuntimeError("SMatrix.norm_factor_inv_gpu not available on GPU")
+
+        # stream for async operations
         stream = drv.Stream()
 
-        # allocate device buffers
-        y_gpu = drv.mem_alloc(y.nbytes)
-        drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
+        # prepare device buffers
+        y_arr = np.ascontiguousarray(y.T.flatten().astype(np.float32))
+        y_gpu = drv.mem_alloc(y_arr.nbytes)
+        drv.memcpy_htod_async(y_gpu, y_arr, stream)
 
         theta_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
         initial_theta = np.full(ZX, 0.1, dtype=dtype)
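
For orientation before the kernel-launch hunk that follows: the four kernels implement one classical ML-EM step, exactly as the inline comments state (q = A * theta; e = y / max(q, threshold); c = A^T * e; theta *= norm_factor_inv * c). A dense NumPy analogue, under the assumption that norm_factor_inv holds the usual inverse sensitivity 1 / (A^T 1), whose construction is outside this diff:

    import numpy as np

    def mlem_step(A, y, theta, eps):
        # One ML-EM iteration mirroring the GPU kernel sequence below.
        q = A @ theta                        # projection_kernel: q = A * theta
        e = y / np.maximum(q, eps)           # ratio_kernel: e = y / max(q, eps)
        c = A.T @ e                          # backprojection_kernel: c = A^T * e
        norm_inv = 1.0 / np.maximum(A.sum(axis=0), eps)  # assumed meaning of norm_factor_inv
        return theta * norm_inv * c          # update_theta_kernel

Since A, y and the 0.1-initialized theta are all nonnegative, every iterate stays nonnegative without explicit clamping.
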
@@ -269,95 +293,177 @@ def MLEM_sparseCSR_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumo
         e_flat_gpu = drv.mem_alloc(TN * np.dtype(dtype).itemsize)
         c_flat_gpu = drv.mem_alloc(ZX * np.dtype(dtype).itemsize)
 
-        mlem_mod = drv.module_from_file('AOT_biomaps_kernels.cubin')
-        projection_kernel = mlem_mod.get_function('projection_kernel__CSR')
-        backprojection_kernel = mlem_mod.get_function('backprojection_kernel__CSR')
-        ratio_kernel = mlem_mod.get_function('ratio_kernel')
-        update_kernel = mlem_mod.get_function('update_theta_kernel')
-
+        # Ensure kernels exist
+        projection_kernel = SMatrix.sparse_mod.get_function("projection_kernel__CSR")
+        backprojection_kernel = SMatrix.sparse_mod.get_function("backprojection_kernel__CSR")
+        ratio_kernel = SMatrix.sparse_mod.get_function("ratio_kernel")
+        update_kernel = SMatrix.sparse_mod.get_function("update_theta_kernel")
         block_size = 256
 
-        saved_theta, saved_indices = [], []
+        # prepare save indices once
         if numIterations <= max_saves:
             save_indices = list(range(numIterations))
         else:
-            save_indices = list(range(0, numIterations, max(1, numIterations // max_saves)))
+            step = max(1, numIterations // max_saves)
+            save_indices = list(range(0, numIterations, step))
         if save_indices[-1] != numIterations - 1:
             save_indices.append(numIterations - 1)
 
+        saved_theta = []
+        saved_indices = []
+
         description = f"AOT-BioMaps -- ML-EM (CSR-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
         iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        # grid sizes
+        grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
+        grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
+
         for it in iterator:
             # projection: q = A * theta
-            projection_kernel(q_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
-                              theta_flat_gpu, np.int32(TN),
-                              block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1),
-                              stream=stream)
+            projection_kernel(
+                q_flat_gpu,
+                SMatrix.values_gpu,
+                SMatrix.row_ptr_gpu,
+                SMatrix.col_ind_gpu,
+                theta_flat_gpu,
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )
 
             # ratio: e = y / max(q, threshold)
-            ratio_kernel(e_flat_gpu, y_gpu, q_flat_gpu, np.float32(denominator_threshold), np.int32(TN),
-                         block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
-
-            # backprojection: c = A^T * e
+            ratio_kernel(
+                e_flat_gpu,
+                y_gpu,
+                q_flat_gpu,
+                np.float32(denominator_threshold),
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )
+
+            # backprojection: c = A^T * e (zero c first)
             drv.memset_d32_async(c_flat_gpu, 0, ZX, stream)
-            backprojection_kernel(c_flat_gpu, SMatrix.values_gpu, SMatrix.row_ptr_gpu, SMatrix.col_ind_gpu,
-                                  e_flat_gpu, np.int32(TN),
-                                  block=(block_size, 1, 1), grid=((TN + block_size - 1) // block_size, 1, 1), stream=stream)
+            backprojection_kernel(
+                c_flat_gpu,
+                SMatrix.values_gpu,
+                SMatrix.row_ptr_gpu,
+                SMatrix.col_ind_gpu,
+                e_flat_gpu,
+                np.int32(TN),
+                block=(block_size, 1, 1),
+                grid=grid_rows,
+                stream=stream,
+            )
 
             # update: theta *= norm_factor_inv * c
-            update_kernel(theta_flat_gpu, c_flat_gpu, norm_factor_inv_gpu, np.int32(ZX),
-                          block=(block_size, 1, 1), grid=((ZX + block_size - 1) // block_size, 1, 1), stream=stream)
-
+            update_kernel(
+                theta_flat_gpu,
+                c_flat_gpu,
+                norm_factor_inv_gpu,
+                np.int32(ZX),
+                block=(block_size, 1, 1),
+                grid=grid_cols,
+                stream=stream,
+            )
+
+            # periodic synchronization for stability / logging
             if show_logs and (it % 10 == 0 or it == numIterations - 1):
-                drv.Context.synchronize()
+                stream.synchronize()
 
+            # save snapshot if required
             if isSavingEachIteration and it in save_indices:
+                # ensure kernels finished
+                stream.synchronize()
                 theta_host = np.empty(ZX, dtype=dtype)
                 drv.memcpy_dtoh(theta_host, theta_flat_gpu)
                 saved_theta.append(theta_host.reshape(Z, X))
-                saved_indices.append(it)
-
-        drv.Context.synchronize()
-
-        result = np.empty(ZX, dtype=dtype)
-        drv.memcpy_dtoh(result, theta_flat_gpu)
-        result = result.reshape(Z, X)
-
-        # free local allocations
-        y_gpu.free(); q_flat_gpu.free(); e_flat_gpu.free(); c_flat_gpu.free(); theta_flat_gpu.free()
-
-        return (saved_theta, saved_indices) if isSavingEachIteration else (result, None)
+                saved_indices.append(int(it))
+
+        # make sure everything finished
+        stream.synchronize()
+        final_theta_host = np.empty(ZX, dtype=dtype)
+        drv.memcpy_dtoh(final_theta_host, theta_flat_gpu)
+        final_result = final_theta_host.reshape(Z, X)
+
+        # free local allocations (will also be freed in finally if exception)
+        try:
+            y_gpu.free()
+            q_flat_gpu.free()
+            e_flat_gpu.free()
+            c_flat_gpu.free()
+            theta_flat_gpu.free()
+        except Exception:
+            pass
+
+        return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
 
     except Exception as e:
         print(f"Error in MLEM_sparseCSR_pycuda: {type(e).__name__}: {e}")
         gc.collect()
         return None, None
 
-def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs=True):
+    finally:
+        # free buffers if still allocated
+        for buf in ("y_gpu", "q_flat_gpu", "e_flat_gpu", "c_flat_gpu", "theta_flat_gpu"):
+            try:
+                val = locals().get(buf, None)
+                if val is not None:
+                    val.free()
+            except Exception:
+                pass
+        # pop context safely
+        try:
+            if SMatrix and hasattr(SMatrix, "ctx") and SMatrix.ctx and popped_ctx:
+                SMatrix.ctx.pop()
+        except Exception:
+            pass
+
+def MLEM_sparseSELL_pycuda(
+    SMatrix,
+    y,
+    numIterations,
+    isSavingEachIteration,
+    tumor_str,
+    max_saves,
+    denominator_threshold,
+    show_logs=True,
+):
     """
     MLEM using SELL-C-σ kernels already present on device.
     y must be float32 length TN.
+
+    Clean version: diagnostics removed.
     """
+    final_result = None
+
     try:
-        # check if SMatrix is SparseSMatrix_SELL object
         if not isinstance(SMatrix, SparseSMatrix_SELL):
             raise TypeError("SMatrix must be a SparseSMatrix_SELL object")
         if SMatrix.sell_values_gpu is None:
             raise RuntimeError("SELL not built. Call allocate_sell_c_sigma_direct() first.")
+
+        # Context
+        if SMatrix.ctx:
+            SMatrix.ctx.push()
+
         TN = int(SMatrix.N * SMatrix.T)
         ZX = int(SMatrix.Z * SMatrix.X)
         dtype = np.float32
         block_size = 256
 
-        mod = SMatrix.sparse_mod
-        proj = mod.get_function("projection_kernel__SELL")
-        backproj = mod.get_function("backprojection_kernel__SELL")
-        ratio = mod.get_function("ratio_kernel")
-        update = mod.get_function("update_theta_kernel")
+        proj = SMatrix.sparse_mod.get_function("projection_kernel__SELL")
+        backproj = SMatrix.sparse_mod.get_function("backprojection_kernel__SELL")
+        ratio = SMatrix.sparse_mod.get_function("ratio_kernel")
+        update = SMatrix.sparse_mod.get_function("update_theta_kernel")
 
         stream = drv.Stream()
 
-        # device buffers
+        # Device buffers
+        y = y.T.flatten().astype(np.float32)
         y_gpu = drv.mem_alloc(y.nbytes)
         drv.memcpy_htod_async(y_gpu, y.astype(dtype), stream)
 
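
The SELL path below launches its kernels over sell_values_gpu / sell_colinds_gpu together with slice_ptr_gpu, slice_len_gpu and slice_height. In the SELL-C-σ format those names suggest, rows are grouped into slices of slice_height rows and each slice is padded to the length of its longest row. A host-side sketch of the slice bookkeeping, offered as an assumption since the authoritative layout lives in allocate_sell_c_sigma_direct, outside this diff:

    import numpy as np

    def sell_slice_layout(row_lens, C):
        # row_lens: nonzeros per matrix row; C: slice height (slice_height).
        n_slices = (len(row_lens) + C - 1) // C
        slice_len = np.empty(n_slices, dtype=np.int32)      # padded width of each slice
        slice_ptr = np.zeros(n_slices + 1, dtype=np.int32)  # element offset of each slice
        for s in range(n_slices):
            rows = row_lens[s * C:(s + 1) * C]
            slice_len[s] = max(rows)                        # pad to the longest row in the slice
            slice_ptr[s + 1] = slice_ptr[s] + C * slice_len[s]
        return slice_ptr, slice_len                         # total storage = slice_ptr[-1]
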
@@ -375,6 +481,7 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
         grid_rows = ((TN + block_size - 1) // block_size, 1, 1)
         grid_cols = ((ZX + block_size - 1) // block_size, 1, 1)
 
+        # Prepare save indices
         saved_theta, saved_indices = [], []
         if numIterations <= max_saves:
             save_indices = list(range(numIterations))
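
The snapshot schedule set up here is shared with the CSR path: it strides through the iterations and always keeps the final iterate, so the number of saves can slightly exceed max_saves. A worked example:

    numIterations, max_saves = 12000, 5000
    step = max(1, numIterations // max_saves)            # 12000 // 5000 = 2
    save_indices = list(range(0, numIterations, step))   # 0, 2, ..., 11998
    if save_indices[-1] != numIterations - 1:
        save_indices.append(numIterations - 1)           # keep the last iterate, 11999
    print(len(save_indices))                             # 6001, slightly above max_saves
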
@@ -385,44 +492,60 @@ def MLEM_sparseSELL_pycuda(SMatrix, y, numIterations, isSavingEachIteration, tum
 
         description = f"AOT-BioMaps -- ML-EM (SELL-c-σ-sparse SMatrix) ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
         iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+
+        # --- MLEM Loop ---
         for it in iterator:
-            # projection
-            proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
-                 theta_gpu, np.int32(TN), slice_height,
-                 block=(block_size,1,1), grid=grid_rows, stream=stream)
 
-            # ratio
+            proj(q_gpu, SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+                 slice_ptr_gpu, slice_len_gpu,
+                 theta_gpu, np.int32(TN), slice_height,
+                 block=(block_size,1,1), grid=grid_rows, stream=stream)
+
             ratio(e_gpu, y_gpu, q_gpu, np.float32(denominator_threshold), np.int32(TN),
-                block=(block_size,1,1), grid=grid_rows, stream=stream)
+                  block=(block_size,1,1), grid=grid_rows, stream=stream)
 
-            # zero c
             drv.memset_d32_async(c_gpu, 0, ZX, stream)
 
-            # backprojection accumulate
-            backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu, slice_ptr_gpu, slice_len_gpu,
-                     e_gpu, c_gpu, np.int32(TN), slice_height,
-                     block=(block_size,1,1), grid=grid_rows, stream=stream)
+            backproj(SMatrix.sell_values_gpu, SMatrix.sell_colinds_gpu,
+                     slice_ptr_gpu, slice_len_gpu,
+                     e_gpu, c_gpu, np.int32(TN), slice_height,
+                     block=(block_size,1,1), grid=grid_rows, stream=stream)
 
-            # update
             update(theta_gpu, c_gpu, SMatrix.norm_factor_inv_gpu, np.int32(ZX),
-                block=(block_size,1,1), grid=grid_cols, stream=stream)
+                   block=(block_size,1,1), grid=grid_cols, stream=stream)
 
-            stream.synchronize()
             if isSavingEachIteration and it in save_indices:
                 out = np.empty(ZX, dtype=np.float32)
                 drv.memcpy_dtoh(out, theta_gpu)
                 saved_theta.append(out.reshape((SMatrix.Z, SMatrix.X)))
                 saved_indices.append(it)
 
-        # final copy
+        stream.synchronize()
         res = np.empty(ZX, dtype=np.float32)
         drv.memcpy_dtoh(res, theta_gpu)
 
-        # free temporaries
-        y_gpu.free(); q_gpu.free(); e_gpu.free(); c_gpu.free(); theta_gpu.free()
-        return (saved_theta, saved_indices) if isSavingEachIteration else (res.reshape((SMatrix.Z, SMatrix.X)), None)
+        # free
+        try:
+            y_gpu.free()
+            q_gpu.free()
+            e_gpu.free()
+            c_gpu.free()
+            theta_gpu.free()
+        except Exception:
+            pass
+
+        final_result = res.reshape((SMatrix.Z, SMatrix.X))
+        return (saved_theta, saved_indices) if isSavingEachIteration else (final_result, None)
+
 
     except Exception as e:
         print(f"Error in MLEM_sparseSELL_pycuda: {type(e).__name__}: {e}")
         gc.collect()
         return None, None
+    finally:
+        if SMatrix and hasattr(SMatrix, 'ctx') and SMatrix.ctx:
+            try:
+                SMatrix.ctx.pop()
+            except Exception:
+                pass
+
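
One behavioral difference between the two new implementations is visible above: the CSR path records popped_ctx and pops only a context it actually pushed, while this SELL finally block pops whenever SMatrix.ctx exists, even if an early raise skipped the push. A minimal sketch of the guarded pattern used by the CSR path (smatrix.ctx is assumed to be a pycuda.driver.Context created elsewhere):

    def with_smatrix_context(smatrix, work):
        # Push/pop guarded so the pop happens only if this function pushed.
        popped_ctx = False
        try:
            if getattr(smatrix, "ctx", None):
                smatrix.ctx.push()
                popped_ctx = True
            return work()
        finally:
            if popped_ctx:
                smatrix.ctx.pop()
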