PyPI - AOT-biomaps - Versions diffs - 2.9.138__py3-none-any.whl → 2.9.279__py3-none-any.whl - Mend

AOT-biomaps 2.9.138__py3-none-any.whl → 2.9.279__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of AOT-biomaps might be problematic. Click here for more details.

Files changed (31) hide show

AOT_biomaps/AOT_Acoustic/AcousticTools.py +35 -115
AOT_biomaps/AOT_Acoustic/StructuredWave.py +2 -2
AOT_biomaps/AOT_Acoustic/_mainAcoustic.py +22 -18
AOT_biomaps/AOT_Experiment/Tomography.py +74 -4
AOT_biomaps/AOT_Experiment/_mainExperiment.py +102 -68
AOT_biomaps/AOT_Optic/_mainOptic.py +124 -58
AOT_biomaps/AOT_Recon/AOT_Optimizers/DEPIERRO.py +72 -108
AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py +474 -289
AOT_biomaps/AOT_Recon/AOT_Optimizers/MAPEM.py +173 -68
AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py +360 -154
AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py +150 -111
AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/RelativeDifferences.py +10 -14
AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py +281 -0
AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_SELL.py +328 -0
AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/__init__.py +2 -0
AOT_biomaps/AOT_Recon/AOT_biomaps_kernels.cubin +0 -0
AOT_biomaps/AOT_Recon/AlgebraicRecon.py +359 -238
AOT_biomaps/AOT_Recon/AnalyticRecon.py +29 -41
AOT_biomaps/AOT_Recon/BayesianRecon.py +165 -91
AOT_biomaps/AOT_Recon/DeepLearningRecon.py +4 -1
AOT_biomaps/AOT_Recon/PrimalDualRecon.py +175 -31
AOT_biomaps/AOT_Recon/ReconEnums.py +38 -3
AOT_biomaps/AOT_Recon/ReconTools.py +184 -77
AOT_biomaps/AOT_Recon/__init__.py +1 -0
AOT_biomaps/AOT_Recon/_mainRecon.py +144 -74
AOT_biomaps/__init__.py +4 -36
{aot_biomaps-2.9.138.dist-info → aot_biomaps-2.9.279.dist-info}/METADATA +2 -1
aot_biomaps-2.9.279.dist-info/RECORD +47 -0
aot_biomaps-2.9.138.dist-info/RECORD +0 -43
{aot_biomaps-2.9.138.dist-info → aot_biomaps-2.9.279.dist-info}/WHEEL +0 -0
{aot_biomaps-2.9.138.dist-info → aot_biomaps-2.9.279.dist-info}/top_level.txt +0 -0

AOT_biomaps/AOT_Recon/AOT_Optimizers/PDHG.py CHANGED Viewed

@@ -1,6 +1,6 @@
-from AOT_biomaps.AOT_Recon.ReconTools import power_method, gradient_cpu, gradient_gpu, div_cpu, div_gpu, proj_l2, prox_G, prox_F_star
+from AOT_biomaps.AOT_Recon.ReconTools import power_method, gradient, div, proj_l2, prox_G, prox_F_star
 from AOT_biomaps.Config import config
+from AOT_biomaps.AOT_Recon.ReconEnums import NoiseType
 import torch
 from tqdm import trange
@@ -10,173 +10,212 @@ It includes Chambolle-Pock algorithms for Total Variation (TV) and Kullback-Leib
 The methods can run on both CPU and GPU, with configurations set in the AOT_biomaps.Config module.
 '''
-def chambolle_pock_TV_cpu(SMatrix, y, alpha, theta, numIterations, isSavingEachIteration, L, withTumor):
-    device = torch.device("cpu")
+def CP_TV(
+    SMatrix,
+    y,
+    alpha=1e-1,
+    theta=1.0,
+    numIterations=5000,
+    isSavingEachIteration=True,
+    L=None,
+    withTumor=True,
+    device=None,
+    max_saves=5000,
+):
+    """
+    Chambolle-Pock algorithm for Total Variation (TV) regularization.
+    Works on both CPU and GPU.
+    Args:
+        SMatrix: System matrix (shape: T, Z, X, N)
+        y: Measurement data (shape: T, N)
+        alpha: Regularization parameter for TV
+        theta: Relaxation parameter (1.0 for standard Chambolle-Pock)
+        numIterations: Number of iterations
+        isSavingEachIteration: If True, returns selected intermediate reconstructions
+        L: Lipschitz constant (estimated if None)
+        withTumor: Boolean for description only
+        device: Torch device (auto-selected if None)
+        max_saves: Maximum number of intermediate saves (default: 5000)
+    """
+    # Auto-select device if not provided
+    if device is None:
+        device = torch.device(f"cuda:{config.select_best_gpu()}" if torch.cuda.is_available() else "cpu")
+    else:
+        device = torch.device(device)
+    # Convert data to tensors and move to device
     A = torch.tensor(SMatrix, dtype=torch.float32, device=device)
     y = torch.tensor(y, dtype=torch.float32, device=device)
     T, Z, X, N = SMatrix.shape
     A_flat = A.permute(0, 3, 1, 2).reshape(T * N, Z * X)
     y_flat = y.reshape(-1)
-    P = lambda x: torch.matmul(A_flat, x.ravel())
+    # Robust normalization
+    norm_A = A_flat.abs().max().clamp(min=1e-8)
+    norm_y = y_flat.abs().max().clamp(min=1e-8)
+    A_flat = A_flat / norm_A
+    y_flat = y_flat / norm_y
+    # Define forward/backward operators
+    P = lambda x: torch.matmul(A_flat, x)
     PT = lambda y: torch.matmul(A_flat.T, y)
+    # Estimate Lipschitz constant if needed
     if L is None:
-        L = power_method(P, PT, y_flat, Z, X, isGPU=True)
+        try:
+            L = power_method(P, PT, y_flat, Z, X)
+            L = max(L, 1e-3)
+        except:
+            L = 1.0
     sigma = 1.0 / L
     tau = 1.0 / L
+    # Initialize variables
     x = torch.zeros(Z * X, device=device)
     p = torch.zeros((2, Z, X), device=device)
     q = torch.zeros_like(y_flat)
     x_tilde = x.clone()
-    I_reconMatrix = [x.reshape(Z, X).cpu().numpy()]
-    if withTumor:
-        description = f"AOT-BioMaps -- Primal/Dual Recontruction Tomography : Chambolle-Pock (TV : Gaussian Noise) α:{alpha:.4f} L: {L:.4f} ---- WITH TUMOR ---- processing on single CPU ----"
+    # Calculate save indices
+    if numIterations <= max_saves:
+        save_indices = list(range(numIterations))
     else:
-        description = f"AOT-BioMaps -- Primal/Dual Recontruction Tomography : Chambolle-Pock (TV : Gaussian Noise) α:{alpha:.4f} L: {L:.4f} ---- WITHOUT TUMOR ---- processing on single CPU ----"
+        step = numIterations // max_saves
+        save_indices = list(range(0, numIterations, step))
+        if save_indices[-1] != numIterations - 1:
+            save_indices.append(numIterations - 1)
-    for iteration in trange(numIterations, desc=description):
-        p = proj_l2(p + sigma * gradient_cpu(x_tilde.reshape(Z, X)), alpha)
-        q = (q + sigma * P(x_tilde) - sigma * y_flat) / (1.0 + sigma)
+    I_reconMatrix = []
+    saved_indices = []
-        x_old = x
-        x = x + tau * div_cpu(p).ravel() - tau * PT(q)
-        x_tilde = x + theta * (x - x_old)
+    # Description for progress bar
+    tumor_str = "WITH TUMOR" if withTumor else "WITHOUT TUMOR"
+    device_str = f"GPU no.{torch.cuda.current_device()}" if device.type == "cuda" else "CPU"
+    description = f"AOT-BioMaps -- Primal/Dual Reconstruction (TV) α:{alpha:.4f} L:{L:.4f} -- {tumor_str} -- {device_str}"
-        if iteration % 1 == 0:
-            I_reconMatrix.append(x.reshape(Z, X).cpu().numpy())
+    # Main loop
+    for iteration in trange(numIterations, desc=description):
+        # Update p (TV proximal step)
+        grad_x = gradient(x_tilde.reshape(Z, X))
+        p = proj_l2(p + sigma * grad_x, alpha)
-    return I_reconMatrix if isSavingEachIteration else I_reconMatrix[-1]
+        # Update q (data fidelity)
+        q = (q + sigma * (P(x_tilde) - y_flat)) / (1 + sigma)
-def chambolle_pock_TV_gpu(SMatrix, y, alpha, theta, numIterations, isSavingEachIteration, L, withTumor):
+        # Update x
+        x_old = x.clone()
+        div_p = div(p).ravel()  # Divergence calculation
+        ATq = PT(q)
+        x = (x - tau * (ATq - div_p)) / (1 + tau * 1e-6)  # Light L2 regularization
-    device = torch.device(f"cuda:{config.select_best_gpu()}")
-    A = torch.tensor(SMatrix, dtype=torch.float32, device=device)
-    y = torch.tensor(y, dtype=torch.float32, device=device)
-    T, Z, X, N = SMatrix.shape
-    A_flat = A.permute(0, 3, 1, 2).reshape(T * N, Z * X)
-    y_flat = y.reshape(-1)
-    P = lambda x: torch.matmul(A_flat, x.ravel())
-    PT = lambda y: torch.matmul(A_flat.T, y)
+        # Update x_tilde
+        x_tilde = x + theta * (x - x_old)
-    if L is None:
-        L = power_method(P, PT, y_flat, Z, X, isGPU=True)
+        # Save intermediate result if needed
+        if isSavingEachIteration and iteration in save_indices:
+            I_reconMatrix.append(x.reshape(Z, X).clone() * (norm_y / norm_A))
+            saved_indices.append(iteration)
-    sigma = 1.0 / L
-    tau = 1.0 / L
-    x = torch.zeros(Z * X, device=device)
-    p = torch.zeros((2, Z, X), device=device)
-    q = torch.zeros_like(y_flat)
-    x_tilde = x.clone()
-    I_reconMatrix = [x.reshape(Z, X).cpu().numpy()]
-    if withTumor:
-        description = f"AOT-BioMaps -- Primal/Dual Recontruction Tomography : Chambolle-Pock (TV : Gaussian Noise) α:{alpha:.4f} L: {L:.4f} ---- WITH TUMOR ---- processing on GPU no.{torch.cuda.current_device()} ----"
+    # Return results
+    if isSavingEachIteration:
+        return [tensor.cpu().numpy() for tensor in I_reconMatrix], saved_indices
     else:
-        description = f"AOT-BioMaps -- Primal/Dual Recontruction Tomography : Chambolle-Pock (TV: Gaussian Noise) α:{alpha:.4f} L: {L:.4f} ---- WITHOUT TUMOR ---- processing on GPU no.{torch.cuda.current_device()} ----"
-    for iteration in trange(numIterations, desc=description):
-        p = proj_l2(p + sigma * gradient_gpu(x_tilde.reshape(Z, X)), alpha)
-        q = (q + sigma * P(x_tilde) - sigma * y_flat) / (1.0 + sigma)
-        x_old = x
-        x = x + tau * div_gpu(p).ravel() - tau * PT(q)
-        x_tilde = x + theta * (x - x_old)
-        if iteration % 1 == 0:
-            I_reconMatrix.append(x.reshape(Z, X).cpu().numpy())
-    return I_reconMatrix if isSavingEachIteration else I_reconMatrix[-1]
+        return (x.reshape(Z, X) * (norm_y / norm_A)).cpu().numpy(), None
+def CP_KL(
+    SMatrix,
+    y,
+    alpha=1e-9,
+    theta=1.0,
+    numIterations=5000,
+    isSavingEachIteration=True,
+    L=None,
+    withTumor=True,
+    device=None,
+    max_saves=5000,
+):
+    """
+    Chambolle-Pock algorithm for Kullback-Leibler (KL) divergence regularization.
+    Works on both CPU and GPU.
+    Args:
+        SMatrix: System matrix (shape: T, Z, X, N)
+        y: Measurement data (shape: T, X, N)
+        alpha: Regularization parameter
+        theta: Relaxation parameter (1.0 for standard Chambolle-Pock)
+        numIterations: Number of iterations
+        isSavingEachIteration: If True, returns selected intermediate reconstructions
+        L: Lipschitz constant (estimated if None)
+        withTumor: Boolean for description only
+        device: Torch device (auto-selected if None)
+        max_saves: Maximum number of intermediate saves (default: 5000)
+    """
+    # Auto-select device if not provided
+    if device is None:
+        device = torch.device(f"cuda:{config.select_best_gpu()}" if torch.cuda.is_available() else "cpu")
+    else:
+        device = torch.device(device)
-def chambolle_pock_KL_cpu(SMatrix, y, alpha, theta, numIterations, isSavingEachIteration, L, withTumor):
-    device = torch.device("cpu")
+    # Convert data to tensors and move to device
     A = torch.tensor(SMatrix, dtype=torch.float32, device=device)
     y = torch.tensor(y, dtype=torch.float32, device=device)
     T, Z, X, N = SMatrix.shape
     A_flat = A.permute(0, 3, 1, 2).reshape(T * N, Z * X)
     y_flat = y.reshape(-1)
+    # Define forward/backward operators
     P = lambda x: torch.matmul(A_flat, x.ravel())
     PT = lambda y: torch.matmul(A_flat.T, y)
+    # Estimate Lipschitz constant if needed
     if L is None:
-        L = power_method(P, PT, y_flat, Z, X, isGPU=False)
+        L = power_method(P, PT, y_flat, Z, X)
     sigma = 1.0 / L
     tau = 1.0 / L
+    # Initialize variables
     x = torch.zeros(Z * X, device=device)
     q = torch.zeros_like(y_flat)
     x_tilde = x.clone()
-    I_reconMatrix = [x.reshape(Z, X).cpu().numpy()]
-    if withTumor:
-        description = f"AOT-BioMaps -- Primal/Dual Reconstruction Tomography: Chambolle-Pock (KL) α:{alpha:.4f} L: {L:.4f} ---- WITH TUMOR ---- processing on single CPU ----"
+    # Calculate save indices
+    if numIterations <= max_saves:
+        save_indices = list(range(numIterations))
     else:
-        description = f"AOT-BioMaps -- Primal/Dual Reconstruction Tomography: Chambolle-Pock (KL) α:{alpha:.4f} L: {L:.4f} ---- WITHOUT TUMOR ---- processing on single CPU ----"
-    for iteration in trange(numIterations, desc=description):
-        # Mise à jour de q avec l'opérateur proximal pour F*
-        q = prox_F_star(q + sigma * P(x_tilde) - sigma * y_flat, sigma, y_flat)
-        # Mise à jour de x avec l'opérateur proximal pour G
-        x_old = x
-        x = prox_G(x - tau * PT(q), tau, PT(torch.ones_like(y_flat)))
-        x_tilde = x + theta * (x - x_old)
-        if iteration % 1 == 0:
-            I_reconMatrix.append(x.reshape(Z, X).cpu().numpy())
-    return I_reconMatrix if isSavingEachIteration else I_reconMatrix[-1]
-def chambolle_pock_KL_gpu(SMatrix, y, alpha, theta, numIterations, isSavingEachIteration, L, withTumor):
-    # Sélection du GPU
-    device = torch.device(f"cuda:{config.select_best_gpu()}")
-    # Conversion des données en tenseurs et déplacement vers le GPU
-    A = torch.tensor(SMatrix, dtype=torch.float32, device=device)
-    y = torch.tensor(y, dtype=torch.float32, device=device)
-    T, Z, X, N = SMatrix.shape
-    A_flat = A.permute(0, 3, 1, 2).reshape(T * N, Z * X)
-    y_flat = y.reshape(-1)
-    P = lambda x: torch.matmul(A_flat, x.ravel())
-    PT = lambda y: torch.matmul(A_flat.T, y)
-    if L is None:
-        L = power_method(P, PT, y_flat, Z, X, isGPU=True)
-    sigma = 1.0 / L
-    tau = 1.0 / L
-    x = torch.zeros(Z * X, device=device)
-    q = torch.zeros_like(y_flat)
-    x_tilde = x.clone()
+        step = numIterations // max_saves
+        save_indices = list(range(0, numIterations, step))
+        if save_indices[-1] != numIterations - 1:
+            save_indices.append(numIterations - 1)
     I_reconMatrix = [x.reshape(Z, X).cpu().numpy()]
+    saved_indices = [0]
-    if withTumor:
-        description = f"AOT-BioMaps -- Primal/Dual Reconstruction Tomography: Chambolle-Pock (KL) α:{alpha:.4f} L: {L:.4f} ---- WITH TUMOR ---- processing on GPU no.{torch.cuda.current_device()} ----"
-    else:
-        description = f"AOT-BioMaps -- Primal/Dual Reconstruction Tomography: Chambolle-Pock (KL) α:{alpha:.4f} L: {L:.4f} ---- WITHOUT TUMOR ---- processing on GPU no.{torch.cuda.current_device()} ----"
+    # Description for progress bar
+    tumor_str = "WITH TUMOR" if withTumor else "WITHOUT TUMOR"
+    device_str = f"GPU no.{torch.cuda.current_device()}" if device.type == "cuda" else "CPU"
+    description = f"AOT-BioMaps -- Primal/Dual Reconstruction (KL) α:{alpha:.4f} L:{L:.4f} -- {tumor_str} -- {device_str}"
+    # Main loop
     for iteration in trange(numIterations, desc=description):
-        # Mise à jour de q avec l'opérateur proximal pour F*
+        # Update q (proximal step for F*)
         q = prox_F_star(q + sigma * P(x_tilde) - sigma * y_flat, sigma, y_flat)
-        # Mise à jour de x avec l'opérateur proximal pour G
-        x_old = x
+        # Update x (proximal step for G)
+        x_old = x.clone()
         x = prox_G(x - tau * PT(q), tau, PT(torch.ones_like(y_flat)))
+        # Update x_tilde
         x_tilde = x + theta * (x - x_old)
-        if iteration % 1 == 0:
+        # Save intermediate result if needed
+        if isSavingEachIteration and iteration in save_indices:
             I_reconMatrix.append(x.reshape(Z, X).cpu().numpy())
+            saved_indices.append(iteration)
-    return I_reconMatrix if isSavingEachIteration else I_reconMatrix[-1]
+    # Return results
+    if isSavingEachIteration:
+        return I_reconMatrix, saved_indices
+    else:
+        return I_reconMatrix[-1], None

AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/RelativeDifferences.py CHANGED Viewed

@@ -9,26 +9,24 @@ def _Omega_RELATIVE_DIFFERENCE_CPU(theta_flat, index, values, gamma):
     theta_k = theta_flat[k_idx]
     diff = theta_k - theta_j
     abs_diff = np.abs(diff)
     denom = theta_k + theta_j + gamma * abs_diff + 1e-8
     num = diff ** 2
+    psi_pair = num / denom
+    psi_pair = values * psi_pair
     # First derivative ∂U/∂θ_j
     dpsi = (2 * diff * denom - num * (1 + gamma * np.sign(diff))) / (denom ** 2)
     grad_pair = values * (-dpsi)  # Note the negative sign: U contains ψ(θ_k, θ_j), seeking ∂/∂θ_j
     # Second derivative ∂²U/∂θ_j² (numerically stable, approximate treatment)
     d2psi = (2 * denom ** 2 - 4 * diff * denom * (1 + gamma * np.sign(diff))
                 + 2 * num * (1 + gamma * np.sign(diff)) ** 2) / (denom ** 3 + 1e-8)
     hess_pair = values * d2psi
     grad_U = np.zeros_like(theta_flat)
     hess_U = np.zeros_like(theta_flat)
     np.add.at(grad_U, j_idx, grad_pair)
     np.add.at(hess_U, j_idx, hess_pair)
-    return grad_U, hess_U
+    # Compute U_value
+    U_value = 0.5 * np.sum(psi_pair)
+    return grad_U, hess_U, U_value
 def _Omega_RELATIVE_DIFFERENCE_GPU(theta_flat, index, values, device, gamma):
     j_idx, k_idx = index
@@ -38,26 +36,24 @@ def _Omega_RELATIVE_DIFFERENCE_GPU(theta_flat, index, values, device, gamma):
     abs_diff = torch.abs(diff)
     denom = theta_k + theta_j + gamma * abs_diff + 1e-8
     num = diff ** 2
+    psi_pair = num / denom
+    psi_pair = values * psi_pair
     # Compute gradient contributions
     dpsi = (2 * diff * denom - num * (1 + gamma * torch.sign(diff))) / (denom ** 2)
     grad_pair = values * (-dpsi)
     # Compute Hessian contributions
     d2psi = (2 * denom ** 2 - 4 * diff * denom * (1 + gamma * torch.sign(diff))
              + 2 * num * (1 + gamma * torch.sign(diff)) ** 2) / (denom ** 3 + 1e-8)
     hess_pair = values * d2psi
     # Initialize gradient and Hessian on the correct device
     grad_U = torch.zeros_like(theta_flat, device=device)
     hess_U = torch.zeros_like(theta_flat, device=device)
     # Accumulate gradient contributions
     grad_U.index_add_(0, j_idx, grad_pair)
     grad_U.index_add_(0, k_idx, -grad_pair)
     # Accumulate Hessian contributions
     hess_U.index_add_(0, j_idx, hess_pair)
     hess_U.index_add_(0, k_idx, hess_pair)
-    return grad_U, hess_U
+    # Compute U_value
+    U_value = 0.5 * psi_pair.sum()
+    return grad_U, hess_U, U_value

AOT_biomaps/AOT_Recon/AOT_SparseSMatrix/SparseSMatrix_CSR.py ADDED Viewed

@@ -0,0 +1,281 @@
+import pycuda.driver as drv
+import numpy as np
+from pycuda.compiler import SourceModule
+from tqdm import trange
+import gc
+import os
+class SparseSMatrix_CSR:
+    """Construction d'une matrice CSR à partir d'un objet `manip`.
+    Usage:
+        S = SparseMatrixGPU(manip)
+        S.allocate()
+    Après allocate(), on a: row_ptr (host np.int64 array), row_ptr_gpu (device ptr),
+    h_col_ind, h_values, col_ind_gpu, values_gpu, norm_factor_inv.
+    """
+    def __init__(self, manip, block_rows=64, relative_threshold=0.3, device=0):
+        drv.init()
+        self.device = drv.Device(device)
+        self.ctx = self.device.make_context()
+        self.manip = manip
+        self.N = len(manip.AcousticFields)
+        self.T = manip.AcousticFields[0].field.shape[0]
+        self.Z = manip.AcousticFields[0].field.shape[1]
+        self.X = manip.AcousticFields[0].field.shape[2]
+        self.block_rows = block_rows
+        self.relative_threshold = relative_threshold
+        # --- FIX: Résolution du chemin du .cubin (dans AOT_Recon/) ---
+        # Le fichier SparseSMatrix_CSR.py est dans AOT_Recon/AOT_SparseSMatrix/
+        # On remonte d'un répertoire pour atteindre AOT_Recon/
+        cubin_parent_dir = os.path.dirname(os.path.dirname(__file__))
+        self.module_path = os.path.join(cubin_parent_dir, "AOT_biomaps_kernels.cubin")
+        # --- FIN FIX ---
+        self.h_dense = None
+        self.row_ptr = None
+        self.row_ptr_gpu = None
+        self.h_col_ind = None
+        self.h_values = None
+        self.total_nnz = 0
+        self.norm_factor_inv = None
+        self.sparse_mod = None
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc, tb):
+        self.free()
+    def load_precompiled_module(self):
+        """
+        Charge le module CUDA pré-compilé (.cubin) en utilisant le chemin résolu.
+        Supprime la logique de compilation JIT.
+        """
+        so_path = self.module_path # Utilise le chemin résolu dans __init__
+        if not os.path.exists(so_path):
+            raise FileNotFoundError(
+                f"Le module CUDA {os.path.basename(so_path)} est introuvable au chemin: {so_path}. "
+                "Assurez-vous qu'il est compilé et bien placé."
+            )
+        try:
+            self.sparse_mod = drv.module_from_file(so_path)
+            print(f"✅ Module CUDA chargé depuis {so_path}")
+        except Exception as e:
+             raise RuntimeError(f"Le fichier {os.path.basename(so_path)} a été trouvé, mais PyCUDA n'a pas pu le charger. Vérifiez la compatibilité.") from e
+    def estimate_nnz_cpu(self):
+        """Estimation rapide (non-exacte) — utile si tu veux une estimation faible.
+        Recommandé : utiliser la passe GPU exacte (count_nnz_per_row_kernel) à la place.
+        """
+        total = 0
+        for n in range(self.N):
+            field = self.manip.AcousticFields[n].field
+            for t in range(self.T):
+                row = field[t].flatten()
+                row_max = np.max(np.abs(row))
+                thr = row_max * self.relative_threshold
+                total += np.count_nonzero(np.abs(row) > thr)
+        return int(total)
+    def allocate(self, kernel_module_path=None):
+        try:
+            # --- 1. Construction bloc par bloc (sans garder tout le dense si possible) ---
+            num_rows = self.N * self.T
+            num_cols = self.Z * self.X
+            bytes_float = np.dtype(np.float32).itemsize
+            # Charge module
+            # FIX: Toujours charger depuis self.module_path (résolu)
+            self.load_precompiled_module()
+            count_nnz_kernel = self.sparse_mod.get_function('count_nnz_rows_kernel')
+            fill_csr_kernel = self.sparse_mod.get_function('fill_kernel__CSR')
+            # allocate host row_ptr
+            self.row_ptr = np.zeros(num_rows + 1, dtype=np.int64)
+            # GPU temp buffers
+            dense_block_host = np.empty((self.block_rows, num_cols), dtype=np.float32)
+            dense_block_gpu = drv.mem_alloc(self.block_rows * num_cols * bytes_float)
+            row_nnz_gpu = drv.mem_alloc(self.block_rows * np.dtype(np.int32).itemsize)
+            block_size = 128
+            # --- Count NNZ per row using GPU kernel to be consistent with filling logic ---
+            for b in trange(0, num_rows, self.block_rows, desc='Comptage NNZ'):
+                current_rows = min(self.block_rows, num_rows - b)
+                # Fill dense_block_host from manip
+                for r in range(current_rows):
+                    global_row = b + r
+                    n_idx = global_row // self.T
+                    t_idx = global_row % self.T
+                    dense_block_host[r, :] = self.manip.AcousticFields[n_idx].field[t_idx].flatten()
+                drv.memcpy_htod(dense_block_gpu, dense_block_host)
+                grid = ((current_rows + block_size - 1) // block_size, 1, 1)
+                # Note: Assuming 'count_nnz_per_row_kernel' is the correct name (verified by user in prior steps)
+                count_nnz_kernel(dense_block_gpu, row_nnz_gpu,
+                                 np.int32(current_rows), np.int32(num_cols),
+                                 np.float32(self.relative_threshold),
+                                 block=(block_size, 1, 1), grid=grid)
+                row_nnz_host = np.empty(current_rows, dtype=np.int32)
+                drv.memcpy_dtoh(row_nnz_host, row_nnz_gpu)
+                self.row_ptr[b + 1:b + current_rows + 1] = self.row_ptr[b] + np.cumsum(row_nnz_host, dtype=np.int64)
+            # total nnz
+            self.total_nnz = int(self.row_ptr[-1])
+            print(f"NNZ total : {self.total_nnz}")
+            # allocate final arrays
+            self.h_col_ind = np.zeros(self.total_nnz, dtype=np.uint32)
+            self.h_values = np.zeros(self.total_nnz, dtype=np.float32)
+            # copy row_ptr to device once
+            self.row_ptr_gpu = drv.mem_alloc(self.row_ptr.nbytes)
+            drv.memcpy_htod(self.row_ptr_gpu, self.row_ptr)
+            # allocate device arrays for final csr
+            self.col_ind_gpu = drv.mem_alloc(self.h_col_ind.nbytes)
+            self.values_gpu = drv.mem_alloc(self.h_values.nbytes)
+            # --- Fill CSR per-block ---
+            for b in trange(0, num_rows, self.block_rows, desc='Remplissage CSR'):
+                current_rows = min(self.block_rows, num_rows - b)
+                for r in range(current_rows):
+                    global_row = b + r
+                    n_idx = global_row // self.T
+                    t_idx = global_row % self.T
+                    dense_block_host[r, :] = self.manip.AcousticFields[n_idx].field[t_idx].flatten()
+                drv.memcpy_htod(dense_block_gpu, dense_block_host)
+                grid = ((current_rows + block_size - 1) // block_size, 1, 1)
+                fill_csr_kernel(dense_block_gpu,
+                                self.row_ptr_gpu,
+                                self.col_ind_gpu,
+                                self.values_gpu,
+                                np.int32(b),
+                                np.int32(current_rows),
+                                np.int32(num_cols),
+                                np.float32(self.relative_threshold),
+                                np.int64(self.total_nnz),
+                                block=(block_size, 1, 1), grid=grid)
+                drv.Context.synchronize()
+            # copy back
+            drv.memcpy_dtoh(self.h_col_ind, self.col_ind_gpu)
+            drv.memcpy_dtoh(self.h_values, self.values_gpu)
+            print('CSR généré ✔')
+            # compute normalization factor from CSR (sum per column)
+            self.compute_norm_factor_from_csr()
+            # free temporaries
+            dense_block_gpu.free(); row_nnz_gpu.free()
+        except Exception as e:
+            print(f"❌ Erreur détaillée : {e}")
+            self.free()
+            raise
+    def compute_norm_factor_from_csr(self):
+        ZX = self.Z * self.X
+        # 1) Allouer un vecteur de somme colonne sur le GPU
+        col_sum_gpu = drv.mem_alloc(ZX * np.dtype(np.float32).itemsize)
+        drv.memset_d32(col_sum_gpu, 0, ZX)
+        # 2) Récupérer le kernel
+        # FIX: Utiliser le nom générique 'accumulate_columns_atomic' comme dans SELL (si le binaire est partagé)
+        # Si le développeur utilise la convention __CSR, on la garde.
+        # Basé sur notre historique SELL, le nom est probablement générique 'accumulate_columns_atomic'.
+        # Je vais supposer que le nom est générique pour éviter une LogicError ici aussi.
+        acc_kernel = self.sparse_mod.get_function("accumulate_columns_atomic")
+        # 3) Lancer le kernel
+        threads = 256
+        blocks = (self.total_nnz + threads - 1) // threads
+        acc_kernel(
+            self.values_gpu,
+            self.col_ind_gpu,
+            np.int64(self.total_nnz),
+            col_sum_gpu,
+            block=(threads,1,1),
+            grid=(blocks,1,1)
+        )
+        drv.Context.synchronize()
+        # 4) Récupérer le résultat
+        norm = np.empty(ZX, dtype=np.float32)
+        drv.memcpy_dtoh(norm, col_sum_gpu)
+        col_sum_gpu.free()
+        norm = np.maximum(norm.astype(np.float64), 1e-6)
+        self.norm_factor_inv = (1.0 / norm).astype(np.float32)
+        self.norm_factor_inv_gpu = drv.mem_alloc(self.norm_factor_inv.nbytes)
+        drv.memcpy_htod(self.norm_factor_inv_gpu, self.norm_factor_inv)
+    def getMatrixSize(self):
+        """
+        Retourne la taille totale de la matrice CSR en Go (en sommant la mémoire GPU).
+        """
+        if self.row_ptr is None:
+            return {"error": "La matrice sparse n'est pas encore allouée."}
+        total_bytes = 0
+        # Mémoire GPU (row_ptr_gpu, col_ind_gpu, values_gpu, norm_factor_inv_gpu)
+        if hasattr(self, 'row_ptr_gpu') and self.row_ptr_gpu:
+            total_bytes += self.row_ptr_gpu.size
+        if hasattr(self, 'col_ind_gpu') and self.col_ind_gpu:
+            total_bytes += self.col_ind_gpu.size
+        if hasattr(self, 'values_gpu') and self.values_gpu:
+            total_bytes += self.values_gpu.size
+        if hasattr(self, 'norm_factor_inv_gpu') and self.norm_factor_inv_gpu:
+            total_bytes += self.norm_factor_inv_gpu.size
+        # NOTE: Les versions précédentes utilisaient le .size de l'objet DeviceAllocation,
+        # qui était problématique. Si l'erreur se reproduit ici, il faudra
+        # stocker la taille en octets comme nous l'avons fait pour SELL.
+        # Pour l'instant, nous conservons la méthode getMatrixSize originale de CSR.
+        return total_bytes / (1024**3)
+    def free(self):
+        try:
+            if hasattr(self, 'col_ind_gpu') and self.col_ind_gpu:
+                self.col_ind_gpu.free()
+            if hasattr(self, 'values_gpu') and self.values_gpu:
+                self.values_gpu.free()
+            if hasattr(self, 'row_ptr_gpu') and self.row_ptr_gpu:
+                self.row_ptr_gpu.free()
+            if hasattr(self, 'norm_factor_inv_gpu') and self.norm_factor_inv_gpu:
+                self.norm_factor_inv_gpu.free()
+            if hasattr(self, 'ctx') and self.ctx:
+                try:
+                    self.ctx.pop()
+                except Exception:
+                    pass
+                self.ctx = None
+            print('✅ Mémoire GPU libérée.')
+        except Exception as e:
+            print(f"❌ Erreur lors de la libération de la mémoire GPU : {e}")
+    def compute_density(self):
+        """
+        Retourne la densité réelle de la CSR = NNZ / (num_rows * num_cols)
+        Nécessite que self.h_values et self.row_ptr existent (host).
+        """
+        if self.row_ptr is None or self.h_values is None:
+            raise RuntimeError("row_ptr et h_values requis pour calculer la densité")
+        num_rows = int(self.N * self.T)
+        num_cols = int(self.Z * self.X)
+        total_nnz = int(self.row_ptr[-1])
+        density = total_nnz / (num_rows * num_cols)
+        return density

AOT-biomaps 2.9.138__py3-none-any.whl → 2.9.279__py3-none-any.whl

Potentially problematic release.

AOT-biomaps 2.9.138__py3-none-any.whl → 2.9.279__py3-none-any.whl