miblab_ssa-0.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
miblab_ssa/__init__.py ADDED
@@ -0,0 +1,14 @@
+ from .normalize import (
+     normalize_kidney_mask
+ )
+ from .ssa import (
+     features_from_dataset_zarr,
+     pca_from_features_zarr,
+     coefficients_from_features_zarr,
+     modes_from_pca_zarr,
+ )
+ from .metrics import (
+     hausdorff_matrix_zarr,
+     dice_matrix_zarr
+ )
+ from . import sdf_ft, sdf_cheby, lb, zernike
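
Note: the imports above define the package's public API. A minimal usage sketch, assuming a Zarr store at "dataset.zarr" holding a 'masks' array as the metrics functions below expect; the path is illustrative, not a package default:

    import miblab_ssa

    dice = miblab_ssa.dice_matrix_zarr("dataset.zarr")
    haus = miblab_ssa.hausdorff_matrix_zarr("dataset.zarr")
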
miblab_ssa/lb.py ADDED
@@ -0,0 +1,260 @@
+ import numpy as np
+ from skimage import measure
+ import trimesh
+ from scipy.sparse import coo_matrix, diags
+ from scipy.sparse.linalg import eigsh
+
+ # -------------------------------
+ # Helper: convert trimesh to mask
+ # -------------------------------
+ def mesh_to_mask(mesh, shape):
+     """
+     Rasterize mesh into a 3D binary mask.
+     """
+     mask = np.zeros(shape, dtype=bool)
+     # Use trimesh voxelization; voxel indices are assumed to fit within `shape`
+     vox = mesh.voxelized(pitch=1.0)
+     indices = vox.sparse_indices
+     mask[indices[:, 0], indices[:, 1], indices[:, 2]] = True
+     return mask
+
+ # -------------------------------
+ # 1️⃣ Mask → Mesh
+ # -------------------------------
+ def mask_to_mesh(mask, spacing=(1.0, 1.0, 1.0)):
+     """
+     Convert a 3D binary mask to a triangular mesh.
+     """
+     verts, faces, normals, values = measure.marching_cubes(mask.astype(float), level=0.5, spacing=spacing)
+     mesh = trimesh.Trimesh(vertices=verts, faces=faces, process=False)
+     return mesh
+
+
+ def mask_to_mesh_fixed_vertices(mask: np.ndarray, spacing=(1.0, 1.0, 1.0), target_vertices: int = 5000) -> trimesh.Trimesh:
+     """
+     Convert a 3D binary mask to a mesh with a fixed number of vertices.
+
+     Parameters
+     ----------
+     mask : np.ndarray
+         3D binary mask.
+     spacing : tuple of float
+         Voxel size.
+     target_vertices : int
+         Approximate number of vertices in the simplified mesh.
+
+     Returns
+     -------
+     mesh_simplified : trimesh.Trimesh
+         Mesh object with approximately target_vertices vertices.
+     """
+     # Step 1: extract surface using marching cubes
+     verts, faces, normals, _ = measure.marching_cubes(mask.astype(float), level=0.5, spacing=spacing)
+
+     mesh = trimesh.Trimesh(vertices=verts, faces=faces, vertex_normals=normals, process=True)
+
+     # Step 2: simplify / resample to the target number of vertices.
+     # A closed triangle mesh has roughly twice as many faces as vertices,
+     # so the decimation targets 2 * target_vertices faces. Needs testing.
+     mesh_simplified = mesh.simplify_quadric_decimation(2 * target_vertices)
+
+     return mesh_simplified
+
+
+ # -------------------------------
+ # 2️⃣ Preprocessing for invariance (FIXED)
+ # -------------------------------
+ def preprocess_mesh(mesh):
+     """
+     Apply translation, scaling, and PCA alignment.
+     Returns the processed mesh and the preprocessing parameters for inverse mapping.
+     """
+     # Center
+     centroid = mesh.vertices.mean(axis=0)
+     mesh_c = mesh.copy()
+     mesh_c.vertices = mesh.vertices - centroid
+
+     # Scale
+     scale = np.sqrt((mesh_c.vertices**2).sum(axis=1).mean())
+     mesh_s = mesh_c.copy()
+     mesh_s.vertices = mesh_c.vertices / scale
+
+     # PCA alignment
+     cov = np.cov(mesh_s.vertices.T)
+     eigvals, eigvecs = np.linalg.eigh(cov)
+     idx = np.argsort(eigvals)[::-1]
+     eigvecs = eigvecs[:, idx]
+     mesh_aligned = mesh_s.copy()
+     mesh_aligned.vertices = mesh_s.vertices @ eigvecs
+
+     # Save parameters for inverse transformation
+     params = {"centroid": centroid, "scale": scale, "pca_eigvecs": eigvecs}
+     return mesh_aligned, params
+
+ def inverse_preprocess_mesh(vertices, params):
+     """
+     Map reconstructed vertices back to original coordinates.
+     """
+     v = vertices @ params["pca_eigvecs"].T  # undo PCA
+     v = v * params["scale"]                 # undo scaling
+     v = v + params["centroid"]              # undo translation
+     return v
+
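
Note: a quick round-trip check of the two helpers above; the icosphere stands in for a real kidney mesh and is purely illustrative:

    import numpy as np
    import trimesh

    mesh = trimesh.creation.icosphere(subdivisions=3)
    mesh_aligned, params = preprocess_mesh(mesh)
    restored = inverse_preprocess_mesh(mesh_aligned.vertices, params)
    # The PCA basis is orthogonal (E @ E.T = I), so the mapping inverts exactly
    print(np.allclose(restored, mesh.vertices))  # True
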
+ # -------------------------------
+ # 3️⃣ Laplace-Beltrami Eigenfunctions
+ # -------------------------------
+ def cotangent_laplacian(mesh):
+     vertices = mesh.vertices
+     faces = mesh.faces
+
+     def cotangent(a, b, c):
+         ba = b - a
+         ca = c - a
+         cos_angle = np.dot(ba, ca)
+         sin_angle = np.linalg.norm(np.cross(ba, ca))
+         return cos_angle / (sin_angle + 1e-10)
+
+     I, J, V = [], [], []
+     n = len(vertices)
+     for face in faces:
+         i, j, k = face
+         vi, vj, vk = vertices[i], vertices[j], vertices[k]
+         cot_alpha = cotangent(vj, vi, vk)
+         cot_beta = cotangent(vk, vj, vi)
+         cot_gamma = cotangent(vi, vk, vj)
+         for (p, q, w) in [(i, j, cot_gamma), (j, i, cot_gamma),
+                           (j, k, cot_alpha), (k, j, cot_alpha),
+                           (k, i, cot_beta), (i, k, cot_beta)]:
+             I.append(p)
+             J.append(q)
+             V.append(w / 2)
+
+     L = coo_matrix((V, (I, J)), shape=(n, n))
+     L = diags(L.sum(axis=1).A1) - L
+     return L
+
+ def lb_eigen_decomposition(mesh, k=50):
+     L = cotangent_laplacian(mesh)
+     M = diags(np.ones(mesh.vertices.shape[0]))
+     eigvals, eigvecs = eigsh(L, k=k, M=M, sigma=1e-8, which='LM')
+     return eigvals, eigvecs
+
+ def surface_to_coefficients(mesh, k=50):
+     eigvals, eigvecs = lb_eigen_decomposition(mesh, k=k)
+     coords = mesh.vertices
+     coeffs = eigvecs.T @ coords  # shape (k, 3)
+     return coeffs, eigvecs, eigvals
+
+
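
Note: a minimal sketch of the spectral step on a toy surface; k=20 and the icosphere are illustrative choices, not package defaults:

    import trimesh

    mesh = trimesh.creation.icosphere(subdivisions=3)  # 642 vertices
    coeffs, eigvecs, eigvals = surface_to_coefficients(mesh, k=20)
    print(coeffs.shape, eigvecs.shape, eigvals.shape)  # (20, 3) (642, 20) (20,)
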
+ def rotationally_invariant_lb_coeffs(coeffs, eigvals, k=100):
+     """
+     Compute rotationally invariant Laplace–Beltrami spectral coefficients.
+
+     Parameters
+     ----------
+     coeffs : (k, 3) array
+         Spectral coefficients from surface_to_coefficients
+     eigvals : (k,) array
+         Laplace–Beltrami eigenvalues
+     k : int
+         Number of eigenmodes to use
+
+     Returns
+     -------
+     invariants : (k-1,) array
+         Rotationally invariant spectral coefficients
+     eigvals : (k-1,) array
+         Normalized Laplace–Beltrami eigenvalues
+     """
+     invariants = np.linalg.norm(coeffs, axis=1)  # sqrt(sum over x,y,z)
+     invariants /= np.linalg.norm(invariants)
+
+     # Optional: normalize eigenvalues by first non-zero eigenvalue
+     eigvals = eigvals / eigvals[1] if eigvals[1] != 0 else eigvals
+
+     # Drop the first mode (zero eigenvalue) from the descriptor since it is trivial
+     eigvals = eigvals[1:]       # length k-1
+     invariants = invariants[1:]
+
+     # A combined, normalized descriptor can also be formed; note it is
+     # currently computed but not returned
+     descriptor = np.concatenate([eigvals[:k], invariants[:k]])
+     descriptor /= np.linalg.norm(descriptor)  # normalize final vector
+
+     return invariants, eigvals
+
+
+ # def coefficients_to_surface(coeffs, eigvecs):
+ #     reconstructed = eigvecs @ coeffs
+ #     return reconstructed
+
+ def coefficients_to_surface(coeffs, eigvecs, threshold=None):
+     """
+     Reconstruct surface vertices from coefficients and eigenvectors.
+
+     Args:
+         coeffs (np.ndarray): shape (k, 3), coefficients from surface_to_coefficients
+         eigvecs (np.ndarray): shape (n, k), eigenvectors of Laplace-Beltrami
+         threshold (float, optional): percentage (0-100).
+             If given, only the top threshold% dominant modes (by coefficient norm)
+             are kept in the reconstruction.
+
+     Returns:
+         np.ndarray: reconstructed vertices, shape (n, 3)
+     """
+     if threshold is not None:
+         # Compute importance of each eigenfunction
+         norms = np.linalg.norm(coeffs, axis=1)
+         k = len(norms)
+
+         # How many to keep
+         keep = max(1, int(np.ceil(k * threshold / 100.0)))
+
+         # Select indices of the most important modes
+         idx_sorted = np.argsort(norms)[::-1]
+         idx_keep = idx_sorted[:keep]
+
+         # Zero out the others
+         coeffs_filtered = np.zeros_like(coeffs)
+         coeffs_filtered[idx_keep] = coeffs[idx_keep]
+
+         reconstructed = eigvecs @ coeffs_filtered
+     else:
+         reconstructed = eigvecs @ coeffs
+
+     return reconstructed
+
+
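
Note: truncating to the dominant modes yields a smoothed reconstruction; the 20% threshold is illustrative:

    import trimesh

    mesh = trimesh.creation.icosphere(subdivisions=3)
    coeffs, eigvecs, _ = surface_to_coefficients(mesh, k=50)
    verts_smooth = coefficients_to_surface(coeffs, eigvecs, threshold=20)  # keep top 10 of 50 modes
    print(verts_smooth.shape)  # (642, 3)
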
+ def pipeline(mask, k=50):
+     # mesh = mask_to_mesh(mask)
+     # A fixed number of vertices is necessary to achieve comparable coefficients
+     mesh = mask_to_mesh_fixed_vertices(mask)
+     mesh_proc, params = preprocess_mesh(mesh)
+     coeffs, eigvecs, eigvals = surface_to_coefficients(mesh_proc, k=k)
+     return coeffs, eigvecs, mesh_proc, params
+
+ def eigvals(mask, k=100, normalize=False):
+     # NB: the local variable eigvals below shadows this function's name
+     mesh = mask_to_mesh(mask)
+     coeffs, eigvecs, eigvals = surface_to_coefficients(mesh, k=k)
+     if normalize:
+         # Normalize eigenvalues by the largest eigenvalue
+         # (alternative: by the first non-zero eigenvalue)
+         # eigvals = eigvals / eigvals[1] if eigvals[1] != 0 else eigvals
+         eigvals = eigvals / np.max(eigvals)
+     # Drop the first eigenvalue (zero mode) from the descriptor since it is trivial
+     eigvals = eigvals[1:]  # length k-1
+     return eigvals
+
+
+ def process(mesh, k=10, threshold=None):
+     mesh_proc, params = preprocess_mesh(mesh)
+
+     # Compute LB coefficients (invariant)
+     coeffs, eigvecs, eigvals = surface_to_coefficients(mesh_proc, k=k)
+
+     # Reconstruct in normalized/aligned space
+     reconstructed_vertices_proc = coefficients_to_surface(coeffs, eigvecs, threshold=threshold)
+
+     # Map reconstruction back to original coordinates
+     reconstructed_vertices_orig = inverse_preprocess_mesh(reconstructed_vertices_proc, params)
+
+     # Build reconstructed mesh
+     reconstructed_mesh = mesh.copy()
+     reconstructed_mesh.vertices = reconstructed_vertices_orig
+
+     return coeffs, eigvals, reconstructed_mesh
+
+
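
Note: an end-to-end sketch of lb.pipeline on a synthetic spherical mask. The mask construction is illustrative, and the decimation step inside mask_to_mesh_fixed_vertices relies on trimesh's quadric-decimation backend being installed:

    import numpy as np
    from miblab_ssa import lb

    # Synthetic binary mask: a sphere of radius 20 voxels in a 64³ volume
    zz, yy, xx = np.mgrid[:64, :64, :64]
    mask = (zz - 32)**2 + (yy - 32)**2 + (xx - 32)**2 < 20**2

    coeffs, eigvecs, mesh_proc, params = lb.pipeline(mask, k=30)
    print(coeffs.shape)  # (30, 3)
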
miblab_ssa/metrics.py ADDED
@@ -0,0 +1,280 @@
+ import logging
+ import numpy as np
+ from skimage import measure
+ from scipy.spatial import cKDTree
+ import dask
+ from dask.diagnostics import ProgressBar
+ import dask.array as da
+ import psutil
+ import zarr
+
+
+ def dice_coefficient(vol_a, vol_b):
+     """
+     Compute the Dice similarity coefficient between two binary masks.
+
+     Parameters
+     ----------
+     vol_a : np.ndarray
+         First binary mask (values should be 0 or 1).
+     vol_b : np.ndarray
+         Second binary mask (values should be 0 or 1).
+
+     Returns
+     -------
+     float
+         Dice coefficient, ranging from 0 (no overlap) to 1 (perfect overlap).
+
+     Notes
+     -----
+     The Dice coefficient is defined as:
+         Dice = 2 * |A ∩ B| / (|A| + |B|)
+     """
+     vol_a = vol_a.astype(bool)
+     vol_b = vol_b.astype(bool)
+     intersection = np.logical_and(vol_a, vol_b).sum()
+     size_a = vol_a.sum()
+     size_b = vol_b.sum()
+     if size_a + size_b == 0:
+         return 1.0
+     return 2.0 * intersection / (size_a + size_b)
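
Note: for instance, on two overlapping toy masks:

    a = np.array([1, 1, 1, 0])
    b = np.array([0, 1, 1, 1])
    print(dice_coefficient(a, b))  # 2*2 / (3+3) ≈ 0.667
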
+
+ def surface_distances(vol_a, vol_b, spacing=(1.0, 1.0, 1.0)):
+     """
+     Compute surface distances (Hausdorff and mean) between two binary volumes.
+     Args:
+         vol_a, vol_b: binary 3D arrays
+         spacing: voxel spacing (dz, dy, dx)
+     Returns:
+         hausdorff, mean_dist
+     """
+     # extract meshes
+     verts_a, faces_a, _, _ = measure.marching_cubes(vol_a.astype(np.uint8), level=0.5, spacing=spacing)
+     verts_b, faces_b, _, _ = measure.marching_cubes(vol_b.astype(np.uint8), level=0.5, spacing=spacing)
+
+     # build kd-trees
+     tree_a = cKDTree(verts_a)
+     tree_b = cKDTree(verts_b)
+
+     # distances from A→B and B→A
+     d_ab, _ = tree_b.query(verts_a, k=1)
+     d_ba, _ = tree_a.query(verts_b, k=1)
+
+     hausdorff = max(d_ab.max(), d_ba.max())
+     mean_dist = 0.5 * (d_ab.mean() + d_ba.mean())
+     return hausdorff, mean_dist
+
+
+ def dice_matrix_in_memory(M: np.ndarray):
+     """
+     Computes a Dice similarity matrix for a stack of binary masks using
+     vectorized matrix multiplication.
+     """
+     # 1. Ensure the matrix is 2D: one flattened mask per row
+     M = M.reshape((M.shape[0], -1))
+
+     # 2. Convert from Boolean (True/False) to Integer (1/0)
+     # This ensures the dot product counts overlapping voxels.
+     M = M.astype(np.int32)
+
+     # 3. Vectorized Intersection Calculation (Matrix Multiplication)
+     # Intersections[i, j] = dot_product(mask_i, mask_j)
+     # This replaces the nested loop. M.T means M transpose.
+     intersection_matrix = M @ M.T
+
+     # 4. Compute Dice Score
+     # Formula: 2 * |A ∩ B| / (|A| + |B|)
+
+     # The diagonal of the intersection matrix is |A ∩ A|, which is just |A| (the volume)
+     volumes = intersection_matrix.diagonal()
+
+     # Broadcasting sum: creates a matrix where cell [i, j] = volume[i] + volume[j]
+     volumes_sum_matrix = volumes[:, None] + volumes[None, :]
+
+     # Avoid division by zero (though volumes shouldn't be 0 for valid masks).
+     # If both volumes are 0, Dice is taken to be 1.0 (empty matches empty);
+     # np.errstate suppresses the warning and the NaNs are fixed below.
+     with np.errstate(divide='ignore', invalid='ignore'):
+         dice_matrix = (2 * intersection_matrix) / volumes_sum_matrix
+
+     # Handle NaN cases where volumes_sum_matrix is 0
+     dice_matrix = np.nan_to_num(dice_matrix, nan=1.0)
+
+     return dice_matrix
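
Note: a usage sketch on a small random stack; the shape and seed are illustrative:

    rng = np.random.default_rng(0)
    masks = rng.random((5, 16, 16, 16)) > 0.5  # (n_subjects, D, H, W)
    D = dice_matrix_in_memory(masks)
    print(D.shape, D.diagonal())  # (5, 5), ones on the diagonal
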
+
+
+
+ def get_optimal_chunk_size(shape, dtype, target_mb=250):
+     """
+     Calculates the optimal number of masks per chunk based on the specific dtype size.
+     """
+     # 1. Dynamically get bytes per voxel based on the dtype argument
+     #    np.int32 -> 4 bytes
+     #    np.float64 -> 8 bytes
+     #    np.bool_ -> 1 byte
+     bytes_per_voxel = np.dtype(dtype).itemsize
+
+     # 2. Calculate size of ONE mask in Megabytes (MB)
+     one_mask_bytes = np.prod(shape) * bytes_per_voxel
+     one_mask_mb = one_mask_bytes / (1024**2)
+
+     # 3. Constraint A: Dask target chunk size (~250 MB)
+     if one_mask_mb > target_mb:
+         dask_optimal_count = 1
+     else:
+         dask_optimal_count = int(target_mb / one_mask_mb)
+
+     # 4. Constraint B: System RAM safety net (10% of available RAM)
+     available_ram_mb = psutil.virtual_memory().available / (1024**2)
+     safe_ram_limit_mb = available_ram_mb * 0.10
+     ram_limited_count = int(safe_ram_limit_mb / one_mask_mb)
+
+     # 5. Pick the safer (smaller) number
+     final_count = min(dask_optimal_count, ram_limited_count)
+
+     return max(1, final_count)
+
+
+ def dice_matrix_zarr(zarr_path, chunk_size='auto'):
+     """
+     Computes a Dice similarity matrix with auto-optimized memory chunking.
+     """
+     # 1. Connect to Zarr
+     d_masks = da.from_zarr(zarr_path, component='masks')
+
+     # 2. Determine Chunk Size
+     if chunk_size == 'auto':
+         # Note: pass d_masks.shape[1:] to exclude the 'N' dimension (we just want D, H, W)
+         chunk_size = get_optimal_chunk_size(d_masks.shape[1:], dtype=np.int32)
+
+         print(f"Auto-configured chunk_size: {chunk_size} masks")
+
+     # 3. Flatten Spatial Dimensions
+     d_masks = d_masks.reshape(d_masks.shape[0], -1)
+
+     # 4. Apply Chunking
+     d_masks = d_masks.rechunk({0: chunk_size})
+
+     # 5. Cast to int32
+     d_masks = d_masks.astype(np.int32)
+
+     # 6. Matrix Multiplication (Lazy)
+     intersection_graph = d_masks @ d_masks.T
+
+     print(f"Computing {d_masks.shape[0]}x{d_masks.shape[0]} Dice matrix...")
+     with ProgressBar():
+         intersection_matrix = intersection_graph.compute()
+
+     # 7. Compute Dice Score
+     volumes = intersection_matrix.diagonal()
+     volumes_sum_matrix = volumes[:, None] + volumes[None, :]
+
+     with np.errstate(divide='ignore', invalid='ignore'):
+         dice = (2 * intersection_matrix) / volumes_sum_matrix
+
+     return np.nan_to_num(dice, nan=1.0)
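
Note: a usage sketch; the function expects a store with a 'masks' array of shape (n_subjects, D, H, W), but the path and sizes here are illustrative:

    import numpy as np
    import zarr

    rng = np.random.default_rng(0)
    zarr.save_group("dataset.zarr", masks=(rng.random((5, 16, 16, 16)) > 0.5))

    D = dice_matrix_zarr("dataset.zarr")
    print(D.shape)  # (5, 5)
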
179
+
180
+
181
+ def hausdorff_matrix_in_memory(M, chunk_size = 1000): # (n_subjects, n_voxels)
182
+ # Chunk output to produce less and larger tasks, and less files
183
+ # Otherwise dask takes too long to schedule
184
+
185
+ # Convert from Boolean (True/False) to Integer (1/0)
186
+ # This ensures the dot product counts overlapping voxels.
187
+ M = M.astype(np.int32)
188
+
189
+ n = M.shape[0]
190
+ # Build a list of all index pairs in the sorted list that need computing
191
+ # Since the matrix is symmetric only half needs to be computed
192
+ pairs = [(i, j) for i in range(n) for j in range(i, n)]
193
+ # Split the list of index pairs up into chunks
194
+ chunks = [pairs[i:i+chunk_size] for i in range(0, len(pairs), chunk_size)]
195
+
196
+ # Compute dice scores for each chunk in parallel
197
+ logging.info("Hausdorff matrix - scheduling tasks..")
198
+ tasks = [
199
+ dask.delayed(_hausdorff_matrix_chunk)(M, chunk)
200
+ for chunk in chunks
201
+ ]
202
+ logging.info("Hausdorff matrix - computing tasks..")
203
+ with ProgressBar():
204
+ chunks = dask.compute(*tasks)
205
+
206
+ # Gather up all the chunks to build one matrix
207
+ logging.info(f"Hausdorff matrix - building matrix..")
208
+ haus_matrix = np.zeros((n, n), dtype=np.float32)
209
+ for chunk in chunks:
210
+ for (i, j), haus_ij in chunk.items():
211
+ haus_matrix[i, j] = haus_ij
212
+ haus_matrix[j, i] = haus_ij
213
+
214
+ return haus_matrix
215
+
216
+
217
+ def _hausdorff_matrix_chunk(M, pairs):
218
+ chunk = {}
219
+ for (i,j) in pairs:
220
+ # Load masks
221
+ mask_i = M[i, ...].astype(bool)
222
+ mask_j = M[j, ...].astype(bool)
223
+ # Compute metrics
224
+ haus_ij, _ = surface_distances(mask_i, mask_j)
225
+ # Add to results
226
+ chunk[(i, j)] = haus_ij
227
+ return chunk
+
+
+
+ def hausdorff_matrix_zarr(zarr_path: str):
+     # 1. Open metadata
+     z_root = zarr.open(zarr_path, mode='r')
+     n = z_root['masks'].shape[0]
+
+     logging.info(f"Hausdorff matrix: Scheduling {n} row tasks...")
+
+     # 2. Schedule one task per row
+     # Each task computes the distances for row i from [i to n]
+     tasks = [
+         dask.delayed(_compute_hausdorff_row)(zarr_path, i, n)
+         for i in range(n)
+     ]
+
+     # 3. Compute
+     with ProgressBar():
+         rows = dask.compute(*tasks)
+
+     # 4. Assemble
+     # 'rows' is a list of arrays of varying lengths
+     haus_matrix = np.zeros((n, n), dtype=np.float32)
+     for i, row_values in enumerate(rows):
+         # row_values contains distances for [i, i+1, ..., n-1]
+         haus_matrix[i, i:] = row_values
+         haus_matrix[i:, i] = row_values  # Mirror to lower triangle
+
+     return haus_matrix
+
+ def _compute_hausdorff_row(zarr_path, i, n):
+     """Computes all distances for a single row starting from the diagonal."""
+     z_masks = zarr.open(zarr_path, mode='r')['masks']
+
+     # Load mask_i once for the entire row
+     mask_i = z_masks[i].astype(bool)
+
+     # Pre-allocate result for the partial row
+     row_len = n - i
+     row_results = np.zeros(row_len, dtype=np.float32)
+
+     for idx, j in enumerate(range(i, n)):
+         if i == j:
+             row_results[idx] = 0.0
+             continue
+
+         mask_j = z_masks[j].astype(bool)
+         h_val, _ = surface_distances(mask_i, mask_j)
+         row_results[idx] = h_val
+
+     return row_results
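
Note: same store layout as the Dice example above; spherical masks are used so that every volume contains a surface for marching_cubes, and all sizes are illustrative:

    import numpy as np
    import zarr

    zz, yy, xx = np.mgrid[:32, :32, :32]
    masks = np.stack([(zz - 16)**2 + (yy - 16)**2 + (xx - 16)**2 < r**2 for r in (8, 10, 12)])
    zarr.save_group("dataset.zarr", masks=masks)

    H = hausdorff_matrix_zarr("dataset.zarr")
    print(H.shape)  # (3, 3), zeros on the diagonal
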