risk-network 0.0.8b27__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +195 -118
  4. risk/annotations/io.py +47 -31
  5. risk/log/__init__.py +4 -2
  6. risk/log/console.py +3 -1
  7. risk/log/{params.py → parameters.py} +17 -42
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +442 -0
  10. risk/neighborhoods/community.py +324 -101
  11. risk/neighborhoods/domains.py +125 -52
  12. risk/neighborhoods/neighborhoods.py +177 -165
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +71 -89
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +200 -0
  17. risk/network/{graph.py → graph/graph.py} +90 -40
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +103 -114
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +9 -8
  23. risk/network/{plot → plotter}/contour.py +27 -24
  24. risk/network/{plot → plotter}/labels.py +73 -78
  25. risk/network/{plot → plotter}/network.py +45 -39
  26. risk/network/{plot → plotter}/plotter.py +23 -17
  27. risk/network/{plot/utils/color.py → plotter/utils/colors.py} +114 -122
  28. risk/network/{plot → plotter}/utils/layout.py +10 -7
  29. risk/risk.py +11 -500
  30. risk/stats/__init__.py +10 -4
  31. risk/stats/permutation/__init__.py +1 -1
  32. risk/stats/permutation/permutation.py +44 -38
  33. risk/stats/permutation/test_functions.py +26 -18
  34. risk/stats/{stats.py → significance.py} +17 -15
  35. risk/stats/stat_tests.py +267 -0
  36. {risk_network-0.0.8b27.dist-info → risk_network-0.0.9.dist-info}/METADATA +31 -46
  37. risk_network-0.0.9.dist-info/RECORD +40 -0
  38. {risk_network-0.0.8b27.dist-info → risk_network-0.0.9.dist-info}/WHEEL +1 -1
  39. risk/constants.py +0 -31
  40. risk/network/plot/__init__.py +0 -6
  41. risk/stats/hypergeom.py +0 -54
  42. risk/stats/poisson.py +0 -44
  43. risk_network-0.0.8b27.dist-info/RECORD +0 -37
  44. {risk_network-0.0.8b27.dist-info → risk_network-0.0.9.dist-info}/LICENSE +0 -0
  45. {risk_network-0.0.8b27.dist-info → risk_network-0.0.9.dist-info}/top_level.txt +0 -0
@@ -5,18 +5,19 @@ risk/stats/permutation/permutation
 
 from multiprocessing import get_context, Manager
 from multiprocessing.managers import ValueProxy
-from tqdm import tqdm
-from typing import Any, Callable, Dict
+from typing import Any, Callable, Dict, List, Tuple, Union
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from threadpoolctl import threadpool_limits
+from tqdm import tqdm
 
 from risk.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
 
 
 def compute_permutation_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
     score_metric: str = "sum",
     null_distribution: str = "network",
     num_permutations: int = 1000,
@@ -26,9 +27,9 @@ def compute_permutation_test(
     """Compute permutation test for enrichment and depletion in neighborhoods.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
-        score_metric (str, optional): Metric to use for scoring ('sum', 'mean', etc.). Defaults to "sum".
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        score_metric (str, optional): Metric to use for scoring ('sum' or 'stdev'). Defaults to "sum".
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
         random_seed (int, optional): Seed for random number generation. Defaults to 888.
@@ -38,6 +39,7 @@ def compute_permutation_test(
         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Ensure that the matrices are in the correct format and free of NaN values
+    # NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
     neighborhoods = neighborhoods.astype(np.float32)
     annotations = annotations.astype(np.float32)
     # Retrieve the appropriate neighborhood score function based on the metric
@@ -65,19 +67,19 @@ def compute_permutation_test(
 
 
 def _run_permutation_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
     neighborhood_score_func: Callable,
     null_distribution: str = "network",
     num_permutations: int = 1000,
     random_seed: int = 888,
     max_workers: int = 4,
 ) -> tuple:
-    """Run a permutation test to calculate enrichment and depletion counts.
+    """Run the permutation test to calculate depletion and enrichment counts.
 
     Args:
-        neighborhoods (np.ndarray): The neighborhood matrix.
-        annotations (np.ndarray): The annotation matrix.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         num_permutations (int, optional): Number of permutations. Defaults to 1000.
@@ -93,14 +95,14 @@ def _run_permutation_test(
     if null_distribution == "network":
         idxs = range(annotations.shape[0])
     elif null_distribution == "annotations":
-        idxs = np.nonzero(np.sum(~np.isnan(annotations), axis=1))[0]
+        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
-    # Replace NaNs with zeros in the annotations matrix
-    annotations[np.isnan(annotations)] = 0
+    # Replace NaNs with zeros in the sparse annotations matrix
+    annotations.data[np.isnan(annotations.data)] = 0
     annotation_matrix_obsv = annotations[idxs]
     neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
     # Calculate observed neighborhood scores
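The sparse NaN cleanup above differs from the old dense masking in an important way: `annotations.data` exposes only the stored entries, so the operation is O(nnz) and never densifies the matrix. A minimal standalone sketch of the idiom (toy values, not package data):

```python
import numpy as np
from scipy.sparse import csr_matrix

# Hypothetical annotations matrix with one NaN entry
annotations = csr_matrix(np.array([[1.0, 0.0], [np.nan, 2.0]]))

# Rows with at least one stored entry, as in the 'annotations' null branch
idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
# Zero out NaNs among the stored values only; the sparsity pattern is untouched
annotations.data[np.isnan(annotations.data)] = 0

print(idxs)                   # [0 1]
print(annotations.toarray())  # [[1. 0.]
                              #  [0. 2.]]
```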
@@ -121,28 +123,35 @@ def _run_permutation_test(
     manager = Manager()
     progress_counter = manager.Value("i", 0)
     total_progress = num_permutations
+
+    # Generate precomputed permutations
+    permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
+    # Divide permutations into batches for workers
+    batch_size = subset_size + (1 if remainder > 0 else 0)
+    permutation_batches = [
+        permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)
+    ]
+
     # Execute the permutation test using multiprocessing
     with ctx.Pool(max_workers) as pool:
         with tqdm(total=total_progress, desc="Total progress", position=0) as progress:
             # Prepare parameters for multiprocessing
             params_list = [
                 (
+                    permutation_batches[i],  # Pass the batch of precomputed permutations
                     annotations,
-                    np.array(idxs),
                     neighborhoods_matrix_obsv,
                     observed_neighborhood_scores,
                     neighborhood_score_func,
-                    subset_size + (1 if i < remainder else 0),
                     num_permutations,
                     progress_counter,
                     max_workers,
-                    rng,  # Pass the random number generator to each worker
                 )
                 for i in range(max_workers)
             ]
 
             # Start the permutation process in parallel
-            results = pool.starmap_async(_permutation_process_subset, params_list, chunksize=1)
+            results = pool.starmap_async(_permutation_process_batch, params_list, chunksize=1)
 
             # Update progress bar based on progress_counter
             while not results.ready():
@@ -159,31 +168,27 @@ def _run_permutation_test(
     return counts_depletion, counts_enrichment
 
 
-def _permutation_process_subset(
-    annotation_matrix: np.ndarray,
-    idxs: np.ndarray,
-    neighborhoods_matrix_obsv: np.ndarray,
+def _permutation_process_batch(
+    permutations: Union[List, Tuple, np.ndarray],
+    annotation_matrix: csr_matrix,
+    neighborhoods_matrix_obsv: csr_matrix,
     observed_neighborhood_scores: np.ndarray,
     neighborhood_score_func: Callable,
-    subset_size: int,
     num_permutations: int,
     progress_counter: ValueProxy,
     max_workers: int,
-    rng: np.random.Generator,
 ) -> tuple:
-    """Process a subset of permutations for the permutation test.
+    """Process a batch of permutations in a worker process.
 
     Args:
-        annotation_matrix (np.ndarray): The annotation matrix.
-        idxs (np.ndarray): Indices of valid rows in the matrix.
-        neighborhoods_matrix_obsv (np.ndarray): Observed neighborhoods matrix.
+        permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
+        annotation_matrix (csr_matrix): Sparse binary matrix representing annotations.
+        neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
         observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
-        subset_size (int): Number of permutations to run in this subset.
         num_permutations (int): Number of total permutations across all subsets.
         progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
         max_workers (int): Number of workers for multiprocessing.
-        rng (np.random.Generator): Random number generator object.
 
     Returns:
         tuple: Local counts of depletion and enrichment.
@@ -192,7 +197,9 @@ def _permutation_process_subset(
     local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
     local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
 
-    # NOTE: Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used.
+    # Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used
+    # NOTE: This does not work for Mac M chips due to a bug in the threadpoolctl package
+    # This is currently a known issue and is being addressed by the maintainers [https://github.com/joblib/threadpoolctl/issues/135]
     limits = None if max_workers == 1 else 1
     with threadpool_limits(limits=limits, user_api="blas"):
         # Initialize a local counter for batched progress updates
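For context on the threadpoolctl guard referenced in the note: capping BLAS threads inside each worker prevents `max_workers` processes from each spawning a full thread pool and oversubscribing the CPU. A standalone sketch of the pattern (not package code; sizes are arbitrary):

```python
import numpy as np
from threadpoolctl import threadpool_limits

a = np.random.rand(200, 200)
b = np.random.rand(200, 200)

max_workers = 4
limits = None if max_workers == 1 else 1  # same guard as in the diff
with threadpool_limits(limits=limits, user_api="blas"):
    _ = a @ b  # BLAS-backed matmul runs on a single thread here
```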
@@ -200,16 +207,16 @@
         # Calculate the modulo value based on total permutations for 1/100th frequency updates
         modulo_value = max(1, num_permutations // 100)
 
-        for _ in range(subset_size):
-            # Permute the annotation matrix using the RNG
-            annotation_matrix_permut = annotation_matrix[rng.permutation(idxs)]
+        for permuted_idxs in permutations:
+            # Apply precomputed permutation
+            annotation_matrix_permut = annotation_matrix[permuted_idxs]
             # Calculate permuted neighborhood scores
             with np.errstate(invalid="ignore", divide="ignore"):
                 permuted_neighborhood_scores = neighborhood_score_func(
                     neighborhoods_matrix_obsv, annotation_matrix_permut
                 )
 
-            # Update local depletion and enrichment counts based on permuted scores
+            # Update local depletion and enrichment counts
             local_counts_depletion = np.add(
                 local_counts_depletion, permuted_neighborhood_scores <= observed_neighborhood_scores
             )
@@ -218,9 +225,8 @@
                 permuted_neighborhood_scores >= observed_neighborhood_scores,
             )
 
-            # Update local progress counter
+            # Update progress
             local_progress += 1
-            # Update shared progress counter every 1/100th of total permutations
             if local_progress % modulo_value == 0:
                 progress_counter.value += modulo_value
 
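The net effect of this file's changes: permutations are now drawn once from a single seeded RNG in the parent and shipped to workers in batches, rather than re-drawn inside each worker from a copied RNG, which keeps the draw sequence independent of worker scheduling. A standalone sketch of the batching arithmetic, assuming `subset_size` and `remainder` come from `divmod(num_permutations, max_workers)` earlier in the function (values here are hypothetical):

```python
import numpy as np

num_permutations, max_workers = 10, 4
idxs = np.arange(6)  # hypothetical annotation row indices
rng = np.random.default_rng(888)

subset_size, remainder = divmod(num_permutations, max_workers)
permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
batch_size = subset_size + (1 if remainder > 0 else 0)
batches = [
    permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)
]

print([len(b) for b in batches])  # [3, 3, 3, 1] -- all 10 permutations covered
```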
@@ -4,53 +4,61 @@ risk/stats/permutation/test_functions
 """
 
 import numpy as np
+from scipy.sparse import csr_matrix
 
-# Note: Cython optimizations provided minimal performance benefits.
+# NOTE: Cython optimizations provided minimal performance benefits.
 # The final version with Cython is archived in the `cython_permutation` branch.
 # DISPATCH_TEST_FUNCTIONS can be found at the end of the file.
 
 
 def compute_neighborhood_score_by_sum(
-    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
+    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
 ) -> np.ndarray:
-    """Compute the sum of attribute values for each neighborhood.
+    """Compute the sum of attribute values for each neighborhood using sparse matrices.
 
     Args:
-        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
-        annotation_matrix (np.ndarray): Matrix representing annotation values.
+        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
 
     Returns:
-        np.ndarray: Sum of attribute values for each neighborhood.
+        np.ndarray: Dense array of summed attribute values for each neighborhood.
     """
     # Calculate the neighborhood score as the dot product of neighborhoods and annotations
-    neighborhood_sum = np.dot(neighborhoods_matrix, annotation_matrix)
-    return neighborhood_sum
+    neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
+    # Convert the result to a dense array for downstream calculations
+    neighborhood_score_dense = neighborhood_score.toarray()
+    return neighborhood_score_dense
 
 
 def compute_neighborhood_score_by_stdev(
-    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
+    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
 ) -> np.ndarray:
-    """Compute the standard deviation of neighborhood scores.
+    """Compute the standard deviation of neighborhood scores for sparse matrices.
 
     Args:
-        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
-        annotation_matrix (np.ndarray): Matrix representing annotation values.
+        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
 
     Returns:
         np.ndarray: Standard deviation of the neighborhood scores.
     """
     # Calculate the neighborhood score as the dot product of neighborhoods and annotations
-    neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
-    # Calculate the number of elements in each neighborhood
-    N = np.sum(neighborhoods_matrix, axis=1)
+    neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
+    # Calculate the number of elements in each neighborhood (sum of rows)
+    N = neighborhoods_matrix.sum(axis=1).A.flatten()  # Convert to 1D array
+    # Avoid division by zero by replacing zeros in N with np.nan temporarily
+    N[N == 0] = np.nan
     # Compute the mean of the neighborhood scores
-    M = neighborhood_score / N[:, None]
+    M = neighborhood_score.multiply(1 / N[:, None]).toarray()  # Sparse element-wise division
     # Compute the mean of squares (EXX) directly using squared annotation matrix
-    EXX = np.dot(neighborhoods_matrix, annotation_matrix**2) / N[:, None]
+    annotation_squared = annotation_matrix.multiply(annotation_matrix)  # Element-wise squaring
+    EXX = (neighborhoods_matrix @ annotation_squared).multiply(1 / N[:, None]).toarray()
     # Calculate variance as EXX - M^2
-    variance = EXX - M**2
+    variance = EXX - np.power(M, 2)
     # Compute the standard deviation as the square root of the variance
     neighborhood_stdev = np.sqrt(variance)
+    # Replace np.nan back with zeros in case N was 0 (no elements in the neighborhood)
+    neighborhood_stdev[np.isnan(neighborhood_stdev)] = 0
     return neighborhood_stdev
 
 
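The reworked stdev routine keeps everything sparse until the final division. To convince yourself the algebra (variance = EXX − M², i.e. E[X²] minus the squared mean) matches a per-neighborhood population standard deviation, here is a small self-contained check with hypothetical toy matrices:

```python
import numpy as np
from scipy.sparse import csr_matrix

rng = np.random.default_rng(0)
nbrs = csr_matrix((rng.random((5, 8)) < 0.5).astype(np.float64))   # toy neighborhoods
annot = csr_matrix((rng.random((8, 3)) < 0.4).astype(np.float64))  # toy annotations

# Sparse path, mirroring compute_neighborhood_score_by_stdev
score = nbrs @ annot
N = nbrs.sum(axis=1).A.flatten()
N[N == 0] = np.nan
M = score.multiply(1 / N[:, None]).toarray()
EXX = (nbrs @ annot.multiply(annot)).multiply(1 / N[:, None]).toarray()
stdev = np.sqrt(EXX - np.power(M, 2))
stdev[np.isnan(stdev)] = 0

# Dense reference: population std of annotation values over each neighborhood's members
dense = np.vstack([
    annot.toarray()[row.toarray().ravel() > 0].std(axis=0) if row.getnnz() else np.zeros(3)
    for row in nbrs
])
assert np.allclose(stdev, dense, atol=1e-6)
```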
@@ -1,6 +1,6 @@
 """
-risk/stats/stats
-~~~~~~~~~~~~~~~~
+risk/stats/significance
+~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 from typing import Any, Dict, Union
@@ -44,7 +44,7 @@ def calculate_significance_matrices(
             enrichment_pvals, enrichment_qvals, pval_cutoff=pval_cutoff, fdr_cutoff=fdr_cutoff
         )
         # Compute the enrichment matrix using both q-values and p-values
-        enrichment_matrix = (enrichment_qvals**2) * (enrichment_pvals**0.5)
+        enrichment_matrix = (enrichment_pvals**0.5) * (enrichment_qvals**2)
     else:
         # Compute threshold matrices based on p-value cutoffs only
         depletion_alpha_threshold_matrix = _compute_threshold_matrix(
@@ -62,7 +62,7 @@ def calculate_significance_matrices(
     log_enrichment_matrix = -np.log10(enrichment_matrix)
 
     # Select the appropriate significance matrices based on the specified tail
-    enrichment_matrix, significant_binary_enrichment_matrix = _select_significance_matrices(
+    significance_matrix, significant_binary_significance_matrix = _select_significance_matrices(
         tail,
         log_depletion_matrix,
         depletion_alpha_threshold_matrix,
@@ -71,14 +71,14 @@ def calculate_significance_matrices(
     )
 
     # Filter the enrichment matrix using the binary significance matrix
-    significant_enrichment_matrix = np.where(
-        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
     )
 
     return {
-        "enrichment_matrix": enrichment_matrix,
-        "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
-        "significant_enrichment_matrix": significant_enrichment_matrix,
+        "significance_matrix": significance_matrix,
+        "significant_significance_matrix": significant_significance_matrix,
+        "significant_binary_significance_matrix": significant_binary_significance_matrix,
     }
 
 
@@ -109,15 +109,15 @@ def _select_significance_matrices(
 
     if tail == "left":
         # Select depletion matrix and corresponding alpha threshold for left-tail analysis
-        enrichment_matrix = -log_depletion_matrix
+        significance_matrix = -log_depletion_matrix
         alpha_threshold_matrix = depletion_alpha_threshold_matrix
     elif tail == "right":
         # Select enrichment matrix and corresponding alpha threshold for right-tail analysis
-        enrichment_matrix = log_enrichment_matrix
+        significance_matrix = log_enrichment_matrix
         alpha_threshold_matrix = enrichment_alpha_threshold_matrix
     elif tail == "both":
         # Select the matrix with the highest absolute values while preserving the sign
-        enrichment_matrix = np.where(
+        significance_matrix = np.where(
             np.abs(log_depletion_matrix) >= np.abs(log_enrichment_matrix),
             -log_depletion_matrix,
             log_enrichment_matrix,
@@ -126,13 +126,15 @@
         alpha_threshold_matrix = np.logical_or(
             depletion_alpha_threshold_matrix, enrichment_alpha_threshold_matrix
         )
+    else:
+        raise ValueError("Invalid value for 'tail'. Must be 'left', 'right', or 'both'.")
 
     # Create a binary significance matrix where valid indices meet the alpha threshold
     valid_idxs = ~np.isnan(alpha_threshold_matrix)
-    significant_binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
-    significant_binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
+    significant_binary_significance_matrix = np.zeros(alpha_threshold_matrix.shape)
+    significant_binary_significance_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _compute_threshold_matrix(
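The rename from "enrichment" to "significance" reflects that the returned matrix is signed: under `tail="both"`, depletion and enrichment compete per cell and the sign records which side won. A toy illustration of that selection (values are illustrative, not package output):

```python
import numpy as np

log_depletion = np.array([[3.0, 0.2], [1.0, 4.0]])   # -log10 depletion p-values
log_enrichment = np.array([[1.0, 2.5], [0.5, 1.0]])  # -log10 enrichment p-values

significance = np.where(
    np.abs(log_depletion) >= np.abs(log_enrichment),
    -log_depletion,   # depletion wins: negative sign marks depletion
    log_enrichment,   # enrichment wins: positive sign marks enrichment
)
print(significance)
# [[-3.   2.5]
#  [-1.  -4. ]]
```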
@@ -0,0 +1,267 @@
+"""
+risk/stats/stat_tests
+~~~~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict
+
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.stats import binom
+from scipy.stats import chi2
+from scipy.stats import hypergeom
+from scipy.stats import norm
+from scipy.stats import poisson
+
+
+def compute_binom_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Get the total number of nodes in the network
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums (remain sparse here)
+    neighborhood_sizes = neighborhoods.sum(axis=1)  # Row sums
+    annotation_totals = annotations.sum(axis=0)  # Column sums
+    # Compute probabilities (convert to dense)
+    if null_distribution == "network":
+        p_values = (annotation_totals / total_nodes).A.flatten()  # Dense 1D array
+    elif null_distribution == "annotations":
+        p_values = (annotation_totals / annotations.sum()).A.flatten()  # Dense 1D array
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed counts (sparse matrix multiplication)
+    annotated_counts = neighborhoods @ annotations  # Sparse result
+    annotated_counts_dense = annotated_counts.toarray()  # Convert for dense operations
+
+    # Compute enrichment and depletion p-values
+    enrichment_pvals = 1 - binom.cdf(annotated_counts_dense - 1, neighborhood_sizes.A, p_values)
+    depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes.A, p_values)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+def compute_chi2_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[0]
+
+    if null_distribution == "network":
+        # Case 1: Use all nodes as the background
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0)  # Column sums of neighborhoods
+        annotation_sums = annotations.sum(axis=0)  # Column sums of annotations
+    elif null_distribution == "annotations":
+        # Case 2: Only consider nodes with at least one annotation
+        annotated_nodes = (
+            np.ravel(annotations.sum(axis=1)) > 0
+        )  # Row-wise sum to filter nodes with annotations
+        background_population = annotated_nodes.sum()  # Total number of annotated nodes
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(
+            axis=0
+        )  # Neighborhood sums for annotated nodes
+        annotation_sums = annotations[annotated_nodes].sum(
+            axis=0
+        )  # Annotation sums for annotated nodes
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Convert to dense arrays for downstream computations
+    neighborhood_sums = np.asarray(neighborhood_sums).reshape(-1, 1)  # Ensure column vector shape
+    annotation_sums = np.asarray(annotation_sums).reshape(1, -1)  # Ensure row vector shape
+
+    # Observed values: number of annotated nodes in each neighborhood
+    observed = neighborhoods.T @ annotations  # Shape: (neighborhoods, annotations)
+    # Expected values under the null
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+    # Chi-squared statistic: sum((observed - expected)^2 / expected)
+    with np.errstate(divide="ignore", invalid="ignore"):  # Handle divide-by-zero
+        chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)
+
+    # Compute p-values for enrichment (upper tail) and depletion (lower tail)
+    enrichment_pvals = chi2.sf(chi2_stat, df=1)  # Survival function for upper tail
+    depletion_pvals = chi2.cdf(chi2_stat, df=1)  # Cumulative distribution for lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+def compute_hypergeom_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Get the total number of nodes in the network
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums
+    neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Convert to dense array
+    annotation_sums = annotations.sum(axis=0).A.flatten()  # Convert to dense array
+
+    if null_distribution == "network":
+        background_population = total_nodes
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed counts
+    annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse result
+    annotated_in_neighborhood = annotated_in_neighborhood.toarray()  # Convert to dense
+    # Align shapes for broadcasting
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)
+    annotation_sums = annotation_sums.reshape(1, -1)
+    background_population = np.array(background_population).reshape(1, 1)
+
+    # Compute hypergeometric p-values
+    depletion_pvals = hypergeom.cdf(
+        annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
+    )
+    enrichment_pvals = hypergeom.sf(
+        annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
+    )
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+def compute_poisson_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Matrix multiplication to get the number of annotated nodes in each neighborhood
+    annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
+    # Convert annotated counts to dense for downstream calculations
+    annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
+
+    # Compute lambda_expected based on the chosen null distribution
+    if null_distribution == "network":
+        # Use the mean across neighborhoods (axis=1)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
+    elif null_distribution == "annotations":
+        # Use the mean across annotations (axis=0)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Compute p-values for enrichment and depletion using Poisson distribution
+    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
+    depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+def compute_zscore_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[1]
+
+    # Compute sums
+    if null_distribution == "network":
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
+        annotation_sums = annotations.sum(axis=0).A.flatten()  # Dense row sums
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed values
+    observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
+    # Expected values under the null
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
+    annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+
+    # Standard deviation under the null
+    std_dev = np.sqrt(
+        expected
+        * (1 - annotation_sums / background_population)
+        * (1 - neighborhood_sums / background_population)
+    )
+    std_dev[std_dev == 0] = np.nan  # Avoid division by zero
+    # Compute z-scores
+    z_scores = (observed - expected) / std_dev
+
+    # Convert z-scores to depletion and enrichment p-values
+    enrichment_pvals = norm.sf(z_scores)  # Upper tail
+    depletion_pvals = norm.cdf(z_scores)  # Lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
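The five new tests share one calling convention: sparse neighborhoods and annotations in, a dict of dense `depletion_pvals` / `enrichment_pvals` arrays out. A usage sketch with toy inputs (a 4-node network and 2 annotation terms; matrix contents are hypothetical, not from the package docs):

```python
import numpy as np
from scipy.sparse import csr_matrix

from risk.stats.stat_tests import compute_hypergeom_test

# Rows = nodes, columns = nodes: entry (i, j) = 1 if node j is in node i's neighborhood
neighborhoods = csr_matrix(np.array([
    [1, 1, 0, 0],
    [1, 1, 1, 0],
    [0, 1, 1, 1],
    [0, 0, 1, 1],
], dtype=np.float32))
# Rows = nodes, columns = annotation terms
annotations = csr_matrix(np.array([
    [1, 0],
    [1, 0],
    [0, 1],
    [0, 1],
], dtype=np.float32))

result = compute_hypergeom_test(neighborhoods, annotations, null_distribution="network")
print(result["enrichment_pvals"].shape)  # (4, 2): one p-value per (neighborhood, term)
```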