risk_network-0.0.8b26-py3-none-any.whl → risk_network-0.0.9b26-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +74 -47
  4. risk/annotations/io.py +47 -31
  5. risk/log/__init__.py +4 -2
  6. risk/log/{config.py → console.py} +5 -3
  7. risk/log/{params.py → parameters.py} +17 -42
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +446 -0
  10. risk/neighborhoods/community.py +255 -77
  11. risk/neighborhoods/domains.py +62 -31
  12. risk/neighborhoods/neighborhoods.py +156 -160
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +65 -57
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +194 -0
  17. risk/network/{graph.py → graph/network.py} +87 -37
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +56 -47
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +7 -4
  23. risk/network/{plot → plotter}/contour.py +22 -19
  24. risk/network/{plot → plotter}/labels.py +69 -74
  25. risk/network/{plot → plotter}/network.py +170 -34
  26. risk/network/{plot/utils/color.py → plotter/utils/colors.py} +104 -112
  27. risk/network/{plot → plotter}/utils/layout.py +8 -5
  28. risk/risk.py +11 -500
  29. risk/stats/__init__.py +8 -4
  30. risk/stats/binom.py +51 -0
  31. risk/stats/chi2.py +69 -0
  32. risk/stats/hypergeom.py +27 -17
  33. risk/stats/permutation/__init__.py +1 -1
  34. risk/stats/permutation/permutation.py +44 -38
  35. risk/stats/permutation/test_functions.py +25 -17
  36. risk/stats/poisson.py +15 -9
  37. risk/stats/stats.py +15 -13
  38. risk/stats/zscore.py +68 -0
  39. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
  40. risk_network-0.0.9b26.dist-info/RECORD +44 -0
  41. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
  42. risk/network/plot/__init__.py +0 -6
  43. risk/network/plot/plotter.py +0 -137
  44. risk_network-0.0.8b26.dist-info/RECORD +0 -37
  45. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
  46. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/stats/hypergeom.py CHANGED
@@ -6,44 +6,54 @@ risk/stats/hypergeom
 from typing import Any, Dict
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from scipy.stats import hypergeom
 
 
 def compute_hypergeom_test(
-    neighborhoods: np.ndarray, annotations: np.ndarray, null_distribution: str = "network"
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
 ) -> Dict[str, Any]:
-    """Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
+    """
+    Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 
     Returns:
         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Get the total number of nodes in the network
-    total_node_count = neighborhoods.shape[0]
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums
+    neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Convert to dense array
+    annotation_sums = annotations.sum(axis=0).A.flatten()  # Convert to dense array
 
     if null_distribution == "network":
-        # Case 1: Use all nodes as the background
-        background_population = total_node_count
-        neighborhood_sums = np.sum(neighborhoods, axis=0, keepdims=True).T
-        annotation_sums = np.sum(annotations, axis=0, keepdims=True)
+        background_population = total_nodes
     elif null_distribution == "annotations":
-        # Case 2: Only consider nodes with at least one annotation
-        annotated_nodes = np.sum(annotations, axis=1) > 0
-        background_population = np.sum(annotated_nodes)
-        neighborhood_sums = np.sum(neighborhoods[annotated_nodes], axis=0, keepdims=True).T
-        annotation_sums = np.sum(annotations[annotated_nodes], axis=0, keepdims=True)
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
-    # Matrix multiplication for annotated nodes in each neighborhood
-    annotated_in_neighborhood = neighborhoods.T @ annotations
-    # Calculate depletion and enrichment p-values using the hypergeometric distribution
+    # Observed counts
+    annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse result
+    annotated_in_neighborhood = annotated_in_neighborhood.toarray()  # Convert to dense
+    # Align shapes for broadcasting
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)
+    annotation_sums = annotation_sums.reshape(1, -1)
+    background_population = np.array(background_population).reshape(1, 1)
+
+    # Compute hypergeometric p-values
     depletion_pvals = hypergeom.cdf(
         annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
     )
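
Note on the API change above: callers must now pass scipy.sparse.csr_matrix inputs rather than dense arrays. A minimal usage sketch follows; the toy matrices are hypothetical, the square node-by-node shape of `neighborhoods` is an assumption consistent with `shape[1]` serving as the node count, and the result keys are assumed to mirror the `depletion_pvals`/`enrichment_pvals` keys used by the other tests in this diff.

    import numpy as np
    from scipy.sparse import csr_matrix
    from risk.stats.hypergeom import compute_hypergeom_test

    # Toy 4-node network: row i marks the members of node i's neighborhood (hypothetical data)
    neighborhoods = csr_matrix(np.array([
        [1, 1, 0, 0],
        [1, 1, 1, 0],
        [0, 1, 1, 1],
        [0, 0, 1, 1],
    ]))
    # 4 nodes x 3 annotation terms (hypothetical data)
    annotations = csr_matrix(np.array([
        [1, 0, 1],
        [0, 1, 0],
        [1, 1, 0],
        [0, 0, 1],
    ]))

    result = compute_hypergeom_test(neighborhoods, annotations, null_distribution="network")
    print(result["enrichment_pvals"].shape)  # (4, 3): neighborhoods x annotation terms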
risk/stats/permutation/__init__.py CHANGED
@@ -3,4 +3,4 @@ risk/stats/permutation
 ~~~~~~~~~~~~~~~~~~~~~~
 """
 
-from .permutation import compute_permutation_test
+from risk.stats.permutation.permutation import compute_permutation_test
risk/stats/permutation/permutation.py CHANGED
@@ -5,18 +5,19 @@ risk/stats/permutation/permutation
 
 from multiprocessing import get_context, Manager
 from multiprocessing.managers import ValueProxy
-from tqdm import tqdm
-from typing import Any, Callable, Dict
+from typing import Any, Callable, Dict, List, Tuple, Union
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from threadpoolctl import threadpool_limits
+from tqdm import tqdm
 
 from risk.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
 
 
 def compute_permutation_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
     score_metric: str = "sum",
     null_distribution: str = "network",
     num_permutations: int = 1000,
@@ -26,9 +27,9 @@ def compute_permutation_test(
     """Compute permutation test for enrichment and depletion in neighborhoods.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
-        score_metric (str, optional): Metric to use for scoring ('sum', 'mean', etc.). Defaults to "sum".
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        score_metric (str, optional): Metric to use for scoring ('sum' or 'stdev'). Defaults to "sum".
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
         random_seed (int, optional): Seed for random number generation. Defaults to 888.
@@ -38,6 +39,7 @@ def compute_permutation_test(
         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Ensure that the matrices are in the correct format and free of NaN values
+    # NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
     neighborhoods = neighborhoods.astype(np.float32)
     annotations = annotations.astype(np.float32)
     # Retrieve the appropriate neighborhood score function based on the metric
@@ -65,19 +67,19 @@ def compute_permutation_test(
 
 
 def _run_permutation_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
     neighborhood_score_func: Callable,
     null_distribution: str = "network",
     num_permutations: int = 1000,
     random_seed: int = 888,
     max_workers: int = 4,
 ) -> tuple:
-    """Run a permutation test to calculate enrichment and depletion counts.
+    """Run the permutation test to calculate depletion and enrichment counts.
 
     Args:
-        neighborhoods (np.ndarray): The neighborhood matrix.
-        annotations (np.ndarray): The annotation matrix.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         num_permutations (int, optional): Number of permutations. Defaults to 1000.
@@ -93,14 +95,14 @@ def _run_permutation_test(
     if null_distribution == "network":
         idxs = range(annotations.shape[0])
     elif null_distribution == "annotations":
-        idxs = np.nonzero(np.sum(~np.isnan(annotations), axis=1))[0]
+        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
-    # Replace NaNs with zeros in the annotations matrix
-    annotations[np.isnan(annotations)] = 0
+    # Replace NaNs with zeros in the sparse annotations matrix
+    annotations.data[np.isnan(annotations.data)] = 0
     annotation_matrix_obsv = annotations[idxs]
     neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
     # Calculate observed neighborhood scores
@@ -121,28 +123,35 @@ def _run_permutation_test(
     manager = Manager()
     progress_counter = manager.Value("i", 0)
     total_progress = num_permutations
+
+    # Generate precomputed permutations
+    permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
+    # Divide permutations into batches for workers
+    batch_size = subset_size + (1 if remainder > 0 else 0)
+    permutation_batches = [
+        permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)
+    ]
+
     # Execute the permutation test using multiprocessing
     with ctx.Pool(max_workers) as pool:
         with tqdm(total=total_progress, desc="Total progress", position=0) as progress:
             # Prepare parameters for multiprocessing
             params_list = [
                 (
+                    permutation_batches[i],  # Pass the batch of precomputed permutations
                     annotations,
-                    np.array(idxs),
                     neighborhoods_matrix_obsv,
                     observed_neighborhood_scores,
                     neighborhood_score_func,
-                    subset_size + (1 if i < remainder else 0),
                     num_permutations,
                     progress_counter,
                     max_workers,
-                    rng,  # Pass the random number generator to each worker
                 )
                 for i in range(max_workers)
             ]
 
             # Start the permutation process in parallel
-            results = pool.starmap_async(_permutation_process_subset, params_list, chunksize=1)
+            results = pool.starmap_async(_permutation_process_batch, params_list, chunksize=1)
 
             # Update progress bar based on progress_counter
             while not results.ready():
@@ -159,31 +168,27 @@ def _run_permutation_test(
     return counts_depletion, counts_enrichment
 
 
-def _permutation_process_subset(
-    annotation_matrix: np.ndarray,
-    idxs: np.ndarray,
-    neighborhoods_matrix_obsv: np.ndarray,
+def _permutation_process_batch(
+    permutations: Union[List, Tuple, np.ndarray],
+    annotation_matrix: csr_matrix,
+    neighborhoods_matrix_obsv: csr_matrix,
     observed_neighborhood_scores: np.ndarray,
     neighborhood_score_func: Callable,
-    subset_size: int,
     num_permutations: int,
     progress_counter: ValueProxy,
    max_workers: int,
-    rng: np.random.Generator,
 ) -> tuple:
-    """Process a subset of permutations for the permutation test.
+    """Process a batch of permutations in a worker process.
 
     Args:
-        annotation_matrix (np.ndarray): The annotation matrix.
-        idxs (np.ndarray): Indices of valid rows in the matrix.
-        neighborhoods_matrix_obsv (np.ndarray): Observed neighborhoods matrix.
+        permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
+        annotation_matrix (csr_matrix): Sparse binary matrix representing annotations.
+        neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
         observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
-        subset_size (int): Number of permutations to run in this subset.
         num_permutations (int): Number of total permutations across all subsets.
         progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
        max_workers (int): Number of workers for multiprocessing.
-        rng (np.random.Generator): Random number generator object.
 
     Returns:
         tuple: Local counts of depletion and enrichment.
@@ -192,7 +197,9 @@ def _permutation_process_subset(
     local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
     local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
 
-    # NOTE: Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used.
+    # Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used
+    # NOTE: This does not work for Mac M chips due to a bug in the threadpoolctl package
+    # This is currently a known issue and is being addressed by the maintainers [https://github.com/joblib/threadpoolctl/issues/135]
     limits = None if max_workers == 1 else 1
     with threadpool_limits(limits=limits, user_api="blas"):
         # Initialize a local counter for batched progress updates
@@ -200,16 +207,16 @@ def _permutation_process_subset(
         # Calculate the modulo value based on total permutations for 1/100th frequency updates
         modulo_value = max(1, num_permutations // 100)
 
-        for _ in range(subset_size):
-            # Permute the annotation matrix using the RNG
-            annotation_matrix_permut = annotation_matrix[rng.permutation(idxs)]
+        for permuted_idxs in permutations:
+            # Apply precomputed permutation
+            annotation_matrix_permut = annotation_matrix[permuted_idxs]
             # Calculate permuted neighborhood scores
             with np.errstate(invalid="ignore", divide="ignore"):
                 permuted_neighborhood_scores = neighborhood_score_func(
                     neighborhoods_matrix_obsv, annotation_matrix_permut
                 )
 
-            # Update local depletion and enrichment counts based on permuted scores
+            # Update local depletion and enrichment counts
             local_counts_depletion = np.add(
                 local_counts_depletion, permuted_neighborhood_scores <= observed_neighborhood_scores
             )
@@ -218,9 +225,8 @@ def _permutation_process_subset(
                 permuted_neighborhood_scores >= observed_neighborhood_scores,
             )
 
-            # Update local progress counter
+            # Update progress
             local_progress += 1
-            # Update shared progress counter every 1/100th of total permutations
            if local_progress % modulo_value == 0:
                 progress_counter.value += modulo_value
 
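Precomputing every permutation up front and slicing the list into batches makes the run reproducible for a given seed regardless of worker scheduling, at the cost of holding all index arrays in memory. Below is a standalone sketch of the slicing arithmetic with toy numbers; `subset_size, remainder = divmod(num_permutations, max_workers)` is assumed from surrounding code not shown in this hunk:

    import numpy as np

    num_permutations, max_workers = 10, 4
    rng = np.random.default_rng(888)
    idxs = np.arange(6)  # toy index set

    subset_size, remainder = divmod(num_permutations, max_workers)
    batch_size = subset_size + (1 if remainder > 0 else 0)

    permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
    batches = [permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)]
    print([len(b) for b in batches])  # [3, 3, 3, 1] -- trailing batches may be shorter

Since max_workers * batch_size >= num_permutations, every permutation lands in exactly one batch; the trade-off is mildly uneven batch sizes in place of the old per-worker `subset_size + (1 if i < remainder else 0)` split.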
risk/stats/permutation/test_functions.py CHANGED
@@ -4,6 +4,7 @@ risk/stats/permutation/test_functions
 """
 
 import numpy as np
+from scipy.sparse import csr_matrix
 
 # Note: Cython optimizations provided minimal performance benefits.
 # The final version with Cython is archived in the `cython_permutation` branch.
@@ -11,46 +12,53 @@ import numpy as np
 
 
 def compute_neighborhood_score_by_sum(
-    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
+    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
 ) -> np.ndarray:
-    """Compute the sum of attribute values for each neighborhood.
+    """Compute the sum of attribute values for each neighborhood using sparse matrices.
 
     Args:
-        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
-        annotation_matrix (np.ndarray): Matrix representing annotation values.
+        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
 
     Returns:
-        np.ndarray: Sum of attribute values for each neighborhood.
+        np.ndarray: Dense array of summed attribute values for each neighborhood.
     """
     # Calculate the neighborhood score as the dot product of neighborhoods and annotations
-    neighborhood_sum = np.dot(neighborhoods_matrix, annotation_matrix)
-    return neighborhood_sum
+    neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
+    # Convert the result to a dense array for downstream calculations
+    neighborhood_score_dense = neighborhood_score.toarray()
+    return neighborhood_score_dense
 
 
 def compute_neighborhood_score_by_stdev(
-    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
+    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
 ) -> np.ndarray:
-    """Compute the standard deviation of neighborhood scores.
+    """Compute the standard deviation of neighborhood scores for sparse matrices.
 
     Args:
-        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
-        annotation_matrix (np.ndarray): Matrix representing annotation values.
+        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
 
     Returns:
         np.ndarray: Standard deviation of the neighborhood scores.
     """
     # Calculate the neighborhood score as the dot product of neighborhoods and annotations
-    neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
-    # Calculate the number of elements in each neighborhood
-    N = np.sum(neighborhoods_matrix, axis=1)
+    neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
+    # Calculate the number of elements in each neighborhood (sum of rows)
+    N = neighborhoods_matrix.sum(axis=1).A.flatten()  # Convert to 1D array
+    # Avoid division by zero by replacing zeros in N with np.nan temporarily
+    N[N == 0] = np.nan
     # Compute the mean of the neighborhood scores
-    M = neighborhood_score / N[:, None]
+    M = neighborhood_score.multiply(1 / N[:, None]).toarray()  # Sparse element-wise division
     # Compute the mean of squares (EXX) directly using squared annotation matrix
-    EXX = np.dot(neighborhoods_matrix, annotation_matrix**2) / N[:, None]
+    annotation_squared = annotation_matrix.multiply(annotation_matrix)  # Element-wise squaring
+    EXX = (neighborhoods_matrix @ annotation_squared).multiply(1 / N[:, None]).toarray()
     # Calculate variance as EXX - M^2
-    variance = EXX - M**2
+    variance = EXX - np.power(M, 2)
     # Compute the standard deviation as the square root of the variance
     neighborhood_stdev = np.sqrt(variance)
+    # Replace np.nan back with zeros in case N was 0 (no elements in the neighborhood)
+    neighborhood_stdev[np.isnan(neighborhood_stdev)] = 0
     return neighborhood_stdev
 
 
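The sparse stdev above computes a population standard deviation per neighborhood via the identity Var[X] = E[X^2] - (E[X])^2. A standalone numeric check with toy data, mirroring the sparse operations used in the diff:

    import numpy as np
    from scipy.sparse import csr_matrix

    neigh = csr_matrix(np.array([[1, 1, 0], [0, 1, 1]], dtype=np.float32))  # toy neighborhoods
    annot = csr_matrix(np.array([[2.0], [4.0], [6.0]], dtype=np.float32))   # toy annotation values

    score = (neigh @ annot).toarray()  # summed values per neighborhood
    N = neigh.sum(axis=1).A.flatten()  # members per neighborhood
    M = score / N[:, None]             # per-neighborhood mean
    EXX = (neigh @ annot.multiply(annot)).toarray() / N[:, None]  # mean of squares
    stdev = np.sqrt(EXX - M**2)

    print(stdev.ravel())                           # [1. 1.]
    print(np.std([2.0, 4.0]), np.std([4.0, 6.0]))  # 1.0 1.0 -- population stdev reference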
risk/stats/poisson.py CHANGED
@@ -6,39 +6,45 @@ risk/stats/poisson
 from typing import Any, Dict
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from scipy.stats import poisson
 
 
 def compute_poisson_test(
-    neighborhoods: np.ndarray, annotations: np.ndarray, null_distribution: str = "network"
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
 ) -> Dict[str, Any]:
-    """Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
+    """
+    Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 
     Returns:
         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Matrix multiplication to get the number of annotated nodes in each neighborhood
-    annotated_in_neighborhood = neighborhoods @ annotations
+    annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
+    # Convert annotated counts to dense for downstream calculations
+    annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
 
     # Compute lambda_expected based on the chosen null distribution
     if null_distribution == "network":
         # Use the mean across neighborhoods (axis=1)
-        lambda_expected = np.mean(annotated_in_neighborhood, axis=1, keepdims=True)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
     elif null_distribution == "annotations":
         # Use the mean across annotations (axis=0)
-        lambda_expected = np.mean(annotated_in_neighborhood, axis=0, keepdims=True)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
     # Compute p-values for enrichment and depletion using Poisson distribution
-    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood - 1, lambda_expected)
-    depletion_pvals = poisson.cdf(annotated_in_neighborhood, lambda_expected)
+    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
+    depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
 
     return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
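
A side note on the tails above: `1 - poisson.cdf(k - 1, lam)` is the upper-tail probability P(X >= k). `poisson.sf(k - 1, lam)` computes the same quantity without the 1-minus cancellation that loses precision for very small p-values; the diff keeps the explicit form, so this is only an observation. A quick check with toy numbers:

    from scipy.stats import poisson

    k, lam = 7, 4.0
    print(1 - poisson.cdf(k - 1, lam))  # P(X >= 7) ~ 0.1107
    print(poisson.sf(k - 1, lam))       # identical upper tail, numerically safer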
risk/stats/stats.py CHANGED
@@ -44,7 +44,7 @@ def calculate_significance_matrices(
             enrichment_pvals, enrichment_qvals, pval_cutoff=pval_cutoff, fdr_cutoff=fdr_cutoff
         )
         # Compute the enrichment matrix using both q-values and p-values
-        enrichment_matrix = (enrichment_qvals**2) * (enrichment_pvals**0.5)
+        enrichment_matrix = (enrichment_pvals**0.5) * (enrichment_qvals**2)
     else:
         # Compute threshold matrices based on p-value cutoffs only
         depletion_alpha_threshold_matrix = _compute_threshold_matrix(
@@ -62,7 +62,7 @@ def calculate_significance_matrices(
     log_enrichment_matrix = -np.log10(enrichment_matrix)
 
     # Select the appropriate significance matrices based on the specified tail
-    enrichment_matrix, significant_binary_enrichment_matrix = _select_significance_matrices(
+    significance_matrix, significant_binary_significance_matrix = _select_significance_matrices(
         tail,
         log_depletion_matrix,
         depletion_alpha_threshold_matrix,
@@ -71,14 +71,14 @@ def calculate_significance_matrices(
     )
 
     # Filter the enrichment matrix using the binary significance matrix
-    significant_enrichment_matrix = np.where(
-        significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
     )
 
     return {
-        "enrichment_matrix": enrichment_matrix,
-        "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
-        "significant_enrichment_matrix": significant_enrichment_matrix,
+        "significance_matrix": significance_matrix,
+        "significant_significance_matrix": significant_significance_matrix,
+        "significant_binary_significance_matrix": significant_binary_significance_matrix,
     }
 
 
@@ -109,15 +109,15 @@ def _select_significance_matrices(
 
     if tail == "left":
         # Select depletion matrix and corresponding alpha threshold for left-tail analysis
-        enrichment_matrix = -log_depletion_matrix
+        significance_matrix = -log_depletion_matrix
         alpha_threshold_matrix = depletion_alpha_threshold_matrix
     elif tail == "right":
         # Select enrichment matrix and corresponding alpha threshold for right-tail analysis
-        enrichment_matrix = log_enrichment_matrix
+        significance_matrix = log_enrichment_matrix
         alpha_threshold_matrix = enrichment_alpha_threshold_matrix
     elif tail == "both":
         # Select the matrix with the highest absolute values while preserving the sign
-        enrichment_matrix = np.where(
+        significance_matrix = np.where(
             np.abs(log_depletion_matrix) >= np.abs(log_enrichment_matrix),
             -log_depletion_matrix,
             log_enrichment_matrix,
@@ -126,13 +126,15 @@ def _select_significance_matrices(
         alpha_threshold_matrix = np.logical_or(
             depletion_alpha_threshold_matrix, enrichment_alpha_threshold_matrix
         )
+    else:
+        raise ValueError("Invalid value for 'tail'. Must be 'left', 'right', or 'both'.")
 
     # Create a binary significance matrix where valid indices meet the alpha threshold
     valid_idxs = ~np.isnan(alpha_threshold_matrix)
-    significant_binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
-    significant_binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
+    significant_binary_significance_matrix = np.zeros(alpha_threshold_matrix.shape)
+    significant_binary_significance_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _compute_threshold_matrix(
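
The enrichment_* → significance_* key renames above are a breaking change for any caller indexing the dictionary returned by `calculate_significance_matrices`. A hypothetical compatibility shim, not part of the package, that tolerates both versions:

    def get_significance_matrix(matrices: dict):
        """Return the main matrix under either the 0.0.8 or 0.0.9 key name (hypothetical helper)."""
        # 0.0.9b26 renames "enrichment_matrix" to "significance_matrix"
        if "significance_matrix" in matrices:
            return matrices["significance_matrix"]
        return matrices["enrichment_matrix"]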
risk/stats/zscore.py ADDED
@@ -0,0 +1,68 @@
+"""
+risk/stats/zscore
+~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict
+
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.stats import norm
+
+
+def compute_zscore_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[1]
+
+    # Compute sums
+    if null_distribution == "network":
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
+        annotation_sums = annotations.sum(axis=0).A.flatten()  # Dense column sums
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed values
+    observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
+    # Expected values under the null
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
+    annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+
+    # Standard deviation under the null
+    std_dev = np.sqrt(
+        expected
+        * (1 - annotation_sums / background_population)
+        * (1 - neighborhood_sums / background_population)
+    )
+    std_dev[std_dev == 0] = np.nan  # Avoid division by zero
+    # Compute Z-scores
+    z_scores = (observed - expected) / std_dev
+
+    # Convert Z-scores to depletion and enrichment p-values
+    enrichment_pvals = norm.sf(z_scores)  # Upper tail
+    depletion_pvals = norm.cdf(z_scores)  # Lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
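
In the new module, under the 'network' null the expected overlap between a neighborhood of size n and an annotation of size K over a background of N nodes is E = n*K/N, with standard deviation sqrt(E * (1 - K/N) * (1 - n/N)), as implemented above. A scalar sketch of that arithmetic with toy numbers:

    import numpy as np
    from scipy.stats import norm

    N, n, K, observed = 100, 20, 30, 12  # toy background, neighborhood size, annotation size, overlap

    expected = n * K / N                                     # 6.0
    std_dev = np.sqrt(expected * (1 - K / N) * (1 - n / N))  # ~1.833
    z = (observed - expected) / std_dev                      # ~3.27
    print(norm.sf(z))   # enrichment p-value (upper tail), ~5.3e-4
    print(norm.cdf(z))  # depletion p-value (lower tail)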
{risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/METADATA CHANGED
@@ -1,10 +1,10 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: risk-network
-Version: 0.0.8b26
+Version: 0.0.9b26
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
-License: GNU GENERAL PUBLIC LICENSE
+License: GNU GENERAL PUBLIC LICENSE
         Version 3, 29 June 2007
 
 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
@@ -695,19 +695,23 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: ipywidgets
-Requires-Dist: markov-clustering
+Requires-Dist: leidenalg
+Requires-Dist: markov_clustering
 Requires-Dist: matplotlib
 Requires-Dist: networkx
 Requires-Dist: nltk==3.8.1
 Requires-Dist: numpy
 Requires-Dist: openpyxl
 Requires-Dist: pandas
+Requires-Dist: python-igraph
 Requires-Dist: python-louvain
 Requires-Dist: scikit-learn
 Requires-Dist: scipy
 Requires-Dist: statsmodels
 Requires-Dist: threadpoolctl
 Requires-Dist: tqdm
+Dynamic: author
+Dynamic: requires-python
 
 # RISK Network
 
@@ -724,7 +728,7 @@ Requires-Dist: tqdm
 ![Downloads](https://img.shields.io/pypi/dm/risk-network)
 ![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20macos%20%7C%20windows-lightgrey)
 
-**RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
+**RISK** (Regional Inference of Significant Kinships) is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
 
 ## Documentation and Tutorial