risk-network 0.0.9b25-py3-none-any.whl → 0.0.9b27-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Tuple, Union
 
 import networkx as nx
 import numpy as np
+from scipy.sparse import csr_matrix
 from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics.pairwise import cosine_similarity
 
@@ -34,43 +35,43 @@ def get_network_neighborhoods(
     louvain_resolution: float = 0.1,
     leiden_resolution: float = 1.0,
     random_seed: int = 888,
-) -> np.ndarray:
-    """Calculate the combined neighborhoods for each node based on the specified community detection algorithm(s).
+) -> csr_matrix:
+    """Calculate the combined neighborhoods for each node using sparse matrices.
 
     Args:
         network (nx.Graph): The network graph.
         distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
-        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction thresholds.
         louvain_resolution (float, optional): Resolution parameter for the Louvain method.
         leiden_resolution (float, optional): Resolution parameter for the Leiden method.
         random_seed (int, optional): Random seed for methods requiring random initialization.
 
     Returns:
-        np.ndarray: Summed neighborhood matrix from all selected algorithms.
+        csr_matrix: The combined neighborhood matrix.
     """
     # Set random seed for reproducibility
     random.seed(random_seed)
     np.random.seed(random_seed)
 
-    # Ensure distance_metric is a list/tuple for multi-algorithm handling
+    # Ensure distance_metric is a list for multi-algorithm handling
     if isinstance(distance_metric, (str, np.ndarray)):
         distance_metric = [distance_metric]
-    # Ensure fraction_shortest_edges is a list/tuple for multi-threshold handling
+    # Ensure fraction_shortest_edges is a list for multi-threshold handling
     if isinstance(fraction_shortest_edges, (float, int)):
         fraction_shortest_edges = [fraction_shortest_edges] * len(distance_metric)
-    # Check that the number of distance metrics matches the number of edge length thresholds
+    # Validate matching lengths of distance metrics and thresholds
     if len(distance_metric) != len(fraction_shortest_edges):
         raise ValueError(
             "The number of distance metrics must match the number of edge length thresholds."
         )
 
-    # Initialize combined neighborhood matrix
+    # Initialize a sparse CSR matrix for incremental updates
     num_nodes = network.number_of_nodes()
-    combined_neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
+    # Initialize a sparse matrix with the same shape as the network
+    combined_neighborhoods = csr_matrix((num_nodes, num_nodes), dtype=np.uint8)
     # Loop through each distance metric and corresponding edge rank fraction
     for metric, percentile in zip(distance_metric, fraction_shortest_edges):
-        # Call the appropriate neighborhood function based on the metric
+        # Compute neighborhoods for the specified metric
         if metric == "greedy_modularity":
             neighborhoods = calculate_greedy_modularity_neighborhoods(
                 network, fraction_shortest_edges=percentile
@@ -107,22 +108,37 @@ def get_network_neighborhoods(
             )
         else:
             raise ValueError(
-                "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'label_propagation',"
+                "Invalid distance metric. Choose from: 'greedy_modularity', 'label_propagation',"
                 "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
             )
 
-        # Sum the neighborhood matrices
+        # Add the sparse neighborhood matrix
        combined_neighborhoods += neighborhoods
 
-    # Ensure that the maximum value in each row is set to 1
-    # This ensures that for each row, only the strongest relationship (the maximum value) is retained,
-    # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
-    # focusing on the most significant connection per row (or nodes).
-    combined_neighborhoods = _set_max_row_value_to_one(combined_neighborhoods)
+    # Ensure maximum value in each row is set to 1
+    combined_neighborhoods = _set_max_row_value_to_one_sparse(combined_neighborhoods)
 
     return combined_neighborhoods
 
 
+def _set_max_row_value_to_one_sparse(matrix: csr_matrix) -> csr_matrix:
+    """Set the maximum value in each row of a sparse matrix to 1.
+
+    Args:
+        matrix (csr_matrix): The input sparse matrix.
+
+    Returns:
+        csr_matrix: The modified sparse matrix where only the maximum value in each row is set to 1.
+    """
+    # Operate on the CSR buffers in place; row slicing with matrix[i] would return a copy
+    for i in range(matrix.shape[0]):
+        start, end = matrix.indptr[i], matrix.indptr[i + 1]
+        if end > start:
+            row_data = matrix.data[start:end]
+            row_data[:] = (row_data == row_data.max()).astype(matrix.dtype)
+
+    return matrix
+
+
 def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
     """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
     useful for neighborhood matrices that have undergone multiple neighborhood detection algorithms, where the
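
The sparse row-max reduction above mutates the CSR buffers through indptr offsets, since row slicing with matrix[i] returns a copy whose data array is detached from the original. A minimal sketch of the same technique on a toy matrix (values are illustrative, not from the package):

    import numpy as np
    from scipy.sparse import csr_matrix

    # Toy combined-neighborhood matrix; values count algorithm "votes"
    m = csr_matrix(np.array([[0, 2, 1], [3, 0, 3], [0, 0, 0]], dtype=np.uint8))

    # Keep only each row's maximum as 1; ties keep multiple 1s
    for i in range(m.shape[0]):
        start, end = m.indptr[i], m.indptr[i + 1]
        if end > start:
            row = m.data[start:end]
            row[:] = (row == row.max()).astype(m.dtype)
    m.eliminate_zeros()  # drop explicit zeros left behind by the reset

    print(m.toarray())  # [[0 1 0] [1 0 1] [0 0 0]]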
risk/network/geometry.py CHANGED
@@ -3,8 +3,6 @@ risk/network/geometry
 ~~~~~~~~~~~~~~~~~~~~~
 """
 
-import copy
-
 import networkx as nx
 import numpy as np
 
@@ -31,44 +29,43 @@ def assign_edge_lengths(
         """Compute distances between pairs of coordinates."""
         u_coords, v_coords = coords[:, 0, :], coords[:, 1, :]
         if is_sphere:
-            u_norm = np.linalg.norm(u_coords, axis=1, keepdims=True)
-            v_norm = np.linalg.norm(v_coords, axis=1, keepdims=True)
-            u_coords /= u_norm
-            v_coords /= v_norm
+            u_coords /= np.linalg.norm(u_coords, axis=1, keepdims=True)
+            v_coords /= np.linalg.norm(v_coords, axis=1, keepdims=True)
             dot_products = np.einsum("ij,ij->i", u_coords, v_coords)
             return np.arccos(np.clip(dot_products, -1.0, 1.0))
-
         return np.linalg.norm(u_coords - v_coords, axis=1)
 
     # Normalize graph coordinates and weights
     _normalize_graph_coordinates(G)
     _normalize_weights(G)
+
     # Map nodes to sphere and adjust depth if required
     if compute_sphere:
         _map_to_sphere(G)
-        G_depth = _create_depth(copy.deepcopy(G), surface_depth=surface_depth)
+        G_depth = _create_depth(G, surface_depth=surface_depth)
     else:
-        G_depth = copy.deepcopy(G)
-
-    # Precompute edge coordinate arrays for vectorized computation
-    edge_data = []
-    for u, v in G_depth.edges:
-        u_coords = np.array([G_depth.nodes[u]["x"], G_depth.nodes[u]["y"]])
-        v_coords = np.array([G_depth.nodes[v]["x"], G_depth.nodes[v]["y"]])
-        if compute_sphere:
-            u_coords = np.append(u_coords, G_depth.nodes[u].get("z", 0))
-            v_coords = np.append(v_coords, G_depth.nodes[v].get("z", 0))
-        edge_data.append([u_coords, v_coords, (u, v)])
-
-    # Convert to numpy for faster processing
-    edge_coords = np.array([(e[0], e[1]) for e in edge_data])
-    edge_indices = [e[2] for e in edge_data]
-    # Compute distances in bulk
-    distances = compute_distance_vectorized(edge_coords, compute_sphere)
+        G_depth = G
+
+    # Precompute edge coordinate arrays and compute distances in bulk
+    edge_data = np.array(
+        [
+            [
+                np.array(
+                    [G_depth.nodes[u]["x"], G_depth.nodes[u]["y"], G_depth.nodes[u].get("z", 0)]
+                ),
+                np.array(
+                    [G_depth.nodes[v]["x"], G_depth.nodes[v]["y"], G_depth.nodes[v].get("z", 0)]
+                ),
+            ]
+            for u, v in G_depth.edges
+        ]
+    )
+    # Compute distances
+    distances = compute_distance_vectorized(edge_data, compute_sphere)
     # Assign distances back to the graph
-    for (u, v), distance in zip(edge_indices, distances):
+    for (u, v), distance in zip(G_depth.edges, distances):
         if include_edge_weight:
-            weight = G.edges[u, v].get("normalized_weight", 0) + 1e-6
+            weight = G.edges[u, v].get("normalized_weight", 1e-6)  # Avoid divide-by-zero
             G.edges[u, v]["length"] = distance / np.sqrt(weight)
         else:
             G.edges[u, v]["length"] = distance
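
The nested compute_distance_vectorized helper computes arc lengths on the unit sphere: normalize both endpoints, take row-wise dot products with np.einsum, then apply arccos, with np.clip guarding against floating-point round-off pushing a dot product just outside [-1, 1]. A standalone sketch of that math (coordinates are illustrative):

    import numpy as np

    # Endpoint pairs, shape (n_edges, 2, 3)
    coords = np.array([
        [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],   # orthogonal points -> pi/2
        [[1.0, 0.0, 0.0], [-1.0, 0.0, 0.0]],  # antipodal points -> pi
    ])
    u, v = coords[:, 0, :], coords[:, 1, :]
    u = u / np.linalg.norm(u, axis=1, keepdims=True)  # project onto unit sphere
    v = v / np.linalg.norm(v, axis=1, keepdims=True)
    dots = np.einsum("ij,ij->i", u, v)  # row-wise dot products
    print(np.arccos(np.clip(dots, -1.0, 1.0)))  # [1.5708 3.1416]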
risk/network/io.py CHANGED
@@ -217,6 +217,9 @@ class NetworkIO:
 
         Returns:
             nx.Graph: Loaded and processed network.
+
+        Raises:
+            ValueError: If no matching attribute metadata file is found.
         """
         filetype = "Cytoscape"
         # Log the loading of the Cytoscape file
@@ -258,13 +261,29 @@ class NetworkIO:
 
         # Read the node attributes (from /tables/)
         attribute_metadata_keywords = ["/tables/", "SHARED_ATTRS", "edge.cytable"]
-        attribute_metadata = [
-            os.path.join(tmp_dir, cf)
-            for cf in cys_files
-            if all(keyword in cf for keyword in attribute_metadata_keywords)
-        ][0]
-        # Load attributes file from Cytoscape as pandas data frame
-        attribute_table = pd.read_csv(attribute_metadata, sep=",", header=None, skiprows=1)
+        # Use a generator to find the first matching file
+        attribute_metadata = next(
+            (
+                os.path.join(tmp_dir, cf)
+                for cf in cys_files
+                if all(keyword in cf for keyword in attribute_metadata_keywords)
+            ),
+            None,  # Default if no file matches
+        )
+        if attribute_metadata:
+            # Load the attribute table with explicit parsing options
+            attribute_table = pd.read_csv(
+                attribute_metadata,
+                sep=",",
+                header=None,
+                skiprows=1,
+                dtype=str,  # Read all columns as strings to avoid mixed-type inference
+                engine="c",  # Use the fast C parsing engine
+                low_memory=False,  # Parse in a single pass for consistent dtypes
+            )
+        else:
+            raise ValueError("No matching attribute metadata file found.")
+
         # Set columns
         attribute_table.columns = attribute_table.iloc[0]
         # Skip first four rows
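
Swapping the [ ... ][0] list indexing for next() stops scanning at the first hit and turns a potential IndexError into an explicit None, which the new code converts into a descriptive ValueError. A minimal sketch of the pattern (file names are hypothetical):

    import os

    cys_files = ["views/network.xml", "tables/SHARED_ATTRS-edge.cytable"]
    keywords = ["tables/", "SHARED_ATTRS", "edge.cytable"]

    # First matching path, or None if nothing matches
    match = next(
        (os.path.join("/tmp/cys", f) for f in cys_files if all(k in f for k in keywords)),
        None,
    )
    print(match)  # /tmp/cys/tables/SHARED_ATTRS-edge.cytable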
@@ -464,14 +483,19 @@ class NetworkIO:
         Args:
             G (nx.Graph): A NetworkX graph object.
         """
-        missing_weights = 0
-        # Assign user-defined edge weights to the "weight" attribute
-        nx.set_edge_attributes(G, 1.0, "weight")  # Set default weight
-        if self.weight_label in nx.get_edge_attributes(G, self.weight_label):
-            nx.set_edge_attributes(G, nx.get_edge_attributes(G, self.weight_label), "weight")
-
-        if self.include_edge_weight and missing_weights:
-            logger.debug(f"Total edges missing weights: {missing_weights}")
+        # Set default weight for all edges in bulk
+        default_weight = 1.0
+        nx.set_edge_attributes(G, default_weight, "weight")
+        # Check and assign user-defined edge weights if available
+        weight_attributes = nx.get_edge_attributes(G, self.weight_label)
+        if weight_attributes:
+            nx.set_edge_attributes(G, weight_attributes, "weight")
+
+        # Log missing weights if include_edge_weight is enabled
+        if self.include_edge_weight:
+            missing_weights = len(G.edges) - len(weight_attributes)
+            if missing_weights > 0:
+                logger.debug(f"Total edges missing weights: {missing_weights}")
 
     def _validate_nodes(self, G: nx.Graph) -> None:
         """Validate the graph structure and attributes with attribute fallback for positions and labels.
risk/stats/__init__.py CHANGED
@@ -3,11 +3,13 @@ risk/stats
 ~~~~~~~~~~
 """
 
-from risk.stats.binom import compute_binom_test
-from risk.stats.chi2 import compute_chi2_test
-from risk.stats.hypergeom import compute_hypergeom_test
 from risk.stats.permutation import compute_permutation_test
-from risk.stats.poisson import compute_poisson_test
-from risk.stats.zscore import compute_zscore_test
+from risk.stats.stat_tests import (
+    compute_binom_test,
+    compute_chi2_test,
+    compute_hypergeom_test,
+    compute_poisson_test,
+    compute_zscore_test,
+)
 
-from risk.stats.stats import calculate_significance_matrices
+from risk.stats.significance import calculate_significance_matrices
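
Because the package-level __init__.py re-exports every test under its old name, imports that go through risk.stats rather than the removed submodules should keep working; a sketch of the import path that stays stable across both versions, assuming callers avoid the deleted risk.stats.binom-style modules:

    # Names resolve via risk/stats/__init__.py on both 0.0.9b25 and 0.0.9b27
    from risk.stats import (
        compute_binom_test,
        compute_zscore_test,
        calculate_significance_matrices,
    )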
@@ -95,7 +95,7 @@ def _run_permutation_test(
     if null_distribution == "network":
         idxs = range(annotations.shape[0])
     elif null_distribution == "annotations":
-        idxs = np.nonzero(np.sum(~np.isnan(annotations), axis=1))[0]
+        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
@@ -1,6 +1,6 @@
 """
-risk/stats/stats
-~~~~~~~~~~~~~~~~
+risk/stats/significance
+~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 from typing import Any, Dict, Union
@@ -0,0 +1,267 @@
+"""
+risk/stats/stat_tests
+~~~~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict
+
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.stats import binom
+from scipy.stats import chi2
+from scipy.stats import hypergeom
+from scipy.stats import poisson
+from scipy.stats import norm
+
+
+def compute_binom_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Get the total number of nodes in the network
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums (remain sparse here)
+    neighborhood_sizes = neighborhoods.sum(axis=1)  # Row sums
+    annotation_totals = annotations.sum(axis=0)  # Column sums
+    # Compute probabilities (convert to dense)
+    if null_distribution == "network":
+        p_values = (annotation_totals / total_nodes).A.flatten()  # Dense 1D array
+    elif null_distribution == "annotations":
+        p_values = (annotation_totals / annotations.sum()).A.flatten()  # Dense 1D array
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed counts (sparse matrix multiplication)
+    annotated_counts = neighborhoods @ annotations  # Sparse result
+    annotated_counts_dense = annotated_counts.toarray()  # Convert for dense operations
+
+    # Compute enrichment and depletion p-values
+    enrichment_pvals = 1 - binom.cdf(annotated_counts_dense - 1, neighborhood_sizes.A, p_values)
+    depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes.A, p_values)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+def compute_chi2_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[0]
+
+    if null_distribution == "network":
+        # Case 1: Use all nodes as the background
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0)  # Column sums of neighborhoods
+        annotation_sums = annotations.sum(axis=0)  # Column sums of annotations
+    elif null_distribution == "annotations":
+        # Case 2: Only consider nodes with at least one annotation
+        annotated_nodes = (
+            np.ravel(annotations.sum(axis=1)) > 0
+        )  # Row-wise sum to filter nodes with annotations
+        background_population = annotated_nodes.sum()  # Total number of annotated nodes
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(
+            axis=0
+        )  # Neighborhood sums for annotated nodes
+        annotation_sums = annotations[annotated_nodes].sum(
+            axis=0
+        )  # Annotation sums for annotated nodes
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Convert to dense arrays for downstream computations
+    neighborhood_sums = np.asarray(neighborhood_sums).reshape(-1, 1)  # Ensure column vector shape
+    annotation_sums = np.asarray(annotation_sums).reshape(1, -1)  # Ensure row vector shape
+
+    # Observed values: number of annotated nodes in each neighborhood
+    observed = neighborhoods.T @ annotations  # Shape: (neighborhoods, annotations)
+    # Expected values under the null
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+    # Chi-squared statistic: sum((observed - expected)^2 / expected)
+    with np.errstate(divide="ignore", invalid="ignore"):  # Handle divide-by-zero
+        chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)
+
+    # Compute p-values for enrichment (upper tail) and depletion (lower tail)
+    enrichment_pvals = chi2.sf(chi2_stat, df=1)  # Survival function for upper tail
+    depletion_pvals = chi2.cdf(chi2_stat, df=1)  # Cumulative distribution for lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+def compute_hypergeom_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Get the total number of nodes in the network
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums
+    neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Convert to dense array
+    annotation_sums = annotations.sum(axis=0).A.flatten()  # Convert to dense array
+
+    if null_distribution == "network":
+        background_population = total_nodes
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed counts
+    annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse result
+    annotated_in_neighborhood = annotated_in_neighborhood.toarray()  # Convert to dense
+    # Align shapes for broadcasting
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)
+    annotation_sums = annotation_sums.reshape(1, -1)
+    background_population = np.array(background_population).reshape(1, 1)
+
+    # Compute hypergeometric p-values
+    depletion_pvals = hypergeom.cdf(
+        annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
+    )
+    enrichment_pvals = hypergeom.sf(
+        annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
+    )
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+def compute_poisson_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Matrix multiplication to get the number of annotated nodes in each neighborhood
+    annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
+    # Convert annotated counts to dense for downstream calculations
+    annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
+
+    # Compute lambda_expected based on the chosen null distribution
+    if null_distribution == "network":
+        # Use the mean across neighborhoods (axis=1)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
+    elif null_distribution == "annotations":
+        # Use the mean across annotations (axis=0)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Compute p-values for enrichment and depletion using Poisson distribution
+    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
+    depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+def compute_zscore_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[1]
+
+    # Compute sums
+    if null_distribution == "network":
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
+        annotation_sums = annotations.sum(axis=0).A.flatten()  # Dense column sums
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed values
+    observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
+    # Expected values under the null
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
+    annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+
+    # Standard deviation under the null
+    std_dev = np.sqrt(
+        expected
+        * (1 - annotation_sums / background_population)
+        * (1 - neighborhood_sums / background_population)
+    )
+    std_dev[std_dev == 0] = np.nan  # Avoid division by zero
+    # Compute Z-scores
+    z_scores = (observed - expected) / std_dev
+
+    # Convert Z-scores to depletion and enrichment p-values
+    enrichment_pvals = norm.sf(z_scores)  # Upper tail
+    depletion_pvals = norm.cdf(z_scores)  # Lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
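
All five tests share one contract: two sparse binary csr_matrix inputs in, a dense p-value matrix per tail out. A minimal usage sketch on toy data (the matrices are illustrative, not from the package):

    import numpy as np
    from scipy.sparse import csr_matrix
    from risk.stats import compute_hypergeom_test

    # 3 nodes, 2 annotation terms; rows of `neighborhoods` index neighborhoods
    neighborhoods = csr_matrix(np.array([[1, 1, 0], [0, 1, 1], [1, 0, 1]]))
    annotations = csr_matrix(np.array([[1, 0], [1, 1], [0, 0]]))

    result = compute_hypergeom_test(neighborhoods, annotations, null_distribution="network")
    print(result["enrichment_pvals"].shape)  # (3, 2) -> neighborhoods x terms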
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: risk-network
-Version: 0.0.9b25
+Version: 0.0.9b27
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
@@ -1,20 +1,20 @@
-risk/__init__.py,sha256=Rgsnt64VMhbyTleA_DXr5VcFJG6748i86zd0VZHy9r4,127
+risk/__init__.py,sha256=SltIM8IfW_qsGbSLdMIbWJ-5vP7CfTfjXj5rinVcdJI,127
 risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
 risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
 risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
-risk/annotations/annotations.py,sha256=XmVuLL5NFAj6F30fZY22N8nb4LK6sig7fE0NXL1iZp8,14497
+risk/annotations/annotations.py,sha256=g8ca9H49dZIqHv6Od3Dem4BIo_euy8alL3PDauT6ZJI,14088
 risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
 risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
 risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
 risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
 risk/neighborhoods/__init__.py,sha256=Q74HwTH7okI-vaskJPy2bYwb5sNjGASTzJ6m8V8arCU,234
-risk/neighborhoods/api.py,sha256=KdUouMHJPwvePJGdz7Ck1GWYhN96QDb_SuPyTt3KwAc,23515
-risk/neighborhoods/community.py,sha256=VIDvB-SsMDDvWkUaYXf_E-gcg0HELMVv2MKshPwJAFQ,15480
-risk/neighborhoods/domains.py,sha256=MufM4cbvP3HrJyESOuGT0wYD_cz3rjT0SGqEnbytkh8,12523
-risk/neighborhoods/neighborhoods.py,sha256=bBUY7hXqcsOoAEkPdRoRNuj36WsllXicmz_LxZfEuyw,21186
+risk/neighborhoods/api.py,sha256=TjIMVnSPC702zMlwyaz2i0ofNx-d9L9g3P-TTSBMx90,23341
+risk/neighborhoods/community.py,sha256=5Q_-VAJC-5SY5EUsB8gIlemeDoAL85uLjyl16pItHiQ,16699
+risk/neighborhoods/domains.py,sha256=jMJ4-Qzwgmo6Hya8h0E2_IcMaLpbuH_FWlmSjJl2ikc,12832
+risk/neighborhoods/neighborhoods.py,sha256=l9FhADB1C-OxM8E9QXOcA4osUDgA1vs4ud-OCGKKybc,21457
 risk/network/__init__.py,sha256=oVi3FA1XXKD84014Cykq-9bpX4_s0F3aAUfNOU-07Qw,73
-risk/network/geometry.py,sha256=omyb9afSKMUtQ-RKVHUoRyxJifOW0ASenHjyCjg43kg,6836
-risk/network/io.py,sha256=JV5hqf1oIwWUVw07BjhD0qACQGbtIeA8NSMDcFql88k,23465
+risk/network/geometry.py,sha256=dU1hMq4j9gG0nkDqGRl_NiZ2Z-xvT_HF11FwEQ7oOR4,6570
+risk/network/io.py,sha256=PqsRw1g7nfJJ3xs4aYcim3JWWLMFS1irgtg5hIyht5I,24376
 risk/network/graph/__init__.py,sha256=ziGJew3yhtqvrb9LUuneDu_LwW2Wa9vd4UuhoL5l1CA,91
 risk/network/graph/api.py,sha256=Ag4PjFTX6BUvmW7ZdfIgwdsr8URigX9jD9yEFRXUxrU,8220
 risk/network/graph/network.py,sha256=KdIBM_-flHMWcBK4RUjU_QRfOZIf_yv9fv4L7AOLkqU,12199
@@ -27,18 +27,14 @@ risk/network/plotter/labels.py,sha256=QesD1ybseA6ldLmWMqVaAqSPR34yVEgEzXzg1AKQD6
 risk/network/plotter/network.py,sha256=wcBf1GaM1wPzW-iXTrLzOmlG2_9wwfll_hJUzUO2u2Y,19917
 risk/network/plotter/utils/colors.py,sha256=EFlIUZ3MGSKoHeZi9cgR6uLKK5GGJ4QzE6lmnrHViLw,18967
 risk/network/plotter/utils/layout.py,sha256=2P4Bqi1dGiX9KsriLYqiq1KlHpsMdZemAUza4WcYoNA,3634
-risk/stats/__init__.py,sha256=1CPRtT1LDwudrvFgkVtSom8cp4cM7b4X6b4fHPaNHw0,405
-risk/stats/binom.py,sha256=8Qwcxnq1u-AycwQs_sQxwuxgkgDpES-A-kIcj4fRc3g,2032
-risk/stats/chi2.py,sha256=MGFNrWP40i9TxnMsZYbDgqdMrN_Fe0xFsnWU8xNsVSs,3046
-risk/stats/hypergeom.py,sha256=VfQBtpgSGG826uBP1WyBMavP3ylZnhponUZ2rHFdGAE,2502
-risk/stats/poisson.py,sha256=_KHe9g8XNRD4-Q486zx2UgHCO2QyvBOiHuX3hRZLEqc,2050
-risk/stats/stats.py,sha256=y2DMJF3uKRIWRyYiCd2Kwxa-EqOzX5HsMBms_Vw6wK8,7322
-risk/stats/zscore.py,sha256=Jx9cLKAHiDnrgW_Su9KZYYQiTVsuyJMC7vXBusnEI-c,2648
+risk/stats/__init__.py,sha256=2zdLv3tUHKyAjwAo7LprVXRaak1cHgrpYMVMSik6JM4,324
+risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,7336
+risk/stats/stat_tests.py,sha256=ImCC0Ao7KfLxuIEt_9JzfH92uVRPNOUzEXbV7Y-HTDo,11776
 risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
-risk/stats/permutation/permutation.py,sha256=693DyWPNz6L_wCL06F7gj2u1df0qVc4F3Na36jCLYMI,10577
+risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
 risk/stats/permutation/test_functions.py,sha256=D3XMPM8CasUNytWSRce22TI6KK6XulYn5uGG4lWxaHs,3120
-risk_network-0.0.9b25.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.9b25.dist-info/METADATA,sha256=XJSNAooxsGNwoMnp-6Nx0YCnp1zBWVm9ej2yjtUUPDg,47627
-risk_network-0.0.9b25.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-risk_network-0.0.9b25.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.9b25.dist-info/RECORD,,
+risk_network-0.0.9b27.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.9b27.dist-info/METADATA,sha256=7cc6HEXAc7nDYfRkuNxlP-vMOnnxsGNSkEXMIZJ8sgo,47627
+risk_network-0.0.9b27.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+risk_network-0.0.9b27.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.9b27.dist-info/RECORD,,