PyPI - risk-network - Versions diffs - 0.0.4b2__py3-none-any.whl → 0.0.5__py3-none-any.whl - Mend

risk-network 0.0.4b2__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

risk/__init__.py +2 -5
risk/annotations/annotations.py +3 -3
risk/constants.py +2 -2
risk/neighborhoods/neighborhoods.py +5 -1
risk/network/geometry.py +2 -2
risk/network/graph.py +45 -19
risk/network/io.py +45 -30
risk/network/plot.py +70 -18
risk/risk.py +175 -19
risk/stats/__init__.py +4 -1
risk/stats/fisher_exact.py +132 -0
risk/stats/hypergeom.py +131 -0
risk/stats/permutation/__init__.py +6 -0
risk/stats/permutation/permutation.py +212 -0
risk/stats/{permutation.py → permutation/test_functions.py} +12 -39
risk/stats/stats.py +1 -212
{risk_network-0.0.4b2.dist-info → risk_network-0.0.5.dist-info}/METADATA +4 -5
risk_network-0.0.5.dist-info/RECORD +30 -0
{risk_network-0.0.4b2.dist-info → risk_network-0.0.5.dist-info}/WHEEL +1 -1
risk_network-0.0.4b2.dist-info/RECORD +0 -26
{risk_network-0.0.4b2.dist-info → risk_network-0.0.5.dist-info}/LICENSE +0 -0
{risk_network-0.0.4b2.dist-info → risk_network-0.0.5.dist-info}/top_level.txt +0 -0

risk/risk.py CHANGED Viewed

@@ -6,6 +6,7 @@ risk/risk
 from typing import Any, Dict
 import networkx as nx
+import numpy as np
 import pandas as pd
 from risk.annotations import AnnotationsIO, define_top_annotations
@@ -17,7 +18,12 @@ from risk.neighborhoods import (
     trim_domains_and_top_annotations,
 )
 from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
-from risk.stats import compute_permutation, calculate_significance_matrices
+from risk.stats import (
+    calculate_significance_matrices,
+    compute_fisher_exact_test,
+    compute_hypergeom_test,
+    compute_permutation_test,
+)
 class RISK(NetworkIO, AnnotationsIO):
@@ -39,7 +45,7 @@ class RISK(NetworkIO, AnnotationsIO):
         """Access the logged parameters."""
         return params
-    def load_neighborhoods(
+    def load_neighborhoods_by_permutation(
         self,
         network: nx.Graph,
         annotations: Dict[str, Any],
@@ -52,7 +58,7 @@ class RISK(NetworkIO, AnnotationsIO):
         random_seed: int = 888,
         max_workers: int = 1,
     ) -> Dict[str, Any]:
-        """Load significant neighborhoods for the network.
+        """Load significant neighborhoods for the network using the permutation test.
         Args:
             network (nx.Graph): The network graph.
@@ -75,6 +81,7 @@ class RISK(NetworkIO, AnnotationsIO):
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             edge_length_threshold=edge_length_threshold,
+            statistical_test_function="permutation",
             score_metric=score_metric,
             null_distribution=null_distribution,
             num_permutations=num_permutations,
@@ -82,30 +89,22 @@ class RISK(NetworkIO, AnnotationsIO):
             max_workers=max_workers,
         )
-        # Display the chosen distance metric
-        if distance_metric == "louvain":
-            for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
-        else:
-            for_print_distance_metric = distance_metric
-        print(f"Distance metric: '{for_print_distance_metric}'")
-        print(f"Edge length threshold: {edge_length_threshold}")
-        # Compute neighborhoods based on the network and distance metric
-        neighborhoods = get_network_neighborhoods(
+        # Load neighborhoods based on the network and distance metric
+        neighborhoods = self._load_neighborhoods(
             network,
             distance_metric,
-            edge_length_threshold,
             louvain_resolution=louvain_resolution,
+            edge_length_threshold=edge_length_threshold,
             random_seed=random_seed,
         )
         # Log and display permutation test settings
-        print(f"Null distribution: '{null_distribution}'")
         print(f"Neighborhood scoring metric: '{score_metric}'")
+        print(f"Null distribution: '{null_distribution}'")
         print(f"Number of permutations: {num_permutations}")
-        print(f"Random seed: {random_seed}")
         print(f"Maximum workers: {max_workers}")
-        # Run the permutation test to compute neighborhood significance
-        neighborhood_significance = compute_permutation(
+        # Run permutation test to compute neighborhood significance
+        neighborhood_significance = compute_permutation_test(
             neighborhoods=neighborhoods,
             annotations=annotations["matrix"],
             score_metric=score_metric,
@@ -117,6 +116,116 @@ class RISK(NetworkIO, AnnotationsIO):
         return neighborhood_significance
+    def load_neighborhoods_by_fisher_exact(
+        self,
+        network: nx.Graph,
+        annotations: Dict[str, Any],
+        distance_metric: str = "dijkstra",
+        louvain_resolution: float = 0.1,
+        edge_length_threshold: float = 0.5,
+        random_seed: int = 888,
+        max_workers: int = 1,
+    ) -> Dict[str, Any]:
+        """Compute neighborhood significance for the network with Fisher's exact test.
+        Args:
+            network (nx.Graph): The network graph.
+            annotations (dict): Annotations associated with the network; its "matrix" entry is passed to the test.
+            distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+            louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+            edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+            random_seed (int, optional): Seed forwarded to the neighborhood computation. Defaults to 888.
+            max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
+        Returns:
+            dict: Neighborhood significance as returned by compute_fisher_exact_test.
+        """
+        print_header("Running Fisher's exact test")
+        # Record the settings of this run in the shared parameter log
+        analysis_settings = {
+            "distance_metric": distance_metric,
+            "louvain_resolution": louvain_resolution,
+            "edge_length_threshold": edge_length_threshold,
+            "statistical_test_function": "fisher_exact",
+            "random_seed": random_seed,
+            "max_workers": max_workers,
+        }
+        params.log_neighborhoods(**analysis_settings)
+        # Build the neighborhood matrix (this also prints the neighborhood settings)
+        neighborhood_matrix = self._load_neighborhoods(
+            network,
+            distance_metric,
+            louvain_resolution=louvain_resolution,
+            edge_length_threshold=edge_length_threshold,
+            random_seed=random_seed,
+        )
+        # Display the only setting specific to Fisher's exact test
+        print(f"Maximum workers: {max_workers}")
+        # Score every neighborhood against the annotation matrix and hand the result back
+        return compute_fisher_exact_test(
+            neighborhoods=neighborhood_matrix,
+            annotations=annotations["matrix"],
+            max_workers=max_workers,
+        )
+    def load_neighborhoods_by_hypergeom(
+        self,
+        network: nx.Graph,
+        annotations: Dict[str, Any],
+        distance_metric: str = "dijkstra",
+        louvain_resolution: float = 0.1,
+        edge_length_threshold: float = 0.5,
+        random_seed: int = 888,
+        max_workers: int = 1,
+    ) -> Dict[str, Any]:
+        """Compute neighborhood significance for the network with the hypergeometric test.
+        Args:
+            network (nx.Graph): The network graph.
+            annotations (dict): Annotations associated with the network; its "matrix" entry is passed to the test.
+            distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+            louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+            edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+            random_seed (int, optional): Seed forwarded to the neighborhood computation. Defaults to 888.
+            max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
+        Returns:
+            dict: Neighborhood significance as returned by compute_hypergeom_test.
+        """
+        print_header("Running hypergeometric test")
+        # Record the settings of this run in the shared parameter log
+        analysis_settings = {
+            "distance_metric": distance_metric,
+            "louvain_resolution": louvain_resolution,
+            "edge_length_threshold": edge_length_threshold,
+            "statistical_test_function": "hypergeom",
+            "random_seed": random_seed,
+            "max_workers": max_workers,
+        }
+        params.log_neighborhoods(**analysis_settings)
+        # Build the neighborhood matrix (this also prints the neighborhood settings)
+        neighborhood_matrix = self._load_neighborhoods(
+            network,
+            distance_metric,
+            louvain_resolution=louvain_resolution,
+            edge_length_threshold=edge_length_threshold,
+            random_seed=random_seed,
+        )
+        # Display the only setting specific to the hypergeometric test
+        print(f"Maximum workers: {max_workers}")
+        # Score every neighborhood against the annotation matrix and hand the result back
+        return compute_hypergeom_test(
+            neighborhoods=neighborhood_matrix,
+            annotations=annotations["matrix"],
+            max_workers=max_workers,
+        )
     def load_graph(
         self,
         network: nx.Graph,
@@ -140,7 +249,7 @@ class RISK(NetworkIO, AnnotationsIO):
             annotations (pd.DataFrame): DataFrame containing annotation data for the network.
             neighborhoods (dict): Neighborhood enrichment data.
             tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
-            pval_cutoff (float, optional): P-value cutoff for significance. Defaults to 0.01.
+            pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
             fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
             impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
             prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
@@ -168,7 +277,7 @@ class RISK(NetworkIO, AnnotationsIO):
             max_cluster_size=max_cluster_size,
         )
-        print(f"P-value cutoff: {pval_cutoff}")
+        print(f"p-value cutoff: {pval_cutoff}")
         print(f"FDR BH cutoff: {fdr_cutoff}")
         print(
             f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
@@ -243,6 +352,7 @@ class RISK(NetworkIO, AnnotationsIO):
         plot_outline: bool = True,
         outline_color: str = "black",
         outline_scale: float = 1.00,
+        linestyle: str = "dashed",
     ) -> NetworkPlotter:
         """Get a NetworkPlotter object for plotting.
@@ -253,6 +363,7 @@ class RISK(NetworkIO, AnnotationsIO):
             plot_outline (bool, optional): Whether to plot the network outline. Defaults to True.
             outline_color (str, optional): Color of the outline. Defaults to "black".
             outline_scale (float, optional): Scaling factor for the outline. Defaults to 1.00.
+            linestyle (str): Line style for the network perimeter circle (e.g., dashed, solid). Defaults to "dashed".
         Returns:
             NetworkPlotter: A NetworkPlotter object configured with the given parameters.
@@ -265,7 +376,9 @@ class RISK(NetworkIO, AnnotationsIO):
             plot_outline=plot_outline,
             outline_color=outline_color,
             outline_scale=outline_scale,
+            linestyle=linestyle,
         )
         # Initialize and return a NetworkPlotter object
         return NetworkPlotter(
             graph,
@@ -274,8 +387,51 @@ class RISK(NetworkIO, AnnotationsIO):
             plot_outline=plot_outline,
             outline_color=outline_color,
             outline_scale=outline_scale,
+            linestyle=linestyle,
         )
+    def _load_neighborhoods(
+        self,
+        network: nx.Graph,
+        distance_metric: str = "dijkstra",
+        louvain_resolution: float = 0.1,
+        edge_length_threshold: float = 0.5,
+        random_seed: int = 888,
+    ) -> np.ndarray:
+        """Compute the neighborhoods of the network and print the settings used.
+        Args:
+            network (nx.Graph): The network graph.
+            distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+            louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+            edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+            random_seed (int, optional): Seed for random number generation. Defaults to 888.
+        Returns:
+            np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
+        """
+        # Only the Louvain metric shows its resolution next to its name
+        metric_label = (
+            f"louvain (resolution={louvain_resolution})"
+            if distance_metric == "louvain"
+            else distance_metric
+        )
+        # Display the neighborhood settings
+        print(f"Distance metric: '{metric_label}'")
+        print(f"Edge length threshold: {edge_length_threshold}")
+        print(f"Random seed: {random_seed}")
+        # Delegate the actual computation to the neighborhoods module
+        return get_network_neighborhoods(
+            network,
+            distance_metric,
+            edge_length_threshold,
+            louvain_resolution=louvain_resolution,
+            random_seed=random_seed,
+        )
     def _define_top_annotations(
         self,
         network: nx.Graph,

risk/stats/__init__.py CHANGED Viewed

@@ -3,4 +3,7 @@ risk/stats
 ~~~~~~~~~~
 """
-from .stats import calculate_significance_matrices, compute_permutation
+from .stats import calculate_significance_matrices
+from .fisher_exact import compute_fisher_exact_test
+from .hypergeom import compute_hypergeom_test
+from .permutation import compute_permutation_test

risk/stats/fisher_exact.py ADDED Viewed

@@ -0,0 +1,132 @@
+"""
+risk/stats/fisher_exact
+~~~~~~~~~~~~~~~~~~~~~~~
+"""
+from multiprocessing import get_context, Manager
+from tqdm import tqdm
+from typing import Any, Dict
+import numpy as np
+from scipy.stats import fisher_exact
+def compute_fisher_exact_test(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+    max_workers: int = 4,
+) -> Dict[str, Any]:
+    """Compute Fisher's exact test for enrichment and depletion in neighborhoods.
+    Every (neighborhood column, annotation column) pair is tested; the pairs are split
+    into contiguous slices and processed by a pool of spawned worker processes.
+    Args:
+        neighborhoods (np.ndarray): Binary matrix representing neighborhoods (one neighborhood per column).
+        annotations (np.ndarray): Binary matrix representing annotations (one annotation per column).
+        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
+    Returns:
+        dict: "depletion_pvals" and "enrichment_pvals", each an array of shape
+            (neighborhoods.shape[1], annotations.shape[1]).
+    Raises:
+        ValueError: If max_workers is less than 1.
+    """
+    # Fail early with a clear message instead of a ZeroDivisionError when splitting the work
+    if max_workers < 1:
+        raise ValueError("max_workers must be at least 1")
+    # Coerce both matrices to boolean so the workers can combine them with bitwise logic
+    # NOTE: astype(bool) maps NaN to True, so NaN values must be handled before calling this function
+    neighborhoods = neighborhoods.astype(bool)
+    annotations = annotations.astype(bool)
+    num_neighborhoods = neighborhoods.shape[1]
+    num_annotations = annotations.shape[1]
+    total_tasks = num_neighborhoods * num_annotations
+    # Calculate the workload per worker; the first `remainder` workers take one extra pair
+    chunk_size, remainder = divmod(total_tasks, max_workers)
+    ctx = get_context("spawn")
+    # The manager owns a server process; the with-block shuts it down once all workers are done
+    with Manager() as manager:
+        progress_counter = manager.Value("i", 0)
+        # Build one argument tuple per worker, each covering a contiguous slice of pair indices
+        params_list = []
+        start_idx = 0
+        for i in range(max_workers):
+            end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
+            params_list.append((neighborhoods, annotations, start_idx, end_idx, progress_counter))
+            start_idx = end_idx
+        # Execute the Fisher's exact test using multiprocessing
+        with ctx.Pool(max_workers) as pool:
+            with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
+                # Start the Fisher's exact test process in parallel
+                results = pool.starmap_async(
+                    _fisher_exact_process_subset, params_list, chunksize=1
+                )
+                # Update progress bar based on progress_counter
+                while not results.ready():
+                    progress.update(progress_counter.value - progress.n)
+                    results.wait(0.05)  # Wait for 50ms
+                # Ensure progress bar reaches 100%
+                progress.update(total_tasks - progress.n)
+                # Accumulate results from each worker (starmap preserves the slice order)
+                depletion_pvals, enrichment_pvals = [], []
+                for dp, ep in results.get():
+                    depletion_pvals.extend(dp)
+                    enrichment_pvals.extend(ep)
+    # Reshape the flat results back into (neighborhood, annotation) matrices
+    result_shape = (num_neighborhoods, num_annotations)
+    return {
+        "depletion_pvals": np.array(depletion_pvals).reshape(result_shape),
+        "enrichment_pvals": np.array(enrichment_pvals).reshape(result_shape),
+    }
+def _fisher_exact_process_subset(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+    start_idx: int,
+    end_idx: int,
+    progress_counter,
+) -> tuple:
+    """Run Fisher's exact test on a contiguous slice of neighborhood-annotation pairs.
+    Args:
+        neighborhoods (np.ndarray): The full neighborhood matrix.
+        annotations (np.ndarray): The annotation matrix.
+        start_idx (int): First flat pair index to process (inclusive).
+        end_idx (int): Last flat pair index to process (exclusive).
+        progress_counter: Shared counter whose `value` is incremented once per processed pair.
+    Returns:
+        tuple: Two lists, the depletion p-values and the enrichment p-values, in pair order.
+    """
+    num_annotations = annotations.shape[1]
+    depleted, enriched = [], []
+    for pair_idx in range(start_idx, end_idx):
+        # Flat pair index -> (neighborhood column, annotation column)
+        nbr_idx, ann_idx = divmod(pair_idx, num_annotations)
+        in_nbr = neighborhoods[:, nbr_idx]
+        has_ann = annotations[:, ann_idx]
+        out_nbr = ~in_nbr
+        no_ann = ~has_ann
+        # 2x2 contingency table: rows are inside/outside the neighborhood, columns are with/without the annotation
+        contingency = np.array(
+            [
+                [np.sum(in_nbr & has_ann), np.sum(in_nbr & no_ann)],
+                [np.sum(out_nbr & has_ann), np.sum(out_nbr & no_ann)],
+            ]
+        )
+        # One-sided tests: 'less' for depletion, 'greater' for enrichment
+        _, less_pval = fisher_exact(contingency, alternative="less")
+        depleted.append(less_pval)
+        _, greater_pval = fisher_exact(contingency, alternative="greater")
+        enriched.append(greater_pval)
+        # Report one more finished pair to the parent process
+        progress_counter.value += 1
+    return depleted, enriched

risk/stats/hypergeom.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""
+risk/stats/hypergeom
+~~~~~~~~~~~~~~~~~~~~
+"""
+from multiprocessing import get_context, Manager
+from tqdm import tqdm
+from typing import Any, Dict
+import numpy as np
+from scipy.stats import hypergeom
+def compute_hypergeom_test(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+    max_workers: int = 4,
+) -> Dict[str, Any]:
+    """Compute hypergeometric test for enrichment and depletion in neighborhoods.
+    Every (neighborhood column, annotation column) pair is tested; the pairs are split
+    into contiguous slices and processed by a pool of spawned worker processes.
+    Args:
+        neighborhoods (np.ndarray): Binary matrix representing neighborhoods (one neighborhood per column).
+        annotations (np.ndarray): Binary matrix representing annotations (one annotation per column).
+        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
+    Returns:
+        dict: "depletion_pvals" and "enrichment_pvals", each an array of shape
+            (neighborhoods.shape[1], annotations.shape[1]).
+    Raises:
+        ValueError: If max_workers is less than 1.
+    """
+    # Fail early with a clear message instead of a ZeroDivisionError when splitting the work
+    if max_workers < 1:
+        raise ValueError("max_workers must be at least 1")
+    # Coerce both matrices to boolean so the workers can combine them with bitwise logic
+    # NOTE: astype(bool) maps NaN to True, so NaN values must be handled before calling this function
+    neighborhoods = neighborhoods.astype(bool)
+    annotations = annotations.astype(bool)
+    num_neighborhoods = neighborhoods.shape[1]
+    num_annotations = annotations.shape[1]
+    total_tasks = num_neighborhoods * num_annotations
+    # Calculate the workload per worker; the first `remainder` workers take one extra pair
+    chunk_size, remainder = divmod(total_tasks, max_workers)
+    ctx = get_context("spawn")
+    # The manager owns a server process; the with-block shuts it down once all workers are done
+    with Manager() as manager:
+        progress_counter = manager.Value("i", 0)
+        # Build one argument tuple per worker, each covering a contiguous slice of pair indices
+        params_list = []
+        start_idx = 0
+        for i in range(max_workers):
+            end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
+            params_list.append((neighborhoods, annotations, start_idx, end_idx, progress_counter))
+            start_idx = end_idx
+        # Execute the hypergeometric test using multiprocessing
+        with ctx.Pool(max_workers) as pool:
+            with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
+                # Start the hypergeometric test process in parallel
+                results = pool.starmap_async(_hypergeom_process_subset, params_list, chunksize=1)
+                # Update progress bar based on progress_counter
+                while not results.ready():
+                    progress.update(progress_counter.value - progress.n)
+                    results.wait(0.05)  # Wait for 50ms
+                # Ensure progress bar reaches 100%
+                progress.update(total_tasks - progress.n)
+                # Accumulate results from each worker (starmap preserves the slice order)
+                depletion_pvals, enrichment_pvals = [], []
+                for dp, ep in results.get():
+                    depletion_pvals.extend(dp)
+                    enrichment_pvals.extend(ep)
+    # Reshape the flat results back into (neighborhood, annotation) matrices
+    result_shape = (num_neighborhoods, num_annotations)
+    return {
+        "depletion_pvals": np.array(depletion_pvals).reshape(result_shape),
+        "enrichment_pvals": np.array(enrichment_pvals).reshape(result_shape),
+    }
+def _hypergeom_process_subset(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+    start_idx: int,
+    end_idx: int,
+    progress_counter,
+) -> tuple:
+    """Run the hypergeometric test on a contiguous slice of neighborhood-annotation pairs.
+    Args:
+        neighborhoods (np.ndarray): The full neighborhood matrix.
+        annotations (np.ndarray): The annotation matrix.
+        start_idx (int): First flat pair index to process (inclusive).
+        end_idx (int): Last flat pair index to process (exclusive).
+        progress_counter: Shared counter whose `value` is incremented once per processed pair.
+    Returns:
+        tuple: Two lists, the depletion p-values and the enrichment p-values, in pair order.
+    """
+    num_annotations = annotations.shape[1]
+    depleted, enriched = [], []
+    for pair_idx in range(start_idx, end_idx):
+        # Flat pair index -> (neighborhood column, annotation column)
+        nbr_idx, ann_idx = divmod(pair_idx, num_annotations)
+        members = neighborhoods[:, nbr_idx]
+        annotated = annotations[:, ann_idx]
+        # Hypergeometric parameters: population, successes in population, draws, successes drawn
+        population_size = annotations.shape[0]
+        num_annotated = np.sum(annotated)
+        num_members = np.sum(members)
+        overlap = np.sum(members & annotated)
+        # Depletion: probability of an overlap this small or smaller, P(X <= overlap)
+        depleted.append(hypergeom.cdf(overlap, population_size, num_annotated, num_members))
+        # Enrichment: probability of an overlap this large or larger, P(X >= overlap)
+        enriched.append(hypergeom.sf(overlap - 1, population_size, num_annotated, num_members))
+        # Report one more finished pair to the parent process
+        progress_counter.value += 1
+    return depleted, enriched

risk/stats/permutation/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""
+risk/stats/permutation
+~~~~~~~~~~~~~~~~~~~~~~
+"""
+from .permutation import compute_permutation_test

risk-network 0.0.4b2__py3-none-any.whl → 0.0.5__py3-none-any.whl

risk-network 0.0.4b2__py3-none-any.whl → 0.0.5__py3-none-any.whl