PyPI - risk-network - Versions diffs - 0.0.3b4__py3-none-any.whl → 0.0.4__py3-none-any.whl - Mend

risk-network 0.0.3b4__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

risk/__init__.py +1 -4
risk/annotations/annotations.py +4 -2
risk/annotations/io.py +1 -1
risk/neighborhoods/neighborhoods.py +15 -2
risk/network/geometry.py +2 -2
risk/network/graph.py +4 -4
risk/network/io.py +234 -53
risk/network/plot.py +179 -58
risk/risk.py +187 -75
risk/stats/__init__.py +4 -1
risk/stats/fisher_exact.py +132 -0
risk/stats/hypergeom.py +131 -0
risk/stats/permutation/__init__.py +6 -0
risk/stats/permutation/permutation.py +212 -0
risk/stats/{permutation.py → permutation/test_functions.py} +12 -39
risk/stats/stats.py +1 -212
{risk_network-0.0.3b4.dist-info → risk_network-0.0.4.dist-info}/METADATA +6 -6
risk_network-0.0.4.dist-info/RECORD +30 -0
{risk_network-0.0.3b4.dist-info → risk_network-0.0.4.dist-info}/WHEEL +1 -1
risk_network-0.0.3b4.dist-info/RECORD +0 -26
{risk_network-0.0.3b4.dist-info → risk_network-0.0.4.dist-info}/LICENSE +0 -0
{risk_network-0.0.3b4.dist-info → risk_network-0.0.4.dist-info}/top_level.txt +0 -0

risk/risk.py CHANGED Viewed

@@ -6,6 +6,7 @@ risk/risk
 from typing import Any, Dict
 import networkx as nx
+import numpy as np
 import pandas as pd
 from risk.annotations import AnnotationsIO, define_top_annotations
@@ -17,7 +18,12 @@ from risk.neighborhoods import (
     trim_domains_and_top_annotations,
 )
 from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
-from risk.stats import compute_permutation, calculate_significance_matrices
+from risk.stats import (
+    calculate_significance_matrices,
+    compute_fisher_exact_test,
+    compute_hypergeom_test,
+    compute_permutation_test,
+)
 class RISK(NetworkIO, AnnotationsIO):
@@ -27,85 +33,39 @@ class RISK(NetworkIO, AnnotationsIO):
     and performing network-based statistical analysis, such as neighborhood significance testing.
     """
-    def __init__(
-        self,
-        compute_sphere: bool = True,
-        surface_depth: float = 0.0,
-        distance_metric: str = "dijkstra",
-        louvain_resolution: float = 0.1,
-        min_edges_per_node: int = 0,
-        edge_length_threshold: float = 0.5,
-        include_edge_weight: bool = True,
-        weight_label: str = "weight",
-    ):
-        """Initialize the RISK class with configuration settings.
-        Args:
-            compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
-            surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
-            distance_metric (str, optional): Distance metric to use in network analysis. Defaults to "dijkstra".
-            louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
-            min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.
-            edge_length_threshold (float, optional): Edge length threshold for analysis. Defaults to 0.5.
-            include_edge_weight (bool, optional): Whether to include edge weights in calculations. Defaults to True.
-            weight_label (str, optional): Label for edge weights. Defaults to "weight".
-        """
+    def __init__(self, *args, **kwargs):
+        """Initialize the RISK class with configuration settings."""
         # Initialize and log network parameters
         params.initialize()
-        params.log_network(
-            compute_sphere=compute_sphere,
-            surface_depth=surface_depth,
-            distance_metric=distance_metric,
-            louvain_resolution=louvain_resolution,
-            min_edges_per_node=min_edges_per_node,
-            edge_length_threshold=edge_length_threshold,
-            include_edge_weight=include_edge_weight,
-            weight_label=weight_label,
-        )
-        # Initialize parent classes
-        NetworkIO.__init__(
-            self,
-            compute_sphere=compute_sphere,
-            surface_depth=surface_depth,
-            distance_metric=distance_metric,
-            louvain_resolution=louvain_resolution,
-            min_edges_per_node=min_edges_per_node,
-            edge_length_threshold=edge_length_threshold,
-            include_edge_weight=include_edge_weight,
-            weight_label=weight_label,
-        )
-        AnnotationsIO.__init__(self)
-        # Set class attributes
-        self.compute_sphere = compute_sphere
-        self.surface_depth = surface_depth
-        self.distance_metric = distance_metric
-        self.louvain_resolution = louvain_resolution
-        self.min_edges_per_node = min_edges_per_node
-        self.edge_length_threshold = edge_length_threshold
-        self.include_edge_weight = include_edge_weight
-        self.weight_label = weight_label
+        # Initialize the parent classes
+        super().__init__(*args, **kwargs)
     @property
     def params(self):
         """Access the logged parameters."""
         return params
-    def load_neighborhoods(
+    def load_neighborhoods_by_permutation(
         self,
         network: nx.Graph,
         annotations: Dict[str, Any],
+        distance_metric: str = "dijkstra",
+        louvain_resolution: float = 0.1,
+        edge_length_threshold: float = 0.5,
         score_metric: str = "sum",
         null_distribution: str = "network",
         num_permutations: int = 1000,
         random_seed: int = 888,
         max_workers: int = 1,
     ) -> Dict[str, Any]:
-        """Load significant neighborhoods for the network.
+        """Load significant neighborhoods for the network using the permutation test.
         Args:
             network (nx.Graph): The network graph.
             annotations (dict): Annotations data for the network; its "matrix" entry is passed to the permutation test.
+            distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+            louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+            edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
             score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
             null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
             num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
@@ -118,6 +78,10 @@ class RISK(NetworkIO, AnnotationsIO):
         print_header("Running permutation test")
         # Log neighborhood analysis parameters
         params.log_neighborhoods(
+            distance_metric=distance_metric,
+            louvain_resolution=louvain_resolution,
+            edge_length_threshold=edge_length_threshold,
+            statistical_test_function="permutation",
             score_metric=score_metric,
             null_distribution=null_distribution,
             num_permutations=num_permutations,
@@ -125,27 +89,22 @@ class RISK(NetworkIO, AnnotationsIO):
             max_workers=max_workers,
         )
-        # Display the chosen distance metric
-        if self.distance_metric == "louvain":
-            for_print_distance_metric = f"louvain (resolution={self.louvain_resolution})"
-        else:
-            for_print_distance_metric = self.distance_metric
-        print(f"Distance metric: '{for_print_distance_metric}'")
-        # Compute neighborhoods based on the network and distance metric
-        neighborhoods = get_network_neighborhoods(
+        # Load neighborhoods based on the network and distance metric
+        neighborhoods = self._load_neighborhoods(
             network,
-            self.distance_metric,
-            self.edge_length_threshold,
-            louvain_resolution=self.louvain_resolution,
+            distance_metric,
+            louvain_resolution=louvain_resolution,
+            edge_length_threshold=edge_length_threshold,
             random_seed=random_seed,
         )
         # Log and display permutation test settings
-        print(f"Null distribution: '{null_distribution}'")
         print(f"Neighborhood scoring metric: '{score_metric}'")
+        print(f"Null distribution: '{null_distribution}'")
         print(f"Number of permutations: {num_permutations}")
-        # Run the permutation test to compute neighborhood significance
-        neighborhood_significance = compute_permutation(
+        print(f"Maximum workers: {max_workers}")
+        # Run permutation test to compute neighborhood significance
+        neighborhood_significance = compute_permutation_test(
             neighborhoods=neighborhoods,
             annotations=annotations["matrix"],
             score_metric=score_metric,
@@ -157,6 +116,116 @@ class RISK(NetworkIO, AnnotationsIO):
         return neighborhood_significance
+    def load_neighborhoods_by_fisher_exact(
+        self,
+        network: nx.Graph,
+        annotations: Dict[str, Any],
+        distance_metric: str = "dijkstra",
+        louvain_resolution: float = 0.1,
+        edge_length_threshold: float = 0.5,
+        random_seed: int = 888,
+        max_workers: int = 1,
+    ) -> Dict[str, Any]:
+        """Load significant neighborhoods for the network using the Fisher's exact test.
+        Args:
+            network (nx.Graph): The network graph.
+            annotations (dict): Annotations data for the network; its "matrix" entry is passed to Fisher's exact test.
+            distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+            louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+            edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+            random_seed (int, optional): Seed for random number generation. Defaults to 888.
+            max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
+        Returns:
+            dict: Computed significance of neighborhoods.
+        """
+        print_header("Running Fisher's exact test")
+        # Log neighborhood analysis parameters
+        params.log_neighborhoods(
+            distance_metric=distance_metric,
+            louvain_resolution=louvain_resolution,
+            edge_length_threshold=edge_length_threshold,
+            statistical_test_function="fisher_exact",
+            random_seed=random_seed,
+            max_workers=max_workers,
+        )
+        # Load neighborhoods based on the network and distance metric
+        neighborhoods = self._load_neighborhoods(
+            network,
+            distance_metric,
+            louvain_resolution=louvain_resolution,
+            edge_length_threshold=edge_length_threshold,
+            random_seed=random_seed,
+        )
+        # Log and display Fisher's exact test settings
+        print(f"Maximum workers: {max_workers}")
+        # Run Fisher's exact test to compute neighborhood significance
+        neighborhood_significance = compute_fisher_exact_test(
+            neighborhoods=neighborhoods,
+            annotations=annotations["matrix"],
+            max_workers=max_workers,
+        )
+        return neighborhood_significance
+    def load_neighborhoods_by_hypergeom(
+        self,
+        network: nx.Graph,
+        annotations: Dict[str, Any],
+        distance_metric: str = "dijkstra",
+        louvain_resolution: float = 0.1,
+        edge_length_threshold: float = 0.5,
+        random_seed: int = 888,
+        max_workers: int = 1,
+    ) -> Dict[str, Any]:
+        """Load significant neighborhoods for the network using the hypergeometric test.
+        Args:
+            network (nx.Graph): The network graph.
+            annotations (dict): Annotations data for the network; its "matrix" entry is passed to the hypergeometric test.
+            distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+            louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+            edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+            random_seed (int, optional): Seed for random number generation. Defaults to 888.
+            max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
+        Returns:
+            dict: Computed significance of neighborhoods.
+        """
+        print_header("Running hypergeometric test")
+        # Log neighborhood analysis parameters
+        params.log_neighborhoods(
+            distance_metric=distance_metric,
+            louvain_resolution=louvain_resolution,
+            edge_length_threshold=edge_length_threshold,
+            statistical_test_function="hypergeom",
+            random_seed=random_seed,
+            max_workers=max_workers,
+        )
+        # Load neighborhoods based on the network and distance metric
+        neighborhoods = self._load_neighborhoods(
+            network,
+            distance_metric,
+            louvain_resolution=louvain_resolution,
+            edge_length_threshold=edge_length_threshold,
+            random_seed=random_seed,
+        )
+        # Log and display hypergeometric test settings
+        print(f"Maximum workers: {max_workers}")
+        # Run hypergeometric test to compute neighborhood significance
+        neighborhood_significance = compute_hypergeom_test(
+            neighborhoods=neighborhoods,
+            annotations=annotations["matrix"],
+            max_workers=max_workers,
+        )
+        return neighborhood_significance
     def load_graph(
         self,
         network: nx.Graph,
@@ -180,7 +249,7 @@ class RISK(NetworkIO, AnnotationsIO):
             annotations (pd.DataFrame): DataFrame containing annotation data for the network.
             neighborhoods (dict): Neighborhood enrichment data.
             tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
-            pval_cutoff (float, optional): P-value cutoff for significance. Defaults to 0.01.
+            pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
             fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
             impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
             prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
@@ -208,7 +277,7 @@ class RISK(NetworkIO, AnnotationsIO):
             max_cluster_size=max_cluster_size,
         )
-        print(f"P-value cutoff: {pval_cutoff}")
+        print(f"p-value cutoff: {pval_cutoff}")
         print(f"FDR BH cutoff: {fdr_cutoff}")
         print(
             f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
@@ -306,6 +375,7 @@ class RISK(NetworkIO, AnnotationsIO):
             outline_color=outline_color,
             outline_scale=outline_scale,
         )
         # Initialize and return a NetworkPlotter object
         return NetworkPlotter(
             graph,
@@ -316,6 +386,48 @@ class RISK(NetworkIO, AnnotationsIO):
             outline_scale=outline_scale,
         )
+    def _load_neighborhoods(
+        self,
+        network: nx.Graph,
+        distance_metric: str = "dijkstra",
+        louvain_resolution: float = 0.1,
+        edge_length_threshold: float = 0.5,
+        random_seed: int = 888,
+    ) -> np.ndarray:
+        """Load significant neighborhoods for the network.
+        Args:
+            network (nx.Graph): The network graph.
+            distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+            louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
+            edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+            random_seed (int, optional): Seed for random number generation. Defaults to 888.
+        Returns:
+            np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
+        """
+        # Display the chosen distance metric
+        if distance_metric == "louvain":
+            for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
+        else:
+            for_print_distance_metric = distance_metric
+        # Log and display neighborhood settings
+        print(f"Distance metric: '{for_print_distance_metric}'")
+        print(f"Edge length threshold: {edge_length_threshold}")
+        print(f"Random seed: {random_seed}")
+        # Compute neighborhoods based on the network and distance metric
+        neighborhoods = get_network_neighborhoods(
+            network,
+            distance_metric,
+            edge_length_threshold,
+            louvain_resolution=louvain_resolution,
+            random_seed=random_seed,
+        )
+        return neighborhoods
     def _define_top_annotations(
         self,
         network: nx.Graph,

risk/stats/__init__.py CHANGED Viewed

@@ -3,4 +3,7 @@ risk/stats
 ~~~~~~~~~~
 """
-from .stats import calculate_significance_matrices, compute_permutation
+from .stats import calculate_significance_matrices
+from .fisher_exact import compute_fisher_exact_test
+from .hypergeom import compute_hypergeom_test
+from .permutation import compute_permutation_test

risk/stats/fisher_exact.py ADDED Viewed

@@ -0,0 +1,132 @@
+"""
+risk/stats/fisher_exact
+~~~~~~~~~~~~~~~~~~~~~~~
+"""
+from multiprocessing import get_context, Manager
+from tqdm import tqdm
+from typing import Any, Dict
+import numpy as np
+from scipy.stats import fisher_exact
+def compute_fisher_exact_test(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+    max_workers: int = 4,
+) -> Dict[str, Any]:
+    """Compute Fisher's exact test for enrichment and depletion in neighborhoods.
+    Args:
+        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
+        annotations (np.ndarray): Binary matrix representing annotations.
+        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
+    Returns:
+        dict: Dictionary containing depletion and enrichment p-values.
+    """
+    # Cast both matrices to boolean; note this cast does not remove NaN values (NaN becomes True)
+    neighborhoods = neighborhoods.astype(bool)  # Convert to boolean
+    annotations = annotations.astype(bool)  # Convert to boolean
+    # Initialize the process of calculating p-values using multiprocessing
+    ctx = get_context("spawn")
+    manager = Manager()
+    progress_counter = manager.Value("i", 0)
+    total_tasks = neighborhoods.shape[1] * annotations.shape[1]
+    # Calculate the workload per worker
+    chunk_size = total_tasks // max_workers
+    remainder = total_tasks % max_workers
+    # Execute the Fisher's exact test using multiprocessing
+    with ctx.Pool(max_workers) as pool:
+        with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
+            params_list = []
+            start_idx = 0
+            for i in range(max_workers):
+                end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
+                params_list.append(
+                    (neighborhoods, annotations, start_idx, end_idx, progress_counter)
+                )
+                start_idx = end_idx
+            # Start the Fisher's exact test process in parallel
+            results = pool.starmap_async(_fisher_exact_process_subset, params_list, chunksize=1)
+            # Update progress bar based on progress_counter
+            while not results.ready():
+                progress.update(progress_counter.value - progress.n)
+                results.wait(0.05)  # Wait for 50ms
+            # Ensure progress bar reaches 100%
+            progress.update(total_tasks - progress.n)
+            # Accumulate results from each worker
+            depletion_pvals, enrichment_pvals = [], []
+            for dp, ep in results.get():
+                depletion_pvals.extend(dp)
+                enrichment_pvals.extend(ep)
+    # Reshape the results back into arrays with the appropriate dimensions
+    depletion_pvals = np.array(depletion_pvals).reshape(
+        neighborhoods.shape[1], annotations.shape[1]
+    )
+    enrichment_pvals = np.array(enrichment_pvals).reshape(
+        neighborhoods.shape[1], annotations.shape[1]
+    )
+    return {
+        "depletion_pvals": depletion_pvals,
+        "enrichment_pvals": enrichment_pvals,
+    }
+def _fisher_exact_process_subset(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+    start_idx: int,
+    end_idx: int,
+    progress_counter,
+) -> tuple:
+    """Process a subset of neighborhoods using Fisher's exact test.
+    Args:
+        neighborhoods (np.ndarray): The full neighborhood matrix.
+        annotations (np.ndarray): The annotation matrix.
+        start_idx (int): Starting index of the neighborhood-annotation pairs to process.
+        end_idx (int): Ending index of the neighborhood-annotation pairs to process.
+        progress_counter: Shared counter for tracking progress.
+    Returns:
+        tuple: Local p-values for depletion and enrichment.
+    """
+    # Initialize lists to store p-values for depletion and enrichment
+    depletion_pvals = []
+    enrichment_pvals = []
+    # Process the subset of tasks assigned to this worker
+    for idx in range(start_idx, end_idx):
+        i = idx // annotations.shape[1]  # Neighborhood index
+        j = idx % annotations.shape[1]  # Annotation index
+        neighborhood = neighborhoods[:, i]
+        annotation = annotations[:, j]
+        # Calculate the contingency table values
+        TP = np.sum(neighborhood & annotation)
+        FP = np.sum(neighborhood & ~annotation)
+        FN = np.sum(~neighborhood & annotation)
+        TN = np.sum(~neighborhood & ~annotation)
+        table = np.array([[TP, FP], [FN, TN]])
+        # Perform Fisher's exact test for depletion (alternative='less')
+        _, p_value_depletion = fisher_exact(table, alternative="less")
+        depletion_pvals.append(p_value_depletion)
+        # Perform Fisher's exact test for enrichment (alternative='greater')
+        _, p_value_enrichment = fisher_exact(table, alternative="greater")
+        enrichment_pvals.append(p_value_enrichment)
+        # Update the shared progress counter
+        progress_counter.value += 1
+    return depletion_pvals, enrichment_pvals

risk/stats/hypergeom.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""
+risk/stats/hypergeom
+~~~~~~~~~~~~~~~~~~~~
+"""
+from multiprocessing import get_context, Manager
+from tqdm import tqdm
+from typing import Any, Dict
+import numpy as np
+from scipy.stats import hypergeom
+def compute_hypergeom_test(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+    max_workers: int = 4,
+) -> Dict[str, Any]:
+    """Compute hypergeometric test for enrichment and depletion in neighborhoods.
+    Args:
+        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
+        annotations (np.ndarray): Binary matrix representing annotations.
+        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
+    Returns:
+        dict: Dictionary containing depletion and enrichment p-values.
+    """
+    # Cast both matrices to boolean; note this cast does not remove NaN values (NaN becomes True)
+    neighborhoods = neighborhoods.astype(bool)  # Convert to boolean
+    annotations = annotations.astype(bool)  # Convert to boolean
+    # Initialize the process of calculating p-values using multiprocessing
+    ctx = get_context("spawn")
+    manager = Manager()
+    progress_counter = manager.Value("i", 0)
+    total_tasks = neighborhoods.shape[1] * annotations.shape[1]
+    # Calculate the workload per worker
+    chunk_size = total_tasks // max_workers
+    remainder = total_tasks % max_workers
+    # Execute the hypergeometric test using multiprocessing
+    with ctx.Pool(max_workers) as pool:
+        with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
+            params_list = []
+            start_idx = 0
+            for i in range(max_workers):
+                end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
+                params_list.append(
+                    (neighborhoods, annotations, start_idx, end_idx, progress_counter)
+                )
+                start_idx = end_idx
+            # Start the hypergeometric test process in parallel
+            results = pool.starmap_async(_hypergeom_process_subset, params_list, chunksize=1)
+            # Update progress bar based on progress_counter
+            while not results.ready():
+                progress.update(progress_counter.value - progress.n)
+                results.wait(0.05)  # Wait for 50ms
+            # Ensure progress bar reaches 100%
+            progress.update(total_tasks - progress.n)
+            # Accumulate results from each worker
+            depletion_pvals, enrichment_pvals = [], []
+            for dp, ep in results.get():
+                depletion_pvals.extend(dp)
+                enrichment_pvals.extend(ep)
+    # Reshape the results back into arrays with the appropriate dimensions
+    depletion_pvals = np.array(depletion_pvals).reshape(
+        neighborhoods.shape[1], annotations.shape[1]
+    )
+    enrichment_pvals = np.array(enrichment_pvals).reshape(
+        neighborhoods.shape[1], annotations.shape[1]
+    )
+    return {
+        "depletion_pvals": depletion_pvals,
+        "enrichment_pvals": enrichment_pvals,
+    }
+def _hypergeom_process_subset(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+    start_idx: int,
+    end_idx: int,
+    progress_counter,
+) -> tuple:
+    """Process a subset of neighborhoods using the hypergeometric test.
+    Args:
+        neighborhoods (np.ndarray): The full neighborhood matrix.
+        annotations (np.ndarray): The annotation matrix.
+        start_idx (int): Starting index of the neighborhood-annotation pairs to process.
+        end_idx (int): Ending index of the neighborhood-annotation pairs to process.
+        progress_counter: Shared counter for tracking progress.
+    Returns:
+        tuple: Local p-values for depletion and enrichment.
+    """
+    # Initialize lists to store p-values for depletion and enrichment
+    depletion_pvals = []
+    enrichment_pvals = []
+    # Process the subset of tasks assigned to this worker
+    for idx in range(start_idx, end_idx):
+        i = idx // annotations.shape[1]  # Neighborhood index
+        j = idx % annotations.shape[1]  # Annotation index
+        neighborhood = neighborhoods[:, i]
+        annotation = annotations[:, j]
+        # Calculate the required values for the hypergeometric test
+        M = annotations.shape[0]  # Total number of items (population size)
+        n = np.sum(annotation)  # Total number of successes in population
+        N = np.sum(neighborhood)  # Total number of draws (sample size)
+        k = np.sum(neighborhood & annotation)  # Number of successes in sample
+        # Perform hypergeometric test for depletion
+        p_value_depletion = hypergeom.cdf(k, M, n, N)
+        depletion_pvals.append(p_value_depletion)
+        # Perform hypergeometric test for enrichment
+        p_value_enrichment = hypergeom.sf(k - 1, M, n, N)
+        enrichment_pvals.append(p_value_enrichment)
+        # Update the shared progress counter
+        progress_counter.value += 1
+    return depletion_pvals, enrichment_pvals

risk/stats/permutation/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""
+risk/stats/permutation
+~~~~~~~~~~~~~~~~~~~~~~
+"""
+from .permutation import compute_permutation_test

risk-network 0.0.3b4__py3-none-any.whl → 0.0.4__py3-none-any.whl

risk-network 0.0.3b4__py3-none-any.whl → 0.0.4__py3-none-any.whl