risk-network 0.0.7b3.tar.gz → 0.0.7b5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/PKG-INFO +1 -1
  2. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/__init__.py +1 -1
  3. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/neighborhoods/neighborhoods.py +0 -1
  4. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/graph.py +16 -27
  5. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/risk.py +37 -51
  6. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/__init__.py +2 -2
  7. risk_network-0.0.7b5/risk/stats/hypergeom.py +55 -0
  8. risk_network-0.0.7b5/risk/stats/poisson.py +40 -0
  9. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/PKG-INFO +1 -1
  10. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/SOURCES.txt +1 -1
  11. risk_network-0.0.7b3/risk/stats/fisher_exact.py +0 -132
  12. risk_network-0.0.7b3/risk/stats/hypergeom.py +0 -131
  13. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/LICENSE +0 -0
  14. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/MANIFEST.in +0 -0
  15. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/README.md +0 -0
  16. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/pyproject.toml +0 -0
  17. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/annotations/__init__.py +0 -0
  18. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/annotations/annotations.py +0 -0
  19. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/annotations/io.py +0 -0
  20. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/constants.py +0 -0
  21. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/log/__init__.py +0 -0
  22. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/log/console.py +0 -0
  23. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/log/params.py +0 -0
  24. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/neighborhoods/__init__.py +0 -0
  25. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/neighborhoods/community.py +0 -0
  26. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/neighborhoods/domains.py +0 -0
  27. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/__init__.py +0 -0
  28. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/geometry.py +0 -0
  29. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/io.py +0 -0
  30. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/plot.py +0 -0
  31. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/permutation/__init__.py +0 -0
  32. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/permutation/permutation.py +0 -0
  33. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/permutation/test_functions.py +0 -0
  34. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/stats.py +0 -0
  35. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/dependency_links.txt +0 -0
  36. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/requires.txt +0 -0
  37. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/top_level.txt +0 -0
  38. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/setup.cfg +0 -0
  39. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/setup.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.7b3
+Version: 0.0.7b5
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
risk/__init__.py
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.7-beta.3"
+__version__ = "0.0.7-beta.5"
risk/neighborhoods/neighborhoods.py
@@ -200,7 +200,6 @@ def _impute_neighbors_with_similarity(
     depth = 1
     rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
-        next_rows_to_impute = []
         # Iterate over all enriched nodes
         for row_index in range(binary_enrichment_matrix.shape[0]):
             if binary_enrichment_matrix[row_index].sum() != 0:
risk/network/graph.py
@@ -3,7 +3,6 @@ risk/network/graph
 ~~~~~~~~~~~~~~~~~~
 """
 
-import random
 from collections import defaultdict
 from typing import Any, Dict, List, Tuple, Union
 
@@ -307,7 +306,7 @@ def _get_colors(
         List[Tuple]: List of RGBA colors.
     """
     # Set random seed for reproducibility
-    random.seed(random_seed)
+    np.random.seed(random_seed)
     # Determine the number of colors to generate based on the number of domains
     num_colors_to_generate = len(domain_id_to_node_ids_map)
     if color:
@@ -322,23 +321,15 @@ def _get_colors(
     # Step 2: Calculate pairwise distances between centroids
     centroid_array = np.array(centroids)
     dist_matrix = np.linalg.norm(centroid_array[:, None] - centroid_array, axis=-1)
-
-    # Step 3: Generate positions in the colormap, with a focus on centroids that are close
-    remaining_indices = set(range(num_colors_to_generate))
-    # Assign distant colors to close centroids
-    color_positions = _assign_distant_colors(
-        remaining_indices, dist_matrix, colormap, num_colors_to_generate
-    )
-
-    # Step 4: Randomly shuffle color positions to generate a new color palette
-    # while maintaining the dissimilarity between neighboring colors. This shuffling
-    # preserves the relative distances between centroids, ensuring that close centroids
-    # remain visually distinct while introducing randomness into the overall color arrangement.
-    random.shuffle(color_positions)
-    # Ensure that all positions remain between 0 and 1
+    # Step 3: Assign distant colors to close centroids
+    color_positions = _assign_distant_colors(dist_matrix, num_colors_to_generate)
+    # Step 4: Randomly shift the entire color palette while maintaining relative distances
+    global_shift = np.random.uniform(-0.1, 0.1)  # Small global shift to change the overall palette
+    color_positions = (color_positions + global_shift) % 1  # Wrap around to keep within [0, 1]
+    # Step 5: Ensure that all positions remain between 0 and 1
     color_positions = np.clip(color_positions, 0, 1)
 
-    # Step 5: Generate colors based on positions
+    # Step 6: Generate RGBA colors based on positions
    return [colormap(pos) for pos in color_positions]
 
 
@@ -365,28 +356,26 @@ def _calculate_centroids(network, domain_id_to_node_ids_map):
     return centroids
 
 
-def _assign_distant_colors(remaining_indices, dist_matrix, colormap, num_colors_to_generate):
+def _assign_distant_colors(dist_matrix, num_colors_to_generate):
     """Assign colors to centroids that are close in space, ensuring stark color differences.
 
     Args:
-        remaining_indices (set): Indices of centroids left to color.
         dist_matrix (ndarray): Matrix of pairwise centroid distances.
-        colormap (Colormap): The colormap used to assign colors.
         num_colors_to_generate (int): Number of colors to generate.
 
     Returns:
-        np.array: Array of color positions in the colormap.
+        np.array: Array of color positions in the range [0, 1].
     """
     color_positions = np.zeros(num_colors_to_generate)
-    # Convert the set to a list to index over it
-    remaining_indices = list(remaining_indices)
-    # Sort remaining indices by centroid proximity (based on sum of distances to others)
-    proximity_order = sorted(remaining_indices, key=lambda idx: np.sum(dist_matrix[idx]))
-    # Assign colors starting with the most distant points in proximity order
+    # Step 1: Sort indices by centroid proximity (based on sum of distances to others)
+    proximity_order = sorted(
+        range(num_colors_to_generate), key=lambda idx: np.sum(dist_matrix[idx])
+    )
+    # Step 2: Assign colors starting with the most distant points in proximity order
     for i, idx in enumerate(proximity_order):
         color_positions[idx] = i / num_colors_to_generate
 
-    # Adjust colors so that centroids close to one another are maximally distant on the color spectrum
+    # Step 3: Adjust colors so that centroids close to one another are maximally distant on the color spectrum
     half_spectrum = int(num_colors_to_generate / 2)
     for i in range(half_spectrum):
         # Split the spectrum so that close centroids are assigned distant colors
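To make the new color-assignment flow concrete, here is a small self-contained sketch (numpy only, on a toy distance matrix; it covers only Steps 1-2 visible in the hunk above, since the half-spectrum swap of Step 3 is truncated in this diff, and then applies the shift-and-wrap from _get_colors):

import numpy as np

np.random.seed(888)  # mirrors np.random.seed(random_seed) in _get_colors
# Toy pairwise distances for four centroids: 0 and 1 are close, 2 and 3 are far out
dist_matrix = np.array(
    [
        [0.0, 1.0, 5.0, 6.0],
        [1.0, 0.0, 5.0, 6.0],
        [5.0, 5.0, 0.0, 8.0],
        [6.0, 6.0, 8.0, 0.0],
    ]
)
num_colors_to_generate = dist_matrix.shape[0]
color_positions = np.zeros(num_colors_to_generate)
# Step 1: order centroids by their total distance to all others (smallest sum first)
proximity_order = sorted(range(num_colors_to_generate), key=lambda idx: np.sum(dist_matrix[idx]))
# Step 2: spread the ordered centroids evenly over the colormap
for i, idx in enumerate(proximity_order):
    color_positions[idx] = i / num_colors_to_generate
# (Step 3, truncated in the hunk, then swaps spectrum halves so near centroids diverge)
# Step 4: one small global shift re-randomizes the palette without changing relative gaps
global_shift = np.random.uniform(-0.1, 0.1)
color_positions = (color_positions + global_shift) % 1  # wrap around the colormap
print(np.round(color_positions, 3))

Because the shift is applied to all positions and wrapped modulo 1, pairwise gaps on the color wheel are preserved, which is why nearby centroids stay visually distinct while the palette still varies with the seed.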
risk/risk.py
@@ -20,9 +20,9 @@ from risk.neighborhoods (
 from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
 from risk.stats import (
     calculate_significance_matrices,
-    compute_fisher_exact_test,
     compute_hypergeom_test,
     compute_permutation_test,
+    compute_poisson_test,
 )
 
 
@@ -45,20 +45,16 @@ class RISK(NetworkIO, AnnotationsIO):
         """Access the logged parameters."""
         return params
 
-    def load_neighborhoods_by_permutation(
+    def load_neighborhoods_by_hypergeom(
         self,
         network: nx.Graph,
         annotations: Dict[str, Any],
         distance_metric: str = "dijkstra",
         louvain_resolution: float = 0.1,
         edge_length_threshold: float = 0.5,
-        score_metric: str = "sum",
-        null_distribution: str = "network",
-        num_permutations: int = 1000,
         random_seed: int = 888,
-        max_workers: int = 1,
     ) -> Dict[str, Any]:
-        """Load significant neighborhoods for the network using the permutation test.
+        """Load significant neighborhoods for the network using the hypergeometric test.
 
         Args:
             network (nx.Graph): The network graph.
@@ -66,27 +62,19 @@ class RISK(NetworkIO, AnnotationsIO):
             distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
             edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
-            score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
-            null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
-            num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
-            max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
 
         Returns:
             dict: Computed significance of neighborhoods.
         """
-        print_header("Running permutation test")
+        print_header("Running hypergeometric test")
         # Log neighborhood analysis parameters
         params.log_neighborhoods(
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             edge_length_threshold=edge_length_threshold,
-            statistical_test_function="permutation",
-            score_metric=score_metric,
-            null_distribution=null_distribution,
-            num_permutations=num_permutations,
+            statistical_test_function="hypergeom",
             random_seed=random_seed,
-            max_workers=max_workers,
         )
 
         # Load neighborhoods based on the network and distance metric
@@ -97,27 +85,16 @@ class RISK(NetworkIO, AnnotationsIO):
             edge_length_threshold=edge_length_threshold,
             random_seed=random_seed,
         )
-
-        # Log and display permutation test settings
-        print(f"Neighborhood scoring metric: '{score_metric}'")
-        print(f"Null distribution: '{null_distribution}'")
-        print(f"Number of permutations: {num_permutations}")
-        print(f"Maximum workers: {max_workers}")
-        # Run permutation test to compute neighborhood significance
-        neighborhood_significance = compute_permutation_test(
+        # Run hypergeometric test to compute neighborhood significance
+        neighborhood_significance = compute_hypergeom_test(
             neighborhoods=neighborhoods,
             annotations=annotations["matrix"],
-            score_metric=score_metric,
-            null_distribution=null_distribution,
-            num_permutations=num_permutations,
-            random_seed=random_seed,
-            max_workers=max_workers,
         )
 
         # Return the computed neighborhood significance
         return neighborhood_significance
 
-    def load_neighborhoods_by_fisher_exact(
+    def load_neighborhoods_by_poisson(
         self,
         network: nx.Graph,
         annotations: Dict[str, Any],
@@ -125,9 +102,8 @@ class RISK(NetworkIO, AnnotationsIO):
         louvain_resolution: float = 0.1,
         edge_length_threshold: float = 0.5,
         random_seed: int = 888,
-        max_workers: int = 1,
     ) -> Dict[str, Any]:
-        """Load significant neighborhoods for the network using the Fisher's exact test.
+        """Load significant neighborhoods for the network using the Poisson test.
 
         Args:
             network (nx.Graph): The network graph.
@@ -136,20 +112,18 @@ class RISK(NetworkIO, AnnotationsIO):
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
             edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
-            max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
 
         Returns:
             dict: Computed significance of neighborhoods.
         """
-        print_header("Running Fisher's exact test")
+        print_header("Running Poisson test")
         # Log neighborhood analysis parameters
         params.log_neighborhoods(
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             edge_length_threshold=edge_length_threshold,
-            statistical_test_function="fisher_exact",
+            statistical_test_function="poisson",
             random_seed=random_seed,
-            max_workers=max_workers,
         )
 
         # Load neighborhoods based on the network and distance metric
@@ -160,30 +134,29 @@ class RISK(NetworkIO, AnnotationsIO):
             edge_length_threshold=edge_length_threshold,
             random_seed=random_seed,
         )
-
-        # Log and display Fisher's exact test settings
-        print(f"Maximum workers: {max_workers}")
-        # Run Fisher's exact test to compute neighborhood significance
-        neighborhood_significance = compute_fisher_exact_test(
+        # Run Poisson test to compute neighborhood significance
+        neighborhood_significance = compute_poisson_test(
             neighborhoods=neighborhoods,
             annotations=annotations["matrix"],
-            max_workers=max_workers,
         )
 
         # Return the computed neighborhood significance
         return neighborhood_significance
 
-    def load_neighborhoods_by_hypergeom(
+    def load_neighborhoods_by_permutation(
         self,
         network: nx.Graph,
         annotations: Dict[str, Any],
         distance_metric: str = "dijkstra",
         louvain_resolution: float = 0.1,
         edge_length_threshold: float = 0.5,
+        score_metric: str = "sum",
+        null_distribution: str = "network",
+        num_permutations: int = 1000,
         random_seed: int = 888,
         max_workers: int = 1,
     ) -> Dict[str, Any]:
-        """Load significant neighborhoods for the network using the hypergeometric test.
+        """Load significant neighborhoods for the network using the permutation test.
 
         Args:
             network (nx.Graph): The network graph.
@@ -191,19 +164,25 @@ class RISK(NetworkIO, AnnotationsIO):
             distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
             edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+            score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
+            null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
+            num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
             max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
 
         Returns:
             dict: Computed significance of neighborhoods.
         """
-        print_header("Running hypergeometric test")
+        print_header("Running permutation test")
         # Log neighborhood analysis parameters
         params.log_neighborhoods(
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             edge_length_threshold=edge_length_threshold,
-            statistical_test_function="hypergeom",
+            statistical_test_function="permutation",
+            score_metric=score_metric,
+            null_distribution=null_distribution,
+            num_permutations=num_permutations,
             random_seed=random_seed,
             max_workers=max_workers,
         )
@@ -217,12 +196,19 @@ class RISK(NetworkIO, AnnotationsIO):
             random_seed=random_seed,
         )
 
-        # Log and display hypergeometric test settings
+        # Log and display permutation test settings
+        print(f"Neighborhood scoring metric: '{score_metric}'")
+        print(f"Null distribution: '{null_distribution}'")
+        print(f"Number of permutations: {num_permutations}")
         print(f"Maximum workers: {max_workers}")
-        # Run hypergeometric test to compute neighborhood significance
-        neighborhood_significance = compute_hypergeom_test(
+        # Run permutation test to compute neighborhood significance
+        neighborhood_significance = compute_permutation_test(
             neighborhoods=neighborhoods,
             annotations=annotations["matrix"],
+            score_metric=score_metric,
+            null_distribution=null_distribution,
+            num_permutations=num_permutations,
+            random_seed=random_seed,
             max_workers=max_workers,
         )
 
@@ -315,7 +301,7 @@ class RISK(NetworkIO, AnnotationsIO):
             max_cluster_size=max_cluster_size,
         )
 
-        print_header(f"Optimizing distance threshold for domains")
+        print_header("Optimizing distance threshold for domains")
         # Define domains in the network using the specified clustering settings
         domains = self._define_domains(
             neighborhoods=processed_neighborhoods,
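Taken together, the risk.py hunks reorder the three loaders and strip max_workers plus the permutation-only arguments from the analytic tests. A hedged sketch of the resulting call surface (it assumes RISK() takes no constructor arguments, and the network/annotations loading steps are elided because this diff does not show them):

import networkx as nx
from risk import RISK

risk = RISK()  # assumption: no-argument constructor
network: nx.Graph = ...  # produced by the package's own loaders (not in this diff)
annotations: dict = ...  # carries a binary "matrix" entry, per the calls above
# Analytic tests now take only neighborhood parameters and a seed:
hyper = risk.load_neighborhoods_by_hypergeom(network, annotations, random_seed=888)
pois = risk.load_neighborhoods_by_poisson(network, annotations, random_seed=888)
# The permutation test keeps its scoring, null-distribution, and worker options:
perm = risk.load_neighborhoods_by_permutation(
    network,
    annotations,
    score_metric="sum",
    null_distribution="network",
    num_permutations=1000,
    max_workers=4,
)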
risk/stats/__init__.py
@@ -3,7 +3,7 @@ risk/stats
 ~~~~~~~~~~
 """
 
-from .stats import calculate_significance_matrices
-from .fisher_exact import compute_fisher_exact_test
 from .hypergeom import compute_hypergeom_test
 from .permutation import compute_permutation_test
+from .poisson import compute_poisson_test
+from .stats import calculate_significance_matrices
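A quick smoke test of the reshuffled namespace (assumes risk-network 0.0.7b5 is installed):

# All four public entry points resolve from risk.stats after this change
from risk.stats import (
    calculate_significance_matrices,
    compute_hypergeom_test,
    compute_permutation_test,
    compute_poisson_test,
)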
risk_network-0.0.7b5/risk/stats/hypergeom.py (new file)
@@ -0,0 +1,55 @@
+"""
+risk/stats/hypergeom
+~~~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict
+
+import numpy as np
+from scipy.stats import hypergeom
+
+
+def compute_hypergeom_test(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+) -> Dict[str, Any]:
+    """Compute hypergeometric test for enrichment and depletion in neighborhoods.
+
+    Args:
+        neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
+            and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
+            in a neighborhood.
+        annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
+            and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
+            being annotated.
+
+    Returns:
+        Dict[str, Any]: A dictionary with two keys:
+            - "enrichment_pvals" (np.ndarray): P-values for enrichment, indicating the probability
+              of observing more annotations in a neighborhood than expected under the hypergeometric test.
+            - "depletion_pvals" (np.ndarray): P-values for depletion, indicating the probability
+              of observing fewer annotations in a neighborhood than expected under the hypergeometric test.
+    """
+    # Ensure both matrices are binary (presence/absence)
+    neighborhoods = (neighborhoods > 0).astype(int)
+    annotations = (annotations > 0).astype(int)
+    total_node_count = annotations.shape[0]
+    # Sum of values in each neighborhood
+    neighborhood_sums = np.sum(neighborhoods, axis=0)[:, np.newaxis]
+    # Repeating neighborhood sums for each annotation
+    neighborhood_size_matrix = np.tile(neighborhood_sums, (1, annotations.shape[1]))
+    # Total number of nodes annotated to each attribute
+    annotated_node_counts = np.tile(np.sum(annotations, axis=0), (neighborhoods.shape[1], 1))
+    # Nodes in each neighborhood annotated to each attribute
+    annotated_in_neighborhood = np.dot(neighborhoods, annotations)
+    # Calculate p-values using the hypergeometric distribution
+    depletion_pvals = hypergeom.cdf(
+        annotated_in_neighborhood, total_node_count, annotated_node_counts, neighborhood_size_matrix
+    )
+    enrichment_pvals = hypergeom.sf(
+        annotated_in_neighborhood - 1,
+        total_node_count,
+        annotated_node_counts,
+        neighborhood_size_matrix,
+    )
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
risk_network-0.0.7b5/risk/stats/poisson.py (new file)
@@ -0,0 +1,40 @@
+"""
+risk/stats/poisson
+~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Dict, Any
+
+import numpy as np
+from scipy.stats import poisson
+
+
+def compute_poisson_test(neighborhoods: np.ndarray, annotations: np.ndarray) -> Dict[str, Any]:
+    """Compute Poisson test for enrichment and depletion in neighborhoods.
+
+    Args:
+        neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
+            and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
+            in a neighborhood.
+        annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
+            and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
+            being annotated.
+
+    Returns:
+        Dict[str, Any]: A dictionary with two keys:
+            - "enrichment_pvals" (np.ndarray): P-values for enrichment, indicating the probability
+              of observing more annotations in a neighborhood than expected under the Poisson distribution.
+            - "depletion_pvals" (np.ndarray): P-values for depletion, indicating the probability of
+              observing fewer annotations in a neighborhood than expected under the Poisson distribution.
+    """
+    neighborhoods = (neighborhoods > 0).astype(int)
+    annotations = (annotations > 0).astype(int)
+    annotated_in_neighborhood = np.dot(neighborhoods, annotations)
+    lambda_expected = np.mean(annotated_in_neighborhood, axis=0)
+    # Enrichment (observing more than expected)
+    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood - 1, lambda_expected)
+
+    # Depletion (observing fewer than expected)
+    depletion_pvals = poisson.cdf(annotated_in_neighborhood, lambda_expected)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
risk_network.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.7b3
+Version: 0.0.7b5
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
risk_network.egg-info/SOURCES.txt
@@ -22,8 +22,8 @@ risk/network/graph.py
 risk/network/io.py
 risk/network/plot.py
 risk/stats/__init__.py
-risk/stats/fisher_exact.py
 risk/stats/hypergeom.py
+risk/stats/poisson.py
 risk/stats/stats.py
 risk/stats/permutation/__init__.py
 risk/stats/permutation/permutation.py
risk_network-0.0.7b3/risk/stats/fisher_exact.py (deleted)
@@ -1,132 +0,0 @@
-"""
-risk/stats/fisher_exact
-~~~~~~~~~~~~~~~~~~~~~~~
-"""
-
-from multiprocessing import get_context, Manager
-from tqdm import tqdm
-from typing import Any, Dict
-
-import numpy as np
-from scipy.stats import fisher_exact
-
-
-def compute_fisher_exact_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
-    max_workers: int = 4,
-) -> Dict[str, Any]:
-    """Compute Fisher's exact test for enrichment and depletion in neighborhoods.
-
-    Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
-        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
-
-    Returns:
-        dict: Dictionary containing depletion and enrichment p-values.
-    """
-    # Ensure that the matrices are binary (boolean) and free of NaN values
-    neighborhoods = neighborhoods.astype(bool)  # Convert to boolean
-    annotations = annotations.astype(bool)  # Convert to boolean
-
-    # Initialize the process of calculating p-values using multiprocessing
-    ctx = get_context("spawn")
-    manager = Manager()
-    progress_counter = manager.Value("i", 0)
-    total_tasks = neighborhoods.shape[1] * annotations.shape[1]
-
-    # Calculate the workload per worker
-    chunk_size = total_tasks // max_workers
-    remainder = total_tasks % max_workers
-
-    # Execute the Fisher's exact test using multiprocessing
-    with ctx.Pool(max_workers) as pool:
-        with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
-            params_list = []
-            start_idx = 0
-            for i in range(max_workers):
-                end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
-                params_list.append(
-                    (neighborhoods, annotations, start_idx, end_idx, progress_counter)
-                )
-                start_idx = end_idx
-
-            # Start the Fisher's exact test process in parallel
-            results = pool.starmap_async(_fisher_exact_process_subset, params_list, chunksize=1)
-
-            # Update progress bar based on progress_counter
-            while not results.ready():
-                progress.update(progress_counter.value - progress.n)
-                results.wait(0.05)  # Wait for 50ms
-            # Ensure progress bar reaches 100%
-            progress.update(total_tasks - progress.n)
-
-    # Accumulate results from each worker
-    depletion_pvals, enrichment_pvals = [], []
-    for dp, ep in results.get():
-        depletion_pvals.extend(dp)
-        enrichment_pvals.extend(ep)
-
-    # Reshape the results back into arrays with the appropriate dimensions
-    depletion_pvals = np.array(depletion_pvals).reshape(
-        neighborhoods.shape[1], annotations.shape[1]
-    )
-    enrichment_pvals = np.array(enrichment_pvals).reshape(
-        neighborhoods.shape[1], annotations.shape[1]
-    )
-
-    return {
-        "depletion_pvals": depletion_pvals,
-        "enrichment_pvals": enrichment_pvals,
-    }
-
-
-def _fisher_exact_process_subset(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
-    start_idx: int,
-    end_idx: int,
-    progress_counter,
-) -> tuple:
-    """Process a subset of neighborhoods using Fisher's exact test.
-
-    Args:
-        neighborhoods (np.ndarray): The full neighborhood matrix.
-        annotations (np.ndarray): The annotation matrix.
-        start_idx (int): Starting index of the neighborhood-annotation pairs to process.
-        end_idx (int): Ending index of the neighborhood-annotation pairs to process.
-        progress_counter: Shared counter for tracking progress.
-
-    Returns:
-        tuple: Local p-values for depletion and enrichment.
-    """
-    # Initialize lists to store p-values for depletion and enrichment
-    depletion_pvals = []
-    enrichment_pvals = []
-    # Process the subset of tasks assigned to this worker
-    for idx in range(start_idx, end_idx):
-        i = idx // annotations.shape[1]  # Neighborhood index
-        j = idx % annotations.shape[1]  # Annotation index
-
-        neighborhood = neighborhoods[:, i]
-        annotation = annotations[:, j]
-
-        # Calculate the contingency table values
-        TP = np.sum(neighborhood & annotation)
-        FP = np.sum(neighborhood & ~annotation)
-        FN = np.sum(~neighborhood & annotation)
-        TN = np.sum(~neighborhood & ~annotation)
-        table = np.array([[TP, FP], [FN, TN]])
-
-        # Perform Fisher's exact test for depletion (alternative='less')
-        _, p_value_depletion = fisher_exact(table, alternative="less")
-        depletion_pvals.append(p_value_depletion)
-        # Perform Fisher's exact test for enrichment (alternative='greater')
-        _, p_value_enrichment = fisher_exact(table, alternative="greater")
-        enrichment_pvals.append(p_value_enrichment)
-
-        # Update the shared progress counter
-        progress_counter.value += 1
-
-    return depletion_pvals, enrichment_pvals
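For reference, the per-pair computation this deleted file performed can be reproduced in a few lines; a minimal sketch for one neighborhood/annotation pair (toy boolean node masks):

import numpy as np
from scipy.stats import fisher_exact

# One neighborhood column and one annotation column, as boolean node masks
neighborhood = np.array([True, True, False, False])
annotation = np.array([True, False, False, True])
# 2x2 contingency table, exactly as the removed worker built it
TP = np.sum(neighborhood & annotation)
FP = np.sum(neighborhood & ~annotation)
FN = np.sum(~neighborhood & annotation)
TN = np.sum(~neighborhood & ~annotation)
table = np.array([[TP, FP], [FN, TN]])
_, p_depletion = fisher_exact(table, alternative="less")
_, p_enrichment = fisher_exact(table, alternative="greater")
print(p_depletion, p_enrichment)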
risk_network-0.0.7b3/risk/stats/hypergeom.py (deleted)
@@ -1,131 +0,0 @@
-"""
-risk/stats/hypergeom
-~~~~~~~~~~~~~~~~~~~~
-"""
-
-from multiprocessing import get_context, Manager
-from tqdm import tqdm
-from typing import Any, Dict
-
-import numpy as np
-from scipy.stats import hypergeom
-
-
-def compute_hypergeom_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
-    max_workers: int = 4,
-) -> Dict[str, Any]:
-    """Compute hypergeometric test for enrichment and depletion in neighborhoods.
-
-    Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
-        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
-
-    Returns:
-        dict: Dictionary containing depletion and enrichment p-values.
-    """
-    # Ensure that the matrices are binary (boolean) and free of NaN values
-    neighborhoods = neighborhoods.astype(bool)  # Convert to boolean
-    annotations = annotations.astype(bool)  # Convert to boolean
-
-    # Initialize the process of calculating p-values using multiprocessing
-    ctx = get_context("spawn")
-    manager = Manager()
-    progress_counter = manager.Value("i", 0)
-    total_tasks = neighborhoods.shape[1] * annotations.shape[1]
-
-    # Calculate the workload per worker
-    chunk_size = total_tasks // max_workers
-    remainder = total_tasks % max_workers
-
-    # Execute the hypergeometric test using multiprocessing
-    with ctx.Pool(max_workers) as pool:
-        with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
-            params_list = []
-            start_idx = 0
-            for i in range(max_workers):
-                end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
-                params_list.append(
-                    (neighborhoods, annotations, start_idx, end_idx, progress_counter)
-                )
-                start_idx = end_idx
-
-            # Start the hypergeometric test process in parallel
-            results = pool.starmap_async(_hypergeom_process_subset, params_list, chunksize=1)
-
-            # Update progress bar based on progress_counter
-            while not results.ready():
-                progress.update(progress_counter.value - progress.n)
-                results.wait(0.05)  # Wait for 50ms
-            # Ensure progress bar reaches 100%
-            progress.update(total_tasks - progress.n)
-
-    # Accumulate results from each worker
-    depletion_pvals, enrichment_pvals = [], []
-    for dp, ep in results.get():
-        depletion_pvals.extend(dp)
-        enrichment_pvals.extend(ep)
-
-    # Reshape the results back into arrays with the appropriate dimensions
-    depletion_pvals = np.array(depletion_pvals).reshape(
-        neighborhoods.shape[1], annotations.shape[1]
-    )
-    enrichment_pvals = np.array(enrichment_pvals).reshape(
-        neighborhoods.shape[1], annotations.shape[1]
-    )
-
-    return {
-        "depletion_pvals": depletion_pvals,
-        "enrichment_pvals": enrichment_pvals,
-    }
-
-
-def _hypergeom_process_subset(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
-    start_idx: int,
-    end_idx: int,
-    progress_counter,
-) -> tuple:
-    """Process a subset of neighborhoods using the hypergeometric test.
-
-    Args:
-        neighborhoods (np.ndarray): The full neighborhood matrix.
-        annotations (np.ndarray): The annotation matrix.
-        start_idx (int): Starting index of the neighborhood-annotation pairs to process.
-        end_idx (int): Ending index of the neighborhood-annotation pairs to process.
-        progress_counter: Shared counter for tracking progress.
-
-    Returns:
-        tuple: Local p-values for depletion and enrichment.
-    """
-    # Initialize lists to store p-values for depletion and enrichment
-    depletion_pvals = []
-    enrichment_pvals = []
-    # Process the subset of tasks assigned to this worker
-    for idx in range(start_idx, end_idx):
-        i = idx // annotations.shape[1]  # Neighborhood index
-        j = idx % annotations.shape[1]  # Annotation index
-
-        neighborhood = neighborhoods[:, i]
-        annotation = annotations[:, j]
-
-        # Calculate the required values for the hypergeometric test
-        M = annotations.shape[0]  # Total number of items (population size)
-        n = np.sum(annotation)  # Total number of successes in population
-        N = np.sum(neighborhood)  # Total number of draws (sample size)
-        k = np.sum(neighborhood & annotation)  # Number of successes in sample
-
-        # Perform hypergeometric test for depletion
-        p_value_depletion = hypergeom.cdf(k, M, n, N)
-        depletion_pvals.append(p_value_depletion)
-        # Perform hypergeometric test for enrichment
-        p_value_enrichment = hypergeom.sf(k - 1, M, n, N)
-        enrichment_pvals.append(p_value_enrichment)
-
-        # Update the shared progress counter
-        progress_counter.value += 1
-
-    return depletion_pvals, enrichment_pvals
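The replacement module computes the same quantities without multiprocessing. A quick numpy/scipy check that the vectorized formulas reproduce this per-pair loop (the sketch assumes a square, symmetric neighborhood matrix, as RISK's node-by-node neighborhoods are, so row and column views agree):

import numpy as np
from scipy.stats import hypergeom

neighborhoods = np.array([[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 1, 1], [0, 0, 1, 1]], dtype=bool)
annotations = np.array([[1, 0], [1, 0], [0, 1], [1, 1]], dtype=bool)
M = annotations.shape[0]
# Old per-pair loop, exactly as in the deleted worker above
loop_enrich = np.array(
    [
        [
            hypergeom.sf(
                np.sum(neighborhoods[:, i] & annotations[:, j]) - 1,
                M,
                np.sum(annotations[:, j]),
                np.sum(neighborhoods[:, i]),
            )
            for j in range(annotations.shape[1])
        ]
        for i in range(neighborhoods.shape[1])
    ]
)
# New vectorized form, as in the added risk/stats/hypergeom.py
nb = neighborhoods.astype(int)
an = annotations.astype(int)
sizes = np.tile(nb.sum(axis=0)[:, None], (1, an.shape[1]))
annotated = np.tile(an.sum(axis=0), (nb.shape[1], 1))
k = nb @ an
vec_enrich = hypergeom.sf(k - 1, M, annotated, sizes)
assert np.allclose(loop_enrich, vec_enrich)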