risk-network 0.0.7b5__py3-none-any.whl → 0.0.7b6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
 from risk.risk import RISK
 
- __version__ = "0.0.7-beta.5"
+ __version__ = "0.0.7-beta.6"
risk/annotations/annotations.py CHANGED
@@ -39,7 +39,7 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
 annotations_input (dict): A dictionary with annotations.
 
 Returns:
- dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+ dict: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
 """
 # Flatten the dictionary to a list of tuples for easier DataFrame creation
 flattened_annotations = [
@@ -66,7 +66,8 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
 # Extract ordered nodes and annotations
 ordered_nodes = tuple(annotations_pivot.index)
 ordered_annotations = tuple(annotations_pivot.columns)
- annotations_pivot_numpy = annotations_pivot.fillna(0).to_numpy()
+ # Convert the annotations_pivot matrix to a numpy array and ensure it's binary
+ annotations_pivot_numpy = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
 
 return {
 "ordered_nodes": ordered_nodes,
@@ -163,8 +164,8 @@ def define_top_annotations(
 
 
 def get_description(words_column: pd.Series) -> str:
- """Process input Series to identify and return the top N frequent, significant words,
- filtering based on stopwords and similarity (Jaccard index).
+ """Process input Series to identify and return the top frequent, significant words,
+ filtering based on stopwords and gracefully handling numerical strings.
 
 Args:
 words_column (pd.Series): A pandas Series containing strings to process.
@@ -172,19 +173,30 @@ def get_description(words_column: pd.Series) -> str:
 Returns:
 str: A coherent description formed from the most frequent and significant words.
 """
- # Define stopwords
- stop_words = set(stopwords.words("english"))
- # Tokenize the concatenated string and filter out stopwords and non-alphabetic words
+ # Concatenate all rows into a single string and tokenize into words
+ all_words = words_column.str.cat(sep=" ")
+ tokens = word_tokenize(all_words)
+
+ # Check if all tokens are numeric strings or contain a mixture of strings and numbers
+ numeric_tokens = [token for token in tokens if token.replace(".", "", 1).isdigit()]
+ non_numeric_tokens = [token for token in tokens if not token.replace(".", "", 1).isdigit()]
+ # If there's only one unique numeric value, return it directly as a string
+ unique_numeric_values = set(numeric_tokens)
+ if len(unique_numeric_values) == 1:
+ return f"{list(unique_numeric_values)[0]}"
+
+ # Allow the inclusion of both alphabetic and numeric tokens if mixture is detected
 words = [
 (
 word.lower() if word.istitle() else word
 ) # Lowercase all words except proper nouns (e.g., RNA, mRNA)
- for word in word_tokenize(words_column.str.cat(sep=" "))
- if word.isalpha() and word.lower() not in stop_words
+ for word in tokens
+ if word.isalpha()
+ or word.replace(".", "", 1).isdigit() # Keep alphabetic words and numeric strings
 ]
- # Simplify the word list to remove similar words based on the Jaccard index and generate coherent description
- simplified_words = _simplify_word_list(words, threshold=0.90)
- description = _generate_coherent_description(simplified_words)
+ # Generate a coherent description from the processed words
+ description = _generate_coherent_description(words)
+
 return description
 
 
@@ -242,25 +254,28 @@ def _calculate_jaccard_index(set1: Set[Any], set2: Set[Any]) -> float:
 
 
 def _generate_coherent_description(words: List[str]) -> str:
- """Generate a coherent description from a list of words.
+ """Generate a coherent description from a list of words or numerical string values.
+ If there is only one unique entry, return it directly.
 
 Args:
- words (list of str): A list of words from which to generate the description.
+ words (list): A list of words or numerical string values.
 
 Returns:
 str: A coherent description formed by arranging the words in a logical sequence.
 """
- # Count the frequency of each word
+ # If there are no words or the input is invalid, raise an error
+ if not words or not isinstance(words, list) or not all(isinstance(word, str) for word in words):
+ raise ValueError("Input must be a list of strings.")
+
+ # If there's only one unique word, return it directly (even if it's a number-like string)
+ unique_words = set(words)
+ if len(unique_words) == 1:
+ return list(unique_words)[0]
+
+ # Count the frequency of each word and sort them by frequency
 word_counts = Counter(words)
- # Get the most common words
 most_common_words = [word for word, _ in word_counts.most_common()]
- # Filter out common stopwords
- stop_words = set(stopwords.words("english"))
- filtered_words = [word for word in most_common_words if word.lower() not in stop_words]
- # Generate permutations of the filtered words to find a logical order
- perm = permutations(filtered_words)
- # Assume the first permutation as the logical sequence (since they're all equally likely without additional context)
- logical_sequence = next(perm)
- # Join the words to form a coherent description
- description = " ".join(logical_sequence)
+ # Join the most common words to form a coherent description based on frequency
+ description = " ".join(most_common_words)
+
 return description
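For orientation, the rewritten helper no longer filters stopwords or searches permutations; it returns the single unique entry when there is one, and otherwise joins words in descending frequency order. A rough standalone sketch of that logic (not the packaged function itself):

from collections import Counter

def describe(words):
    # A single unique entry (including a numeric string) is returned directly.
    unique_words = set(words)
    if len(unique_words) == 1:
        return next(iter(unique_words))
    # Otherwise, order by descending frequency and join into one string.
    return " ".join(word for word, _ in Counter(words).most_common())

print(describe(["3.5", "3.5"]))                   # -> 3.5
print(describe(["ribosome", "RNA", "ribosome"]))  # -> ribosome RNA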
risk/neighborhoods/community.py CHANGED
@@ -7,32 +7,29 @@ import community as community_louvain
 import networkx as nx
 import numpy as np
 import markov_clustering as mc
- from networkx.algorithms.community import asyn_lpa_communities
+ from networkx.algorithms.community import asyn_lpa_communities, greedy_modularity_communities
 
 
- def calculate_dijkstra_neighborhoods(network: nx.Graph) -> np.ndarray:
- """Calculate neighborhoods using Dijkstra's shortest path distances.
+ def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
+ """Calculate neighborhoods using the Greedy Modularity method.
 
 Args:
- network (nx.Graph): The network graph.
+ network (nx.Graph): The network graph to analyze for community structure.
 
 Returns:
- np.ndarray: Neighborhood matrix based on Dijkstra's distances.
+ np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
 """
- # Compute Dijkstra's distance for all pairs of nodes in the network
- all_dijkstra_paths = dict(nx.all_pairs_dijkstra_path_length(network, weight="length"))
+ # Detect communities using the Greedy Modularity method
+ communities = greedy_modularity_communities(network)
+ # Create a mapping from node to community
+ community_dict = {node: idx for idx, community in enumerate(communities) for node in community}
+ # Create a binary neighborhood matrix
 neighborhoods = np.zeros((network.number_of_nodes(), network.number_of_nodes()), dtype=int)
-
- # Populate the neighborhoods matrix based on Dijkstra's distances
- for source, targets in all_dijkstra_paths.items():
- max_length = max(targets.values()) if targets else 1 # Handle cases with no targets
- for target, length in targets.items():
- if np.isnan(length):
- neighborhoods[source, target] = max_length # Use max distance for NaN
- elif length == 0:
- neighborhoods[source, target] = 1 # Assign 1 for zero-length paths (self-loops)
- else:
- neighborhoods[source, target] = 1 / length # Inverse of the distance
+ node_index = {node: i for i, node in enumerate(network.nodes())}
+ for node_i, community_i in community_dict.items():
+ for node_j, community_j in community_dict.items():
+ if community_i == community_j:
+ neighborhoods[node_index[node_i], node_index[node_j]] = 1
 
 return neighborhoods
 
@@ -44,21 +41,19 @@ def calculate_label_propagation_neighborhoods(network: nx.Graph) -> np.ndarray:
 network (nx.Graph): The network graph.
 
 Returns:
- np.ndarray: Neighborhood matrix based on Label Propagation.
+ np.ndarray: Binary neighborhood matrix on Label Propagation.
 """
 # Apply Label Propagation
 communities = nx.algorithms.community.label_propagation.label_propagation_communities(network)
-
 # Create a mapping from node to community
 community_dict = {}
 for community_id, community in enumerate(communities):
 for node in community:
 community_dict[node] = community_id
 
- # Create a neighborhood matrix
+ # Create a binary neighborhood matrix
 num_nodes = network.number_of_nodes()
 neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
 # Assign neighborhoods based on community labels
 for node_i, community_i in community_dict.items():
 for node_j, community_j in community_dict.items():
@@ -79,14 +74,14 @@ def calculate_louvain_neighborhoods(
 random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
 
 Returns:
- np.ndarray: Neighborhood matrix based on the Louvain method.
+ np.ndarray: Binary neighborhood matrix on the Louvain method.
 """
 # Apply Louvain method to partition the network
 partition = community_louvain.best_partition(
 network, resolution=resolution, random_state=random_seed
 )
+ # Create a binary neighborhood matrix
 neighborhoods = np.zeros((network.number_of_nodes(), network.number_of_nodes()), dtype=int)
-
 # Assign neighborhoods based on community partitions
 for node_i, community_i in partition.items():
 for node_j, community_j in partition.items():
@@ -103,7 +98,7 @@ def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
 network (nx.Graph): The network graph.
 
 Returns:
- np.ndarray: Neighborhood matrix based on Markov Clustering.
+ np.ndarray: Binary neighborhood matrix on Markov Clustering.
 """
 # Convert the graph to an adjacency matrix
 adjacency_matrix = nx.to_numpy_array(network)
@@ -111,17 +106,15 @@ def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
 result = mc.run_mcl(adjacency_matrix) # Run MCL with default parameters
 # Get clusters
 clusters = mc.get_clusters(result)
-
 # Create a community label for each node
 community_dict = {}
 for community_id, community in enumerate(clusters):
 for node in community:
 community_dict[node] = community_id
 
- # Create a neighborhood matrix
+ # Create a binary neighborhood matrix
 num_nodes = network.number_of_nodes()
 neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
 # Assign neighborhoods based on community labels
 for node_i, community_i in community_dict.items():
 for node_j, community_j in community_dict.items():
@@ -138,21 +131,19 @@ def calculate_spinglass_neighborhoods(network: nx.Graph) -> np.ndarray:
 network (nx.Graph): The network graph.
 
 Returns:
- np.ndarray: Neighborhood matrix based on Spin Glass communities.
+ np.ndarray: Binary neighborhood matrix on Spin Glass communities.
 """
 # Use the asynchronous label propagation algorithm as a proxy for Spin Glass
 communities = asyn_lpa_communities(network)
-
 # Create a community label for each node
 community_dict = {}
 for community_id, community in enumerate(communities):
 for node in community:
 community_dict[node] = community_id
 
- # Create a neighborhood matrix
+ # Create a binary neighborhood matrix
 num_nodes = network.number_of_nodes()
 neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
 # Assign neighborhoods based on community labels
 for node_i, community_i in community_dict.items():
 for node_j, community_j in community_dict.items():
@@ -169,21 +160,19 @@ def calculate_walktrap_neighborhoods(network: nx.Graph) -> np.ndarray:
 network (nx.Graph): The network graph.
 
 Returns:
- np.ndarray: Neighborhood matrix based on Walktrap communities.
+ np.ndarray: Binary neighborhood matrix on Walktrap communities.
 """
 # Use the asynchronous label propagation algorithm as a proxy for Walktrap
 communities = asyn_lpa_communities(network)
-
 # Create a community label for each node
 community_dict = {}
 for community_id, community in enumerate(communities):
 for node in community:
 community_dict[node] = community_id
 
- # Create a neighborhood matrix
+ # Create a binary neighborhood matrix
 num_nodes = network.number_of_nodes()
 neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
 # Assign neighborhoods based on community labels
 for node_i, community_i in community_dict.items():
 for node_j, community_j in community_dict.items():
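Each constructor in this module builds its binary co-membership matrix with a nested loop over community_dict, which is O(n^2) in pure Python. The same matrix can be produced with a vectorized label comparison; the sketch below mirrors the new greedy-modularity variant but is not the package's own code (the karate-club graph is only a convenient test input):

import networkx as nx
import numpy as np
from networkx.algorithms.community import greedy_modularity_communities

def greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
    # Detect communities, then record each node's community index.
    communities = greedy_modularity_communities(network)
    community_of = {node: idx for idx, community in enumerate(communities) for node in community}
    # Community labels in the graph's node order.
    labels = np.array([community_of[node] for node in network.nodes()])
    # Broadcasting marks node pairs that share a label, replacing the double loop.
    return (labels[:, None] == labels[None, :]).astype(int)

print(greedy_modularity_neighborhoods(nx.karate_club_graph()).shape)  # (34, 34)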
risk/neighborhoods/neighborhoods.py CHANGED
@@ -3,6 +3,7 @@ risk/neighborhoods/neighborhoods
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
+ import random
 import warnings
 from typing import Any, Dict, List, Tuple
 
@@ -12,7 +13,7 @@ from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics.pairwise import cosine_similarity
 
 from risk.neighborhoods.community import (
- calculate_dijkstra_neighborhoods,
+ calculate_greedy_modularity_neighborhoods,
 calculate_label_propagation_neighborhoods,
 calculate_louvain_neighborhoods,
 calculate_markov_clustering_neighborhoods,
@@ -26,7 +27,7 @@ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
 
 def get_network_neighborhoods(
 network: nx.Graph,
- distance_metric: str = "dijkstra",
+ distance_metric: str = "louvain",
 edge_length_threshold: float = 1.0,
 louvain_resolution: float = 1.0,
 random_seed: int = 888,
@@ -35,8 +36,8 @@ def get_network_neighborhoods(
 
 Args:
 network (nx.Graph): The network graph.
- distance_metric (str): The distance metric to use ('euclidean', 'dijkstra', 'louvain', 'affinity_propagation',
- 'label_propagation', 'markov_clustering', 'walktrap', 'spinglass').
+ distance_metric (str): The distance metric to use ('greedy_modularity', 'louvain', 'label_propagation',
+ 'markov_clustering', 'walktrap', 'spinglass').
 edge_length_threshold (float): The edge length threshold for the neighborhoods.
 louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
 random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
@@ -44,10 +45,17 @@ def get_network_neighborhoods(
 Returns:
 np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
 """
- network = _create_percentile_limited_subgraph(network, edge_length_threshold)
+ # Set random seed for reproducibility in all methods besides Louvain, which requires a separate seed
+ random.seed(random_seed)
+ np.random.seed(random_seed)
 
- if distance_metric == "dijkstra":
- return calculate_dijkstra_neighborhoods(network)
+ # Create a subgraph based on the edge length percentile threshold
+ network = _create_percentile_limited_subgraph(
+ network, edge_length_percentile=edge_length_threshold
+ )
+
+ if distance_metric == "greedy_modularity":
+ return calculate_greedy_modularity_neighborhoods(network)
 if distance_metric == "louvain":
 return calculate_louvain_neighborhoods(network, louvain_resolution, random_seed=random_seed)
 if distance_metric == "label_propagation":
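Assuming the module path shown in its docstring above, the updated entry point could be exercised as follows; the toy graph and its 'length' attributes are illustrative, and 'dijkstra' is no longer accepted in 0.0.7b6:

import networkx as nx
from risk.neighborhoods.neighborhoods import get_network_neighborhoods

# Toy graph with 'length' edge attributes, which the percentile filter now requires.
G = nx.path_graph(5)
for u, v in G.edges():
    G[u][v]["length"] = 1.0

neighborhoods = get_network_neighborhoods(
    G,
    distance_metric="greedy_modularity",  # or the new default, "louvain"
    edge_length_threshold=1.0,
    random_seed=888,
)
print(neighborhoods.shape)  # (5, 5) binary co-membership matrix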
@@ -60,34 +68,44 @@ def get_network_neighborhoods(
 return calculate_spinglass_neighborhoods(network)
 
 raise ValueError(
- "Incorrect distance metric specified. Please choose from 'dijkstra', 'louvain',"
+ "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
 "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
 )
 
 
 def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
- """Calculate the edge length corresponding to the given percentile of edge lengths in the graph
- and create a subgraph with all nodes and edges below this length.
+ """Create a subgraph containing all nodes and edges where the edge length is below the
+ specified percentile of all edge lengths in the input graph.
 
 Args:
- G (nx.Graph): The input graph.
- edge_length_percentile (float): The percentile to calculate (between 0 and 1).
+ G (nx.Graph): The input graph with 'length' attributes on edges.
+ edge_length_percentile (float): The percentile (between 0 and 1) to filter edges by length.
 
 Returns:
- nx.Graph: A subgraph with all nodes and edges below the edge length corresponding to the given percentile.
+ nx.Graph: A subgraph with all nodes and edges where the edge length is below the
+ calculated threshold length.
 """
- # Extract edge lengths from the graph
+ # Extract edge lengths and handle missing lengths
 edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
+ if not edge_lengths:
+ raise ValueError(
+ "No edge lengths found in the graph. Ensure edges have 'length' attributes."
+ )
+
 # Calculate the specific edge length for the given percentile
 percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
- # Create a new graph with all nodes from the original graph
+ # Create the subgraph by directly filtering edges during iteration
 subgraph = nx.Graph()
- subgraph.add_nodes_from(G.nodes(data=True))
- # Add edges to the subgraph if they are below the specified percentile length
+ subgraph.add_nodes_from(G.nodes(data=True)) # Retain all nodes from the original graph
+ # Add edges below the specified percentile length in a single pass
 for u, v, d in G.edges(data=True):
 if d.get("length", 1) <= percentile_length:
 subgraph.add_edge(u, v, **d)
 
+ # Return the subgraph; optionally check if it's too sparse
+ if subgraph.number_of_edges() == 0:
+ raise Warning("The resulting subgraph has no edges. Consider adjusting the percentile.")
+
 return subgraph
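The percentile threshold works on the raw 'length' values: an edge_length_threshold of 0.5 keeps edges at or below the median length. A small self-contained illustration of the same filtering arithmetic (not the package helper itself):

import networkx as nx
import numpy as np

G = nx.Graph()
G.add_edge("a", "b", length=1.0)
G.add_edge("b", "c", length=2.0)
G.add_edge("c", "d", length=10.0)

edge_lengths = [d["length"] for _, _, d in G.edges(data=True)]
percentile_length = np.percentile(edge_lengths, 0.5 * 100)  # 50th percentile of [1, 2, 10] -> 2.0

subgraph = nx.Graph()
subgraph.add_nodes_from(G.nodes(data=True))  # keep every node
subgraph.add_edges_from(
    (u, v, d) for u, v, d in G.edges(data=True) if d.get("length", 1) <= percentile_length
)
print(sorted(subgraph.edges()))  # [('a', 'b'), ('b', 'c')] -- the long edge is dropped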
92
110
 
93
111
 
risk/network/plot.py CHANGED
@@ -9,6 +9,7 @@ import matplotlib.colors as mcolors
 import matplotlib.pyplot as plt
 import networkx as nx
 import numpy as np
+ import pandas as pd
 from scipy.ndimage import label
 from scipy.stats import gaussian_kde
 
@@ -601,7 +602,7 @@ class NetworkPlotter:
 min_words (int, optional): Minimum number of words required to display a label. Defaults to 1.
 max_word_length (int, optional): Maximum number of characters in a word to display. Defaults to 20.
 min_word_length (int, optional): Minimum number of characters in a word to display. Defaults to 1.
- words_to_omit (List, optional): List of words to omit from the labels. Defaults to None.
+ words_to_omit (list, optional): List of words to omit from the labels. Defaults to None.
 overlay_ids (bool, optional): Whether to overlay domain IDs in the center of the centroids. Defaults to False.
 ids_to_keep (list, tuple, np.ndarray, or None, optional): IDs of domains that must be labeled. To discover domain IDs,
 you can set `overlay_ids=True`. Defaults to None.
@@ -710,6 +711,9 @@ class NetworkPlotter:
 # Process remaining domains to fill in additional labels, if there are slots left
 if remaining_labels and remaining_labels > 0:
 for idx, (domain, centroid) in enumerate(domain_centroids.items()):
+ # Check if the domain is NaN and continue if true
+ if pd.isna(domain) or (isinstance(domain, float) and np.isnan(domain)):
+ continue # Skip NaN domains
 if ids_to_keep and domain in ids_to_keep:
 continue # Skip domains already handled by ids_to_keep
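The added guard leans on pd.isna, which already covers float NaN and None; the extra isinstance check mirrors the package's belt-and-braces style. A quick isolated check of how domain keys would be classified (the example values are made up):

import numpy as np
import pandas as pd

for domain in [1, "GO:0008150", float("nan"), None]:
    skip = pd.isna(domain) or (isinstance(domain, float) and np.isnan(domain))
    print(domain, "-> skipped" if skip else "-> labeled")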
risk/risk.py CHANGED
@@ -49,9 +49,10 @@ class RISK(NetworkIO, AnnotationsIO):
 self,
 network: nx.Graph,
 annotations: Dict[str, Any],
- distance_metric: str = "dijkstra",
+ distance_metric: str = "louvain",
 louvain_resolution: float = 0.1,
 edge_length_threshold: float = 0.5,
+ null_distribution: str = "network",
 random_seed: int = 888,
 ) -> Dict[str, Any]:
 """Load significant neighborhoods for the network using the hypergeometric test.
@@ -59,9 +60,10 @@ class RISK(NetworkIO, AnnotationsIO):
 Args:
 network (nx.Graph): The network graph.
 annotations (dict): The annotations associated with the network.
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+ distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
 louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
 edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 random_seed (int, optional): Seed for random number generation. Defaults to 888.
 
 Returns:
@@ -74,6 +76,7 @@ class RISK(NetworkIO, AnnotationsIO):
 louvain_resolution=louvain_resolution,
 edge_length_threshold=edge_length_threshold,
 statistical_test_function="hypergeom",
+ null_distribution=null_distribution,
 random_seed=random_seed,
 )
 
@@ -89,6 +92,7 @@ class RISK(NetworkIO, AnnotationsIO):
 neighborhood_significance = compute_hypergeom_test(
 neighborhoods=neighborhoods,
 annotations=annotations["matrix"],
+ null_distribution=null_distribution,
 )
 
 # Return the computed neighborhood significance
@@ -98,9 +102,10 @@ class RISK(NetworkIO, AnnotationsIO):
 self,
 network: nx.Graph,
 annotations: Dict[str, Any],
- distance_metric: str = "dijkstra",
+ distance_metric: str = "louvain",
 louvain_resolution: float = 0.1,
 edge_length_threshold: float = 0.5,
+ null_distribution: str = "network",
 random_seed: int = 888,
 ) -> Dict[str, Any]:
 """Load significant neighborhoods for the network using the Poisson test.
@@ -108,9 +113,10 @@ class RISK(NetworkIO, AnnotationsIO):
 Args:
 network (nx.Graph): The network graph.
 annotations (dict): The annotations associated with the network.
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+ distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
 louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
 edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 random_seed (int, optional): Seed for random number generation. Defaults to 888.
 
 Returns:
@@ -123,6 +129,7 @@ class RISK(NetworkIO, AnnotationsIO):
 louvain_resolution=louvain_resolution,
 edge_length_threshold=edge_length_threshold,
 statistical_test_function="poisson",
+ null_distribution=null_distribution,
 random_seed=random_seed,
 )
 
@@ -138,6 +145,7 @@ class RISK(NetworkIO, AnnotationsIO):
 neighborhood_significance = compute_poisson_test(
 neighborhoods=neighborhoods,
 annotations=annotations["matrix"],
+ null_distribution=null_distribution,
 )
 
 # Return the computed neighborhood significance
@@ -147,7 +155,7 @@ class RISK(NetworkIO, AnnotationsIO):
 self,
 network: nx.Graph,
 annotations: Dict[str, Any],
- distance_metric: str = "dijkstra",
+ distance_metric: str = "louvain",
 louvain_resolution: float = 0.1,
 edge_length_threshold: float = 0.5,
 score_metric: str = "sum",
@@ -161,11 +169,11 @@ class RISK(NetworkIO, AnnotationsIO):
 Args:
 network (nx.Graph): The network graph.
 annotations (dict): The annotations associated with the network.
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+ distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
 louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
 edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
 score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
- null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
 random_seed (int, optional): Seed for random number generation. Defaults to 888.
 max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
@@ -366,7 +374,7 @@ class RISK(NetworkIO, AnnotationsIO):
 def _load_neighborhoods(
 self,
 network: nx.Graph,
- distance_metric: str = "dijkstra",
+ distance_metric: str = "louvain",
 louvain_resolution: float = 0.1,
 edge_length_threshold: float = 0.5,
 random_seed: int = 888,
@@ -376,7 +384,7 @@ class RISK(NetworkIO, AnnotationsIO):
 Args:
 network (nx.Graph): The network graph.
 annotations (pd.DataFrame): The matrix of annotations associated with the network.
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
+ distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
 louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
 edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
 random_seed (int, optional): Seed for random number generation. Defaults to 888.
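Taken together, the risk.py changes switch the default distance_metric to "louvain" and thread a null_distribution choice from the public loaders down to the statistical tests. A hypothetical call sketch follows; the loader name load_neighborhoods_by_hypergeom and the bare RISK() construction are assumptions not confirmed by this diff, though the keyword arguments match the signature shown above:

import networkx as nx
from risk import RISK

def run_hypergeom_analysis(network: nx.Graph, annotations: dict) -> dict:
    # Hypothetical wrapper; only the keyword arguments below are confirmed by the diff.
    risk = RISK()
    return risk.load_neighborhoods_by_hypergeom(  # assumed method name
        network,
        annotations,
        distance_metric="louvain",        # new default, replacing "dijkstra"
        louvain_resolution=0.1,
        edge_length_threshold=0.5,
        null_distribution="annotations",  # new: background restricted to annotated nodes
        random_seed=888,
    )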
risk/stats/hypergeom.py CHANGED
@@ -10,46 +10,47 @@ from scipy.stats import hypergeom
 
 
 def compute_hypergeom_test(
- neighborhoods: np.ndarray,
- annotations: np.ndarray,
+ neighborhoods: np.ndarray, annotations: np.ndarray, null_distribution: str = "network"
 ) -> Dict[str, Any]:
- """Compute hypergeometric test for enrichment and depletion in neighborhoods.
+ """Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
 
 Args:
- neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
- and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
- in a neighborhood.
- annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
- and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
- being annotated.
+ neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
+ annotations (np.ndarray): Binary matrix representing annotations.
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 
 Returns:
- Dict[str, Any]: A dictionary with two keys:
- - "enrichment_pvals" (np.ndarray): P-values for enrichment, indicating the probability
- of observing more annotations in a neighborhood than expected under the hypergeometric test.
- - "depletion_pvals" (np.ndarray): P-values for depletion, indicating the probability
- of observing fewer annotations in a neighborhood than expected under the hypergeometric test.
+ dict: Dictionary containing depletion and enrichment p-values.
 """
 # Ensure both matrices are binary (presence/absence)
 neighborhoods = (neighborhoods > 0).astype(int)
 annotations = (annotations > 0).astype(int)
- total_node_count = annotations.shape[0]
- # Sum of values in each neighborhood
- neighborhood_sums = np.sum(neighborhoods, axis=0)[:, np.newaxis]
- # Repeating neighborhood sums for each annotation
- neighborhood_size_matrix = np.tile(neighborhood_sums, (1, annotations.shape[1]))
- # Total number of nodes annotated to each attribute
- annotated_node_counts = np.tile(np.sum(annotations, axis=0), (neighborhoods.shape[1], 1))
- # Nodes in each neighborhood annotated to each attribute
- annotated_in_neighborhood = np.dot(neighborhoods, annotations)
- # Calculate p-values using the hypergeometric distribution
+ total_node_count = neighborhoods.shape[0]
+
+ if null_distribution == "network":
+ # Case 1: Use all nodes as the background
+ background_population = total_node_count
+ neighborhood_sums = np.sum(neighborhoods, axis=0, keepdims=True).T
+ annotation_sums = np.sum(annotations, axis=0, keepdims=True)
+ elif null_distribution == "annotations":
+ # Case 2: Only consider nodes with at least one annotation
+ annotated_nodes = np.sum(annotations, axis=1) > 0
+ background_population = np.sum(annotated_nodes)
+ neighborhood_sums = np.sum(neighborhoods[annotated_nodes], axis=0, keepdims=True).T
+ annotation_sums = np.sum(annotations[annotated_nodes], axis=0, keepdims=True)
+ else:
+ raise ValueError(
+ "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+ )
+
+ # Matrix multiplication for annotated nodes in each neighborhood
+ annotated_in_neighborhood = neighborhoods.T @ annotations
+ # Calculate depletion and enrichment p-values using the hypergeometric distribution
 depletion_pvals = hypergeom.cdf(
- annotated_in_neighborhood, total_node_count, annotated_node_counts, neighborhood_size_matrix
+ annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
 )
 enrichment_pvals = hypergeom.sf(
- annotated_in_neighborhood - 1,
- total_node_count,
- annotated_node_counts,
- neighborhood_size_matrix,
+ annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
 )
+
 return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
risk/stats/permutation/permutation.py CHANGED
@@ -28,7 +28,7 @@ def compute_permutation_test(
 neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
 annotations (np.ndarray): Binary matrix representing annotations.
 score_metric (str, optional): Metric to use for scoring ('sum', 'mean', etc.). Defaults to "sum".
- null_distribution (str, optional): Type of null distribution ('network' or other). Defaults to "network".
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
 random_seed (int, optional): Seed for random number generation. Defaults to 888.
 max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.
@@ -78,7 +78,7 @@ def _run_permutation_test(
 neighborhoods (np.ndarray): The neighborhood matrix.
 annotations (np.ndarray): The annotation matrix.
 neighborhood_score_func (Callable): Function to calculate neighborhood scores.
- null_distribution (str, optional): Type of null distribution. Defaults to "network".
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 num_permutations (int, optional): Number of permutations. Defaults to 1000.
 random_seed (int, optional): Seed for random number generation. Defaults to 888.
 max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
@@ -91,8 +91,12 @@ def _run_permutation_test(
 # Determine the indices to use based on the null distribution type
 if null_distribution == "network":
 idxs = range(annotations.shape[0])
- else:
+ elif null_distribution == "annotations":
 idxs = np.nonzero(np.sum(~np.isnan(annotations), axis=1))[0]
+ else:
+ raise ValueError(
+ "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+ )
 
 # Replace NaNs with zeros in the annotations matrix
 annotations[np.isnan(annotations)] = 0
risk/stats/poisson.py CHANGED
@@ -9,32 +9,39 @@ import numpy as np
 from scipy.stats import poisson
 
 
- def compute_poisson_test(neighborhoods: np.ndarray, annotations: np.ndarray) -> Dict[str, Any]:
- """Compute Poisson test for enrichment and depletion in neighborhoods.
+ def compute_poisson_test(
+ neighborhoods: np.ndarray, annotations: np.ndarray, null_distribution: str = "network"
+ ) -> Dict[str, Any]:
+ """Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
 
 Args:
- neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
- and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
- in a neighborhood.
- annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
- and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
- being annotated.
+ neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
+ annotations (np.ndarray): Binary matrix representing annotations.
+ null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 
 Returns:
- Dict[str, Any]: A dictionary with two keys:
- - "enrichment_pvals" (np.ndarray): P-values for enrichment, indicating the probability
- of observing more annotations in a neighborhood than expected under the Poisson distribution.
- - "depletion_pvals" (np.ndarray): P-values for depletion, indicating the probability of
- observing fewer annotations in a neighborhood than expected under the Poisson distribution.
+ dict: Dictionary containing depletion and enrichment p-values.
 """
+ # Ensure both matrices are binary (presence/absence)
 neighborhoods = (neighborhoods > 0).astype(int)
 annotations = (annotations > 0).astype(int)
- annotated_in_neighborhood = np.dot(neighborhoods, annotations)
- lambda_expected = np.mean(annotated_in_neighborhood, axis=0)
- # Enrichment (observing more than expected)
+ # Matrix multiplication to get the number of annotated nodes in each neighborhood
+ annotated_in_neighborhood = neighborhoods @ annotations
+
+ # Compute lambda_expected based on the chosen null distribution
+ if null_distribution == "network":
+ # Use the mean across neighborhoods (axis=1)
+ lambda_expected = np.mean(annotated_in_neighborhood, axis=1, keepdims=True)
+ elif null_distribution == "annotations":
+ # Use the mean across annotations (axis=0)
+ lambda_expected = np.mean(annotated_in_neighborhood, axis=0, keepdims=True)
+ else:
+ raise ValueError(
+ "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+ )
+
+ # Compute p-values for enrichment and depletion using Poisson distribution
 enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood - 1, lambda_expected)
-
- # Depletion (observing fewer than expected)
 depletion_pvals = poisson.cdf(annotated_in_neighborhood, lambda_expected)
 
 return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
risk_network-0.0.7b5.dist-info/METADATA → risk_network-0.0.7b6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
- Version: 0.0.7b5
+ Version: 0.0.7b6
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
risk_network-0.0.7b5.dist-info/RECORD → risk_network-0.0.7b6.dist-info/RECORD CHANGED
@@ -1,30 +1,30 @@
- risk/__init__.py,sha256=dGMZvusp_heb_yF3HEnVZDfVhFlvQDEuBQKDQfIAJvk,112
+ risk/__init__.py,sha256=usWMc5kXOn1-bcSacSXIi_nGKYj4cIQyRvSzvAWGbMI,112
 risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
- risk/risk.py,sha256=EhKdNC5ntEsBAXG7Rw1Y-ho0HzbsvoU9XYE8djD-Axs,19972
+ risk/risk.py,sha256=6666BzdMTgOaQl98ZKiJ19c6XBot26eTJ0iIlk-ZCZQ,20515
 risk/annotations/__init__.py,sha256=vUpVvMRE5if01Ic8QY6M2Ae3EFGJHdugEe9PdEkAW4Y,138
- risk/annotations/annotations.py,sha256=K7cUA6vYTKYAvj0xHqrAwNEYtmPq4H7LDYENAOVQdQ0,11014
+ risk/annotations/annotations.py,sha256=k9LGTL2uqdYvI5F3jU3UKz-O855B-DoazGPMzSn-XUc,11673
 risk/annotations/io.py,sha256=lo7NKqOVkeeBp58JBxWJHtA0xjL5Yoxqe9Ox0daKlZk,9457
 risk/log/__init__.py,sha256=xuLImfxFlKpnVhzi_gDYlr2_c9cLkrw2c_3iEsXb1as,107
 risk/log/console.py,sha256=im9DRExwf6wHlcn9fewoDcKIpo3vPcorZIaNAl-0csY,355
 risk/log/params.py,sha256=Rfdg5UcGCrG80m6V79FyORERWUqIzHFO7tGiY4zAImM,6347
 risk/neighborhoods/__init__.py,sha256=tKKEg4lsbqFukpgYlUGxU_v_9FOqK7V0uvM9T2QzoL0,206
- risk/neighborhoods/community.py,sha256=7ebo1Q5KokSQISnxZIh2SQxsKXdXm8aVkp-h_DiQ3K0,6818
+ risk/neighborhoods/community.py,sha256=stYYBXeZlGLMV-k8ckQeIqThT6v9y-S3hETobAo9590,6817
 risk/neighborhoods/domains.py,sha256=bxJUxqFTynzX0mf3E8-AA4_Rfccje1reeVVhfzb1-pE,10672
- risk/neighborhoods/neighborhoods.py,sha256=N02r2nnCfDtzVicuUt2WA77EUPHtruqjX8qJmXUP7ik,17475
+ risk/neighborhoods/neighborhoods.py,sha256=r-JeUb6dTjzMtnaMDvJy6MI3mTl-yUzILcdcjtOhFdM,18218
 risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
 risk/network/geometry.py,sha256=H1yGVVqgbfpzBzJwEheDLfvGLSA284jGQQTn612L4Vc,6759
 risk/network/graph.py,sha256=_LEoom4EEowGALuJKSXcev9RAAHu2FqIeq3u7mkifW0,16479
 risk/network/io.py,sha256=gG50kOknO-D3HkW1HsbHMkTMvjUtn3l4W4Jwd-rXNr8,21202
- risk/network/plot.py,sha256=F6KPjmBYWrThKZScHs9SuzoKQiytBvzrmGhGberHjwo,62063
+ risk/network/plot.py,sha256=3OucCoKJwx9M9H4lqAvcQdM9YiCSyIxz21jyqDbpffc,62286
 risk/stats/__init__.py,sha256=WcgoETQ-hS0LQqKRsAMIPtP15xZ-4eul6VUBuUx4Wzc,220
- risk/stats/hypergeom.py,sha256=CfGJ1fd7QKIbBVy85p6-upXwNi19TJioDuekA65PHCQ,2473
- risk/stats/poisson.py,sha256=eCBgxVdNUTJ_0aVxSU8ddSFGIXeSOY7Vx3YQBaEzN2k,1836
+ risk/stats/hypergeom.py,sha256=DcGYjmfcgt1qshNZPJt5IHGIHtxw9tWRS1r6QJ6V3dI,2378
+ risk/stats/poisson.py,sha256=CnLk65CHViR4YhAaN3ix37iyLm_YQYGo851bSnGyyxY,1950
 risk/stats/stats.py,sha256=kvShov-94W6ffgDUTb522vB9hDJQSyTsYif_UIaFfSM,7059
 risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
- risk/stats/permutation/permutation.py,sha256=qLWdwxEY6nmkYPxpM8HLDcd2mbqYv9Qr7CKtJvhLqIM,9220
+ risk/stats/permutation/permutation.py,sha256=bFcgTJZI8cLODvGoW4QtMeBnuUs2HibJ42OZyC74Tz0,9427
 risk/stats/permutation/test_functions.py,sha256=HuDIM-V1jkkfE1rlaIqrWWBSKZt3dQ1f-YEDjWpnLSE,2343
- risk_network-0.0.7b5.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
- risk_network-0.0.7b5.dist-info/METADATA,sha256=DaAqg8en6KjGKUGgxI96K749ZwhFRY92h0RsnvoGqx4,43142
- risk_network-0.0.7b5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- risk_network-0.0.7b5.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
- risk_network-0.0.7b5.dist-info/RECORD,,
+ risk_network-0.0.7b6.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+ risk_network-0.0.7b6.dist-info/METADATA,sha256=BQRgen5tB4jtWEWvm6VXkYPX3WudtzPpsxtFUtz3Ej0,43142
+ risk_network-0.0.7b6.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ risk_network-0.0.7b6.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+ risk_network-0.0.7b6.dist-info/RECORD,,