risk-network 0.0.7b12__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- risk/__init__.py +1 -1
- risk/annotations/__init__.py +1 -1
- risk/annotations/annotations.py +85 -53
- risk/annotations/io.py +3 -3
- risk/log/__init__.py +1 -1
- risk/log/{config.py → console.py} +2 -2
- risk/log/params.py +6 -6
- risk/neighborhoods/community.py +68 -61
- risk/neighborhoods/domains.py +41 -18
- risk/neighborhoods/neighborhoods.py +134 -69
- risk/network/geometry.py +5 -2
- risk/network/graph.py +69 -235
- risk/network/io.py +44 -6
- risk/network/plot/__init__.py +6 -0
- risk/network/plot/canvas.py +290 -0
- risk/network/plot/contour.py +327 -0
- risk/network/plot/labels.py +929 -0
- risk/network/plot/network.py +288 -0
- risk/network/plot/plotter.py +137 -0
- risk/network/plot/utils/color.py +424 -0
- risk/network/plot/utils/layout.py +91 -0
- risk/risk.py +70 -37
- risk/stats/hypergeom.py +1 -1
- risk/stats/permutation/permutation.py +21 -8
- risk/stats/poisson.py +2 -2
- risk/stats/stats.py +12 -10
- {risk_network-0.0.7b12.dist-info → risk_network-0.0.8.dist-info}/METADATA +84 -21
- risk_network-0.0.8.dist-info/RECORD +37 -0
- {risk_network-0.0.7b12.dist-info → risk_network-0.0.8.dist-info}/WHEEL +1 -1
- risk/network/plot.py +0 -1450
- risk_network-0.0.7b12.dist-info/RECORD +0 -30
- {risk_network-0.0.7b12.dist-info → risk_network-0.0.8.dist-info}/LICENSE +0 -0
- {risk_network-0.0.7b12.dist-info → risk_network-0.0.8.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotations/__init__.py
CHANGED
risk/annotations/annotations.py
CHANGED
@@ -3,6 +3,7 @@ risk/annotations/annotations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
+import re
 from collections import Counter
 from itertools import compress
 from typing import Any, Dict, List, Set
@@ -30,27 +31,30 @@ def _setup_nltk():
 
 # Ensure you have the necessary NLTK data
 _setup_nltk()
+# Initialize English stopwords
+stop_words = set(stopwords.words("english"))
 
 
 def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Dict[str, Any]:
     """Convert annotations input to a DataFrame and reindex based on the network's node labels.
 
     Args:
-
+        network (nx.Graph): The network graph.
+        annotations_input (Dict[str, Any]): A dictionary with annotations.
 
     Returns:
-
+        Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
     """
     # Flatten the dictionary to a list of tuples for easier DataFrame creation
     flattened_annotations = [
         (node, annotation) for annotation, nodes in annotations_input.items() for node in nodes
     ]
     # Create a DataFrame from the flattened list
-    annotations = pd.DataFrame(flattened_annotations, columns=["
-    annotations["
+    annotations = pd.DataFrame(flattened_annotations, columns=["node", "annotations"])
+    annotations["is_member"] = 1
     # Pivot to create a binary matrix with nodes as rows and annotations as columns
     annotations_pivot = annotations.pivot_table(
-        index="
+        index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
     )
     # Reindex the annotations matrix based on the node labels from the network
     node_label_order = list(nx.get_node_attributes(network, "label").values())
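
For orientation, here is a minimal, self-contained sketch of what the new pivot produces, using made-up node and annotation names rather than package data:

import pandas as pd

# Toy (node, annotation) pairs mirroring the flattened list above
flattened = [("geneA", "pathway X"), ("geneB", "pathway X"), ("geneA", "pathway Y")]
annotations = pd.DataFrame(flattened, columns=["node", "annotations"])
annotations["is_member"] = 1
# Pivot into the binary node-by-annotation membership matrix
annotations_pivot = annotations.pivot_table(
    index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
)
# geneA belongs to both annotations, geneB only to "pathway X":
#        pathway X  pathway Y
# geneA          1          1
# geneB          1          0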
@@ -80,7 +84,8 @@ def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
     neighborhood_enrichment_sums: List[int],
-
+    significant_enrichment_matrix: np.ndarray,
+    significant_binary_enrichment_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
@@ -90,42 +95,52 @@ def define_top_annotations(
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
         neighborhood_enrichment_sums (list of int): List of neighborhood enrichment sums.
-
+        significant_enrichment_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_enrichment_matrix (np.ndarray): Binary enrichment matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
 
     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    #
+    # Sum the columns of the significant enrichment matrix (positive floating point values)
+    significant_enrichment_scores = significant_enrichment_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood enrichment sums, and enrichment scores
     annotations_enrichment_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
-            "
-            "
+            "full_terms": ordered_annotation_labels,
+            "significant_neighborhood_enrichment_sums": neighborhood_enrichment_sums,
+            "significant_enrichment_score": significant_enrichment_scores,
         }
     )
-    annotations_enrichment_matrix["
-    # Apply size constraints to identify potential
+    annotations_enrichment_matrix["significant_annotations"] = False
+    # Apply size constraints to identify potential significant annotations
     annotations_enrichment_matrix.loc[
-        (
-
-
+        (
+            annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+            >= min_cluster_size
+        )
+        & (
+            annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+            <= max_cluster_size
+        ),
+        "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-    annotations_enrichment_matrix["
-    annotations_enrichment_matrix["
-    annotations_enrichment_matrix["
-        "
+    annotations_enrichment_matrix["num_connected_components"] = 0
+    annotations_enrichment_matrix["size_connected_components"] = None
+    annotations_enrichment_matrix["size_connected_components"] = annotations_enrichment_matrix[
+        "size_connected_components"
     ].astype(object)
-    annotations_enrichment_matrix["
+    annotations_enrichment_matrix["num_large_connected_components"] = 0
 
     for attribute in annotations_enrichment_matrix.index.values[
-        annotations_enrichment_matrix["
+        annotations_enrichment_matrix["significant_annotations"]
     ]:
         # Identify enriched neighborhoods based on the binary enrichment matrix
         enriched_neighborhoods = list(
-            compress(list(network),
+            compress(list(network), significant_binary_enrichment_matrix[:, attribute])
         )
         enriched_network = nx.subgraph(network, enriched_neighborhoods)
         # Analyze connected components within the enriched subnetwork
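
The new size-constraint filter reduces to a plain boolean mask; a minimal sketch with the default bounds (min_cluster_size=5, max_cluster_size=1000) and toy sums:

import pandas as pd

# Three toy annotations: too small, in range, too large
df = pd.DataFrame({"significant_neighborhood_enrichment_sums": [2, 10, 2000]})
df["significant_annotations"] = False
df.loc[
    (df["significant_neighborhood_enrichment_sums"] >= 5)
    & (df["significant_neighborhood_enrichment_sums"] <= 1000),
    "significant_annotations",
] = True
print(df["significant_annotations"].tolist())  # [False, True, False]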
@@ -144,57 +159,74 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)
 
         # Assign the number of connected components
-        annotations_enrichment_matrix.loc[attribute, "
+        annotations_enrichment_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
         annotations_enrichment_matrix.loc[
-            annotations_enrichment_matrix["
+            annotations_enrichment_matrix["num_connected_components"] > 1, "significant_annotations"
         ] = False
         # Assign the number of large connected components
-        annotations_enrichment_matrix.loc[attribute, "
+        annotations_enrichment_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-        annotations_enrichment_matrix.at[attribute, "
+        annotations_enrichment_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )
 
     return annotations_enrichment_matrix
 
 
-def 
-    """
-
+def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
+    """Generate a weighted description from words and their corresponding scores,
+    with support for stopwords filtering and improved weighting logic.
 
     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
+        scores_column (pd.Series): A pandas Series containing enrichment scores to weigh the terms.
 
     Returns:
-        str: A coherent description formed from the most frequent and significant words.
+        str: A coherent description formed from the most frequent and significant words, weighed by enrichment scores.
     """
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Handle case where all scores are the same
+    if scores_column.max() == scores_column.min():
+        normalized_scores = pd.Series([1] * len(scores_column))
+    else:
+        # Normalize the enrichment scores to be between 0 and 1
+        normalized_scores = (scores_column - scores_column.min()) / (
+            scores_column.max() - scores_column.min()
+        )
+
+    # Combine words and normalized scores to create weighted words
+    weighted_words = []
+    for word, score in zip(words_column, normalized_scores):
+        word = str(word)
+        if word not in stop_words:  # Skip stopwords
+            weight = max(1, int((0 if pd.isna(score) else score) * 10))
+            weighted_words.extend([word] * weight)
+
+    # Tokenize the weighted words, but preserve number-word patterns like '4-alpha'
+    tokens = word_tokenize(" ".join(weighted_words))
+    # Ensure we treat "4-alpha" or other "number-word" patterns as single tokens
+    combined_tokens = []
+    for token in tokens:
+        # Match patterns like '4-alpha' or '5-hydroxy' and keep them together
+        if re.match(r"^\d+-\w+", token):
+            combined_tokens.append(token)
+        elif token.replace(".", "", 1).isdigit():  # Handle pure numeric tokens
+            # Ignore pure numbers as descriptions unless necessary
+            continue
+        else:
+            combined_tokens.append(token)
+
+    # Prevent descriptions like just '4' from being selected
+    if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
+        return "N/A"  # Return "N/A" for cases where it's just a number
+
+    # Simplify the word list and generate the description
+    simplified_words = _simplify_word_list(combined_tokens)
+    description = _generate_coherent_description(simplified_words)
 
     return description
 
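
The core of the new weighting logic is min-max normalization followed by word repetition; a self-contained sketch of just that step, with toy words and scores:

import pandas as pd

words = pd.Series(["kinase", "transport", "binding"])
scores = pd.Series([2.0, 8.0, 5.0])

# Min-max normalize the scores to [0, 1], as in the hunk above
normalized = (scores - scores.min()) / (scores.max() - scores.min())
# Each word is repeated max(1, int(score * 10)) times before tokenization
weights = [max(1, int(s * 10)) for s in normalized]
print(list(zip(words, weights)))  # [('kinase', 1), ('transport', 10), ('binding', 5)]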
@@ -257,7 +289,7 @@ def _generate_coherent_description(words: List[str]) -> str:
     If there is only one unique entry, return it directly.
 
     Args:
-        words (
+        words (List): A list of words or numerical string values.
 
     Returns:
         str: A coherent description formed by arranging the words in a logical sequence.
risk/annotations/io.py
CHANGED
@@ -33,7 +33,7 @@ class AnnotationsIO:
         filepath (str): Path to the JSON annotations file.
 
     Returns:
-
+        Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
     """
     filetype = "JSON"
     # Log the loading of the JSON file
@@ -158,10 +158,10 @@ class AnnotationsIO:
 
     Args:
         network (NetworkX graph): The network to which the annotations are related.
-        content (
+        content (Dict[str, Any]): The annotations dictionary to load.
 
     Returns:
-
+        Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
     """
     # Ensure the input content is a dictionary
     if not isinstance(content, dict):
risk/log/__init__.py
CHANGED
risk/log/params.py
CHANGED
@@ -12,7 +12,7 @@ from typing import Any, Dict
 
 import numpy as np
 
-from .
+from .console import logger, log_header
 
 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")
@@ -159,7 +159,7 @@ class Params:
         """Load and process various parameters, converting any np.ndarray values to lists.
 
         Returns:
-
+            Dict[str, Any]: A dictionary containing the processed parameters.
         """
         log_header("Loading parameters")
         return _convert_ndarray_to_list(
@@ -174,14 +174,14 @@ class Params:
         )
 
 
-def _convert_ndarray_to_list(d: Any) -> Any:
+def _convert_ndarray_to_list(d: Dict[str, Any]) -> Dict[str, Any]:
     """Recursively convert all np.ndarray values in the dictionary to lists.
 
     Args:
-        d (
+        d (Dict[str, Any]): The dictionary to process.
 
     Returns:
-
+        Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
     """
     if isinstance(d, dict):
         # Recursively process each value in the dictionary
@@ -193,5 +193,5 @@ def _convert_ndarray_to_list(d: Any) -> Any:
         # Convert numpy arrays to lists
         return d.tolist()
     else:
-        # Return the value unchanged if it's not a dict,
+        # Return the value unchanged if it's not a dict, List, or ndarray
         return d
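
Assembling the pieces visible across these hunks, _convert_ndarray_to_list is an ordinary recursive walk. A minimal sketch follows; the list branch is inferred from the "dict, List, or ndarray" comment, since that part of the file is not shown in this diff:

import numpy as np

def convert_ndarray_to_list(d):
    if isinstance(d, dict):
        # Recursively process each value in the dictionary
        return {k: convert_ndarray_to_list(v) for k, v in d.items()}
    if isinstance(d, list):
        # Inferred branch: recurse into lists as well
        return [convert_ndarray_to_list(v) for v in d]
    if isinstance(d, np.ndarray):
        # Convert numpy arrays to lists
        return d.tolist()
    return d

print(convert_ndarray_to_list({"a": np.array([1, 2]), "b": {"c": np.array([3])}}))
# {'a': [1, 2], 'b': {'c': [3]}}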
risk/neighborhoods/community.py
CHANGED
@@ -21,15 +21,20 @@ def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
     """
     # Detect communities using the Greedy Modularity method
     communities = greedy_modularity_communities(network)
-    # Create a mapping from node to community
-    community_dict = {node: idx for idx, community in enumerate(communities) for node in community}
     # Create a binary neighborhood matrix
-
+    n_nodes = network.number_of_nodes()
+    neighborhoods = np.zeros((n_nodes, n_nodes), dtype=int)
+    # Create a mapping from node to index in the matrix
     node_index = {node: i for i, node in enumerate(network.nodes())}
-    for 
-
-
-
+    # Fill in the neighborhood matrix for nodes in the same community
+    for community in communities:
+        # Iterate through all pairs of nodes in the same community
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                # Set them as neighbors (1) in the binary matrix
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
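
The nested pairwise loops added here are quadratic per community; an equivalent, more compact formulation fills each community block at once with np.ix_ (a sketch, not the package's code):

import numpy as np
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities

G = nx.karate_club_graph()
communities = greedy_modularity_communities(G)
node_index = {node: i for i, node in enumerate(G.nodes())}

n = G.number_of_nodes()
neighborhoods = np.zeros((n, n), dtype=int)
for community in communities:
    idx = [node_index[node] for node in community]
    # Set the whole community block in one assignment instead of nested loops
    neighborhoods[np.ix_(idx, idx)] = 1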
@@ -43,22 +48,20 @@ def calculate_label_propagation_neighborhoods(network: nx.Graph) -> np.ndarray:
     Returns:
         np.ndarray: Binary neighborhood matrix on Label Propagation.
     """
-    # Apply Label Propagation
+    # Apply Label Propagation for community detection
     communities = nx.algorithms.community.label_propagation.label_propagation_communities(network)
-    # Create a mapping from node to community
-    community_dict = {}
-    for community_id, community in enumerate(communities):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
     # Assign neighborhoods based on community labels
-    for 
-        for 
-
-
+    for community in communities:
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
@@ -81,12 +84,22 @@ def calculate_louvain_neighborhoods(
         network, resolution=resolution, random_state=random_seed
     )
     # Create a binary neighborhood matrix
-
+    num_nodes = network.number_of_nodes()
+    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Group nodes by community
+    community_groups = {}
+    for node, community in partition.items():
+        community_groups.setdefault(community, []).append(node)
+
     # Assign neighborhoods based on community partitions
-    for 
-        for 
-
-
+    for community, nodes in community_groups.items():
+        for node_i in nodes:
+            idx_i = node_index[node_i]
+            for node_j in nodes:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
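
The Louvain variant needs the extra grouping step because its partition is a node-to-community mapping rather than an iterable of communities; a minimal illustration with a hand-written partition dict:

# Hand-written partition (node -> community id), mimicking the shape
# consumed by the grouping loop above
partition = {"a": 0, "b": 0, "c": 1}
community_groups = {}
for node, community in partition.items():
    community_groups.setdefault(community, []).append(node)
print(community_groups)  # {0: ['a', 'b'], 1: ['c']}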
@@ -102,24 +115,22 @@ def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
     """
     # Convert the graph to an adjacency matrix
    adjacency_matrix = nx.to_numpy_array(network)
-    # Run Markov Clustering
-    result = mc.run_mcl(adjacency_matrix)  # 
-    # Get clusters
+    # Run Markov Clustering (MCL)
+    result = mc.run_mcl(adjacency_matrix)  # MCL with default parameters
+    # Get clusters (communities) from MCL result
     clusters = mc.get_clusters(result)
-    # Create a community label for each node
-    community_dict = {}
-    for community_id, community in enumerate(clusters):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    #
-    for 
-
-
-
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Assign neighborhoods based on MCL clusters
+    for cluster in clusters:
+        for node_i in cluster:
+            idx_i = node_index[node_i]
+            for node_j in cluster:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
@@ -133,22 +144,20 @@ def calculate_spinglass_neighborhoods(network: nx.Graph) -> np.ndarray:
     Returns:
         np.ndarray: Binary neighborhood matrix on Spin Glass communities.
     """
-    #
+    # Apply Asynchronous Label Propagation (LPA)
     communities = asyn_lpa_communities(network)
-    # Create a community label for each node
-    community_dict = {}
-    for community_id, community in enumerate(communities):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    #
-    for 
-
-
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Assign neighborhoods based on community labels from LPA
+    for community in communities:
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
@@ -162,21 +171,19 @@ def calculate_walktrap_neighborhoods(network: nx.Graph) -> np.ndarray:
     Returns:
         np.ndarray: Binary neighborhood matrix on Walktrap communities.
     """
-    #
+    # Apply Asynchronous Label Propagation (LPA)
     communities = asyn_lpa_communities(network)
-    # Create a community label for each node
-    community_dict = {}
-    for community_id, community in enumerate(communities):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    #
-    for 
-
-
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Assign neighborhoods based on community labels from LPA
+    for community in communities:
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods