PyPI - risk-network - Versions diffs - 0.0.9b9__tar.gz → 0.0.9b11__tar.gz - Mend

risk-network 0.0.9b9.tar.gz → 0.0.9b11.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{risk_network-0.0.9b9 → risk_network-0.0.9b11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.9b9
+Version: 0.0.9b11
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
@@ -695,6 +695,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: ipywidgets
+Requires-Dist: leidenalg
 Requires-Dist: markov_clustering
 Requires-Dist: matplotlib
 Requires-Dist: networkx
@@ -702,6 +703,7 @@ Requires-Dist: nltk==3.8.1
 Requires-Dist: numpy
 Requires-Dist: openpyxl
 Requires-Dist: pandas
+Requires-Dist: python-igraph
 Requires-Dist: python-louvain
 Requires-Dist: scikit-learn
 Requires-Dist: scipy

{risk_network-0.0.9b9 → risk_network-0.0.9b11}/pyproject.toml RENAMED Viewed

@@ -27,6 +27,7 @@ classifiers = [
 ]
 dependencies = [
     "ipywidgets",
+    "leidenalg",
     "markov_clustering",
     "matplotlib",
     "networkx",
@@ -34,6 +35,7 @@ dependencies = [
     "numpy",
     "openpyxl",
     "pandas",
+    "python-igraph",
     "python-louvain",
     "scikit-learn",
     "scipy",

{risk_network-0.0.9b9 → risk_network-0.0.9b11}/risk/__init__.py RENAMED Viewed

@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 from risk.risk import RISK
-__version__ = "0.0.9-beta.9"
+__version__ = "0.0.9-beta.11"

risk_network-0.0.9b11/risk/neighborhoods/community.py ADDED Viewed

@@ -0,0 +1,259 @@
+"""
+risk/neighborhoods/community
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+import community as community_louvain
+import igraph as ig
+import markov_clustering as mc
+import networkx as nx
+import numpy as np
+from leidenalg import find_partition, RBConfigurationVertexPartition
+from networkx.algorithms.community import greedy_modularity_communities
+def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
+    """Calculate neighborhoods using the Greedy Modularity method.
+    Args:
+        network (nx.Graph): The network graph to analyze for community structure.
+    Returns:
+        np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
+    """
+    # Detect communities using the Greedy Modularity method
+    communities = greedy_modularity_communities(network)
+    # Get the list of nodes in the original NetworkX graph
+    nodes = list(network.nodes())
+    node_index_map = {node: idx for idx, node in enumerate(nodes)}
+    # Create a binary neighborhood matrix
+    n_nodes = len(nodes)
+    neighborhoods = np.zeros((n_nodes, n_nodes), dtype=int)
+    # Fill in the neighborhood matrix for nodes in the same community
+    for community in communities:
+        # Iterate through all pairs of nodes in the same community
+        for node_i in community:
+            for node_j in community:
+                idx_i = node_index_map[node_i]
+                idx_j = node_index_map[node_j]
+                # Set them as neighbors (1) in the binary matrix
+                neighborhoods[idx_i, idx_j] = 1
+    return neighborhoods
+def calculate_label_propagation_neighborhoods(network: nx.Graph) -> np.ndarray:
+    """Apply Label Propagation to the network to detect communities.
+    Args:
+        network (nx.Graph): The network graph.
+    Returns:
+        np.ndarray: A binary neighborhood matrix based on Label Propagation communities.
+    """
+    # Apply Label Propagation for community detection
+    communities = nx.algorithms.community.label_propagation.label_propagation_communities(network)
+    # Get the list of nodes in the network
+    nodes = list(network.nodes())
+    node_index_map = {node: idx for idx, node in enumerate(nodes)}
+    # Create a binary neighborhood matrix
+    num_nodes = len(nodes)
+    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Assign neighborhoods based on community labels using the mapped indices
+    for community in communities:
+        for node_i in community:
+            for node_j in community:
+                idx_i = node_index_map[node_i]
+                idx_j = node_index_map[node_j]
+                neighborhoods[idx_i, idx_j] = 1
+    return neighborhoods
+def calculate_leiden_neighborhoods(
+    network: nx.Graph, resolution: float = 1.0, random_seed: int = 888
+) -> np.ndarray:
+    """Calculate neighborhoods using the Leiden method.
+    Args:
+        network (nx.Graph): The network graph.
+        resolution (float, optional): Resolution parameter for the Leiden method. Defaults to 1.0.
+        random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
+    Returns:
+        np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
+    """
+    # Convert NetworkX graph to iGraph
+    igraph_network = ig.Graph.from_networkx(network)
+    # Apply Leiden algorithm using RBConfigurationVertexPartition, which supports resolution
+    partition = find_partition(
+        igraph_network,
+        partition_type=RBConfigurationVertexPartition,
+        resolution_parameter=resolution,
+        seed=random_seed,
+    )
+    # Get the list of nodes in the original NetworkX graph
+    nodes = list(network.nodes())
+    node_index_map = {node: idx for idx, node in enumerate(nodes)}
+    # Create a binary neighborhood matrix
+    num_nodes = len(nodes)
+    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Assign neighborhoods based on community partitions using the mapped indices
+    for community in partition:
+        for node_i in community:
+            for node_j in community:
+                idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
+                idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
+                neighborhoods[idx_i, idx_j] = 1
+    return neighborhoods
+def calculate_louvain_neighborhoods(
+    network: nx.Graph, resolution: float, random_seed: int = 888
+) -> np.ndarray:
+    """Calculate neighborhoods using the Louvain method.
+    Args:
+        network (nx.Graph): The network graph.
+        resolution (float): Resolution parameter for the Louvain method.
+        random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
+    Returns:
+        np.ndarray: A binary neighborhood matrix based on Louvain communities.
+    """
+    # Apply Louvain method to partition the network
+    partition = community_louvain.best_partition(
+        network, resolution=resolution, random_state=random_seed
+    )
+    # Get the list of nodes in the network and create a mapping to indices
+    nodes = list(network.nodes())
+    node_index_map = {node: idx for idx, node in enumerate(nodes)}
+    # Create a binary neighborhood matrix
+    num_nodes = len(nodes)
+    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Group nodes by community
+    community_groups = {}
+    for node, community in partition.items():
+        community_groups.setdefault(community, []).append(node)
+    # Assign neighborhoods based on community partitions using the mapped indices
+    for community, nodes in community_groups.items():
+        for node_i in nodes:
+            for node_j in nodes:
+                idx_i = node_index_map[node_i]
+                idx_j = node_index_map[node_j]
+                neighborhoods[idx_i, idx_j] = 1
+    return neighborhoods
+def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
+    """Apply Markov Clustering (MCL) to the network.
+    Args:
+        network (nx.Graph): The network graph.
+    Returns:
+        np.ndarray: A binary neighborhood matrix based on Markov Clustering (MCL) clusters.
+    """
+    # Step 1: Convert the graph to an adjacency matrix
+    nodes = list(network.nodes())
+    node_index_map = {node: idx for idx, node in enumerate(nodes)}
+    # Step 2: Create a reverse mapping from index to node
+    index_node_map = {idx: node for node, idx in node_index_map.items()}
+    adjacency_matrix = nx.to_numpy_array(network, nodelist=nodes)
+    # Step 3: Run Markov Clustering (MCL) on the adjacency matrix
+    result = mc.run_mcl(adjacency_matrix)
+    # Step 4: Get clusters (communities) from MCL result
+    clusters = mc.get_clusters(result)
+    # Step 5: Create a binary neighborhood matrix
+    num_nodes = len(nodes)
+    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Step 6: Assign neighborhoods based on MCL clusters using the original node labels
+    for cluster in clusters:
+        for node_i in cluster:
+            for node_j in cluster:
+                # Map the matrix indices back to the original node labels
+                original_node_i = index_node_map[node_i]
+                original_node_j = index_node_map[node_j]
+                idx_i = node_index_map[original_node_i]
+                idx_j = node_index_map[original_node_j]
+                neighborhoods[idx_i, idx_j] = 1
+    return neighborhoods
+def calculate_spinglass_neighborhoods(network: nx.Graph) -> np.ndarray:
+    """Apply Spinglass Community Detection to the network, handling disconnected components.
+    Args:
+        network (nx.Graph): The input network graph; each connected component is clustered separately.
+    Returns:
+        np.ndarray: A binary neighborhood matrix based on Spinglass communities.
+    """
+    # Step 1: Find connected components in the graph
+    components = list(nx.connected_components(network))
+    # Prepare to store community results
+    nodes = list(network.nodes())
+    node_index_map = {node: idx for idx, node in enumerate(nodes)}
+    num_nodes = len(nodes)
+    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Step 2: Run Spinglass on each connected component
+    for component in components:
+        # Extract the subgraph corresponding to the current component
+        subgraph = network.subgraph(component)
+        # Convert the subgraph to an iGraph object
+        igraph_subgraph = ig.Graph.from_networkx(subgraph)
+        # Ensure the subgraph is connected before running Spinglass
+        if not igraph_subgraph.is_connected():
+            print("Warning: Subgraph is not connected. Skipping...")
+            continue
+        # Apply Spinglass community detection
+        try:
+            communities = igraph_subgraph.community_spinglass()
+        except Exception as e:
+            print(f"Error running Spinglass on component: {e}")
+            continue
+        # Step 3: Assign neighborhoods based on community labels
+        for community in communities:
+            for node_i in community:
+                for node_j in community:
+                    idx_i = node_index_map[igraph_subgraph.vs[node_i]["_nx_name"]]
+                    idx_j = node_index_map[igraph_subgraph.vs[node_j]["_nx_name"]]
+                    neighborhoods[idx_i, idx_j] = 1
+    return neighborhoods
+def calculate_walktrap_neighborhoods(network: nx.Graph) -> np.ndarray:
+    """Apply Walktrap Community Detection to the network.
+    Args:
+        network (nx.Graph): The network graph.
+    Returns:
+        np.ndarray: A binary neighborhood matrix based on Walktrap communities.
+    """
+    # Convert NetworkX graph to iGraph
+    igraph_network = ig.Graph.from_networkx(network)
+    # Apply Walktrap community detection
+    communities = igraph_network.community_walktrap().as_clustering()
+    # Get the list of nodes in the original NetworkX graph
+    nodes = list(network.nodes())
+    node_index_map = {node: idx for idx, node in enumerate(nodes)}
+    # Create a binary neighborhood matrix
+    num_nodes = len(nodes)
+    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Assign neighborhoods based on community labels
+    for community in communities:
+        for node_i in community:
+            for node_j in community:
+                idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
+                idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
+                neighborhoods[idx_i, idx_j] = 1
+    return neighborhoods

{risk_network-0.0.9b9 → risk_network-0.0.9b11}/risk/neighborhoods/neighborhoods.py RENAMED Viewed

@@ -15,6 +15,7 @@ from sklearn.metrics.pairwise import cosine_similarity
 from risk.neighborhoods.community import (
     calculate_greedy_modularity_neighborhoods,
     calculate_label_propagation_neighborhoods,
+    calculate_leiden_neighborhoods,
     calculate_louvain_neighborhoods,
     calculate_markov_clustering_neighborhoods,
     calculate_spinglass_neighborhoods,
@@ -29,22 +30,20 @@ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
 def get_network_neighborhoods(
     network: nx.Graph,
     distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
-    edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 1.0,
-    louvain_resolution: float = 1.0,
+    edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 1.0,
+    louvain_resolution: float = 0.1,
+    leiden_resolution: float = 1.0,
     random_seed: int = 888,
 ) -> np.ndarray:
     """Calculate the combined neighborhoods for each node based on the specified community detection algorithm(s).
     Args:
         network (nx.Graph): The network graph.
-        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
-            metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
-            'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
-        edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
-            Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
-            Defaults to 1.0.
-        louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
-        random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
+        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
+        edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
+        louvain_resolution (float, optional): Resolution parameter for the Louvain method.
+        leiden_resolution (float, optional): Resolution parameter for the Leiden method.
+        random_seed (int, optional): Random seed for methods requiring random initialization.
     Returns:
         np.ndarray: Summed neighborhood matrix from all selected algorithms.
@@ -53,14 +52,13 @@ def get_network_neighborhoods(
     random.seed(random_seed)
     np.random.seed(random_seed)
-    # Ensure distance_metric is a list/tuple for multi-algorithm handling
+    # Ensure distance_metric and edge_rank_percentile are lists
     if isinstance(distance_metric, (str, np.ndarray)):
         distance_metric = [distance_metric]
-    # Ensure edge_length_threshold is a list/tuple for multi-threshold handling
-    if isinstance(edge_length_threshold, (float, int)):
-        edge_length_threshold = [edge_length_threshold] * len(distance_metric)
-    # Check that the number of distance metrics matches the number of edge length thresholds
-    if len(distance_metric) != len(edge_length_threshold):
+    if isinstance(edge_rank_percentile, (float, int)):
+        edge_rank_percentile = [edge_rank_percentile] * len(distance_metric)
+    if len(distance_metric) != len(edge_rank_percentile):
         raise ValueError(
             "The number of distance metrics must match the number of edge length thresholds."
         )
@@ -70,80 +68,114 @@ def get_network_neighborhoods(
     combined_neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
     # Loop through each distance metric and corresponding edge length threshold
-    for metric, threshold in zip(distance_metric, edge_length_threshold):
-        # Create a subgraph based on the specific edge length threshold for this algorithm
-        subgraph = _create_percentile_limited_subgraph(network, edge_length_percentile=threshold)
-        # Call the appropriate neighborhood function based on the metric
-        if metric == "louvain":
-            neighborhoods = calculate_louvain_neighborhoods(
-                subgraph, louvain_resolution, random_seed=random_seed
-            )
-        elif metric == "greedy_modularity":
+    for metric, threshold in zip(distance_metric, edge_rank_percentile):
+        # Create a subgraph based on the edge rank percentile threshold
+        subgraph = _create_percentile_limited_subgraph(network, edge_rank_percentile=threshold)
+        subgraph_nodes = list(subgraph.nodes)
+        # Calculate neighborhoods based on the specified metric
+        if metric == "greedy_modularity":
             neighborhoods = calculate_greedy_modularity_neighborhoods(subgraph)
         elif metric == "label_propagation":
             neighborhoods = calculate_label_propagation_neighborhoods(subgraph)
+        elif metric == "leiden":
+            neighborhoods = calculate_leiden_neighborhoods(
+                subgraph, leiden_resolution, random_seed=random_seed
+            )
+        elif metric == "louvain":
+            neighborhoods = calculate_louvain_neighborhoods(
+                subgraph, louvain_resolution, random_seed=random_seed
+            )
         elif metric == "markov_clustering":
             neighborhoods = calculate_markov_clustering_neighborhoods(subgraph)
-        elif metric == "walktrap":
-            neighborhoods = calculate_walktrap_neighborhoods(subgraph)
         elif metric == "spinglass":
             neighborhoods = calculate_spinglass_neighborhoods(subgraph)
+        elif metric == "walktrap":
+            neighborhoods = calculate_walktrap_neighborhoods(subgraph)
         else:
             raise ValueError(
-                "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
-                "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
+                "Invalid distance metric specified. Please choose from 'greedy_modularity', 'label_propagation',"
+                "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
             )
-        # Sum the neighborhood matrices
-        combined_neighborhoods += neighborhoods
+        # Expand the neighborhood matrix to match the original network's size
+        expanded_neighborhoods = expand_neighborhood_matrix(
+            neighborhoods, subgraph_nodes, num_nodes
+        )
+        # Sum the expanded neighborhood matrices
+        combined_neighborhoods += expanded_neighborhoods
-    # Ensure that the maximum value in each row is set to 1
-    # This ensures that for each row, only the strongest relationship (the maximum value) is retained,
-    # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
-    # focusing on the most significant connection per row.
-    combined_neighborhoods = _set_max_to_one(combined_neighborhoods)
+    # Convert combined_neighborhoods to binary: values > 0 are set to 1
+    combined_neighborhoods = (combined_neighborhoods > 0).astype(int)
     return combined_neighborhoods
-def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
-    """Create a subgraph containing all nodes and edges where the edge length is below the
-    specified percentile of all edge lengths in the input graph.
+def expand_neighborhood_matrix(
+    subgraph_matrix: np.ndarray, subgraph_nodes: list, original_size: int
+) -> np.ndarray:
+    """Expand a subgraph neighborhood matrix back to the size of the original graph.
     Args:
-        G (nx.Graph): The input graph with 'length' attributes on edges.
-        edge_length_percentile (float): The percentile (between 0 and 1) to filter edges by length.
+        subgraph_matrix (np.ndarray): The neighborhood matrix for the subgraph.
+        subgraph_nodes (list): List of nodes in the subgraph, corresponding to rows/columns in subgraph_matrix.
+        original_size (int): The number of nodes in the original graph.
     Returns:
-        nx.Graph: A subgraph with all nodes and edges where the edge length is below the
-        calculated threshold length.
+        np.ndarray: The expanded matrix with the original size, with subgraph values mapped correctly.
     """
-    # Extract edge lengths and handle missing lengths
-    edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
-    if not edge_lengths:
-        raise ValueError(
-            "No edge lengths found in the graph. Ensure edges have 'length' attributes."
-        )
+    expanded_matrix = np.zeros((original_size, original_size), dtype=int)
+    for i, node_i in enumerate(subgraph_nodes):
+        for j, node_j in enumerate(subgraph_nodes):
+            expanded_matrix[node_i, node_j] = subgraph_matrix[i, j]
+    return expanded_matrix
-    # Calculate the specific edge length for the given percentile
-    percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
-    # Create the subgraph by directly filtering edges during iteration
-    subgraph = nx.Graph()
-    subgraph.add_nodes_from(G.nodes(data=True))  # Retain all nodes from the original graph
-    # Add edges below the specified percentile length in a single pass
-    for u, v, d in G.edges(data=True):
-        if d.get("length", 1) <= percentile_length:
-            subgraph.add_edge(u, v, **d)
-    # Return the subgraph; optionally check if it's too sparse
+def _create_percentile_limited_subgraph(G: nx.Graph, edge_rank_percentile: float) -> nx.Graph:
+    """Create a subgraph from the shortest edges of the input graph: only edges whose length rank falls within
+    the specified percentile are kept, along with their endpoint nodes. Isolated nodes are removed.
+    Args:
+        G (nx.Graph): The input graph with 'length' attributes on edges.
+        edge_rank_percentile (float): The rank percentile (between 0 and 1) to filter edges.
+    Returns:
+        nx.Graph: A subgraph with nodes and edges where the edge length is within the
+        specified percentile, with isolated nodes removed, retaining all original attributes.
+    """
+    # Extract edges with their lengths
+    edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
+    if not edges_with_length:
+        raise ValueError("No edge lengths found. Ensure edges have 'length' attributes.")
+    # Sort edges by length in ascending order
+    edges_with_length.sort(key=lambda x: x[2]["length"])
+    # Calculate the cutoff based on the specified rank percentile
+    cutoff_index = int(edge_rank_percentile * len(edges_with_length))
+    if cutoff_index == 0:
+        raise ValueError("The rank percentile is too low, resulting in no edges being included.")
+    # Keep only the edges within the specified percentile
+    selected_edges = edges_with_length[:cutoff_index]
+    # Create a new subgraph with the selected edges, retaining all attributes
+    subgraph = nx.Graph()
+    subgraph.add_edges_from((u, v, d) for u, v, d in selected_edges)
+    # Copy over all node attributes from the original graph
+    subgraph.add_nodes_from((node, G.nodes[node]) for node in subgraph.nodes())
+    # Remove isolated nodes (if any)
+    isolated_nodes = [node for node, degree in subgraph.degree() if degree == 0]
+    subgraph.remove_nodes_from(isolated_nodes)
+    # Check if the resulting subgraph has no edges
     if subgraph.number_of_edges() == 0:
-        raise Warning("The resulting subgraph has no edges. Consider adjusting the percentile.")
+        raise ValueError("The resulting subgraph has no edges. Adjust the rank percentile.")
     return subgraph
-def _set_max_to_one(matrix: np.ndarray) -> np.ndarray:
-    """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0.
+def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
+    """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
+    useful for neighborhood matrices that have undergone multiple neighborhood detection algorithms, where the
+    maximum value in each row represents the most significant relationship per node in the combined neighborhoods.
     Args:
         matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.

{risk_network-0.0.9b9 → risk_network-0.0.9b11}/risk/network/plot/labels.py RENAMED Viewed

@@ -617,8 +617,7 @@ class Labels:
         """
         # Return custom labels if domain is in ids_to_labels
         if ids_to_labels and domain in ids_to_labels:
-            terms = ids_to_labels[domain].replace(" ", TERM_DELIMITER)
-            return terms
+            return ids_to_labels[domain]
         else:
             terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")

{risk_network-0.0.9b9 → risk_network-0.0.9b11}/risk/risk.py RENAMED Viewed

@@ -52,7 +52,8 @@ class RISK(NetworkIO, AnnotationsIO):
         annotations: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
-        edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
+        leiden_resolution: float = 1.0,
+        edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
         null_distribution: str = "network",
         random_seed: int = 888,
     ) -> Dict[str, Any]:
@@ -65,7 +66,8 @@ class RISK(NetworkIO, AnnotationsIO):
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
-            edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
+            leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+            edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
             null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
@@ -79,7 +81,8 @@ class RISK(NetworkIO, AnnotationsIO):
         params.log_neighborhoods(
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
-            edge_length_threshold=edge_length_threshold,
+            leiden_resolution=leiden_resolution,
+            edge_rank_percentile=edge_rank_percentile,
             statistical_test_function="hypergeom",
             null_distribution=null_distribution,
             random_seed=random_seed,
@@ -93,7 +96,8 @@ class RISK(NetworkIO, AnnotationsIO):
             network,
             distance_metric,
             louvain_resolution=louvain_resolution,
-            edge_length_threshold=edge_length_threshold,
+            leiden_resolution=leiden_resolution,
+            edge_rank_percentile=edge_rank_percentile,
             random_seed=random_seed,
         )
         # Run hypergeometric test to compute neighborhood significance
@@ -112,7 +116,8 @@ class RISK(NetworkIO, AnnotationsIO):
         annotations: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
-        edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
+        leiden_resolution: float = 1.0,
+        edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
         null_distribution: str = "network",
         random_seed: int = 888,
     ) -> Dict[str, Any]:
@@ -125,7 +130,8 @@ class RISK(NetworkIO, AnnotationsIO):
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
-            edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
+            leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+            edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
             null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
@@ -139,7 +145,8 @@ class RISK(NetworkIO, AnnotationsIO):
         params.log_neighborhoods(
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
-            edge_length_threshold=edge_length_threshold,
+            leiden_resolution=leiden_resolution,
+            edge_rank_percentile=edge_rank_percentile,
             statistical_test_function="poisson",
             null_distribution=null_distribution,
             random_seed=random_seed,
@@ -153,7 +160,8 @@ class RISK(NetworkIO, AnnotationsIO):
             network,
             distance_metric,
             louvain_resolution=louvain_resolution,
-            edge_length_threshold=edge_length_threshold,
+            leiden_resolution=leiden_resolution,
+            edge_rank_percentile=edge_rank_percentile,
             random_seed=random_seed,
         )
         # Run Poisson test to compute neighborhood significance
@@ -172,7 +180,8 @@ class RISK(NetworkIO, AnnotationsIO):
         annotations: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
-        edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
+        leiden_resolution: float = 1.0,
+        edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
         score_metric: str = "sum",
         null_distribution: str = "network",
         num_permutations: int = 1000,
@@ -188,7 +197,8 @@ class RISK(NetworkIO, AnnotationsIO):
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
-            edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
+            leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+            edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
             score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
@@ -205,7 +215,8 @@ class RISK(NetworkIO, AnnotationsIO):
         params.log_neighborhoods(
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
-            edge_length_threshold=edge_length_threshold,
+            leiden_resolution=leiden_resolution,
+            edge_rank_percentile=edge_rank_percentile,
             statistical_test_function="permutation",
             score_metric=score_metric,
             null_distribution=null_distribution,
@@ -222,7 +233,8 @@ class RISK(NetworkIO, AnnotationsIO):
             network,
             distance_metric,
             louvain_resolution=louvain_resolution,
-            edge_length_threshold=edge_length_threshold,
+            leiden_resolution=leiden_resolution,
+            edge_rank_percentile=edge_rank_percentile,
             random_seed=random_seed,
         )
@@ -408,7 +420,8 @@ class RISK(NetworkIO, AnnotationsIO):
         network: nx.Graph,
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
-        edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
+        leiden_resolution: float = 1.0,
+        edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
         random_seed: int = 888,
     ) -> np.ndarray:
         """Load significant neighborhoods for the network.
@@ -420,7 +433,8 @@ class RISK(NetworkIO, AnnotationsIO):
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
-            edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
+            leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
+            edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
@@ -431,19 +445,22 @@ class RISK(NetworkIO, AnnotationsIO):
         # Display the chosen distance metric
         if distance_metric == "louvain":
             for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
+        elif distance_metric == "leiden":
+            for_print_distance_metric = f"leiden (resolution={leiden_resolution})"
         else:
             for_print_distance_metric = distance_metric
         # Log and display neighborhood settings
         logger.debug(f"Distance metric: '{for_print_distance_metric}'")
-        logger.debug(f"Edge length threshold: {edge_length_threshold}")
+        logger.debug(f"Edge rank percentile: {edge_rank_percentile}")
         logger.debug(f"Random seed: {random_seed}")
         # Compute neighborhoods based on the network and distance metric
         neighborhoods = get_network_neighborhoods(
             network,
             distance_metric,
-            edge_length_threshold,
+            edge_rank_percentile,
             louvain_resolution=louvain_resolution,
+            leiden_resolution=leiden_resolution,
             random_seed=random_seed,
         )

{risk_network-0.0.9b9 → risk_network-0.0.9b11}/risk_network.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.9b9
+Version: 0.0.9b11
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
@@ -695,6 +695,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: ipywidgets
+Requires-Dist: leidenalg
 Requires-Dist: markov_clustering
 Requires-Dist: matplotlib
 Requires-Dist: networkx
@@ -702,6 +703,7 @@ Requires-Dist: nltk==3.8.1
 Requires-Dist: numpy
 Requires-Dist: openpyxl
 Requires-Dist: pandas
+Requires-Dist: python-igraph
 Requires-Dist: python-louvain
 Requires-Dist: scikit-learn
 Requires-Dist: scipy

{risk_network-0.0.9b9 → risk_network-0.0.9b11}/risk_network.egg-info/requires.txt RENAMED Viewed

@@ -1,4 +1,5 @@
 ipywidgets
+leidenalg
 markov_clustering
 matplotlib
 networkx
@@ -6,6 +7,7 @@ nltk==3.8.1
 numpy
 openpyxl
 pandas
+python-igraph
 python-louvain
 scikit-learn
 scipy

{risk_network-0.0.9b9 → risk_network-0.0.9b11}/setup.py RENAMED Viewed

@@ -31,13 +31,15 @@ setup(
     include_package_data=True,
     install_requires=[
         "ipywidgets",
+        "leidenalg",
         "markov_clustering",
         "matplotlib",
         "networkx",
         "nltk==3.8.1",
         "numpy",
         "openpyxl",
         "pandas",
+        "python-igraph",
         "python-louvain",
         "scikit-learn",
         "scipy",

risk_network-0.0.9b9/risk/neighborhoods/community.py DELETED Viewed

@@ -1,189 +0,0 @@
-"""
-risk/neighborhoods/community
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-"""
-import community as community_louvain
-import networkx as nx
-import numpy as np
-import markov_clustering as mc
-from networkx.algorithms.community import asyn_lpa_communities, greedy_modularity_communities
-def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
-    """Calculate neighborhoods using the Greedy Modularity method.
-    Args:
-        network (nx.Graph): The network graph to analyze for community structure.
-    Returns:
-        np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
-    """
-    # Detect communities using the Greedy Modularity method
-    communities = greedy_modularity_communities(network)
-    # Create a binary neighborhood matrix
-    n_nodes = network.number_of_nodes()
-    neighborhoods = np.zeros((n_nodes, n_nodes), dtype=int)
-    # Create a mapping from node to index in the matrix
-    node_index = {node: i for i, node in enumerate(network.nodes())}
-    # Fill in the neighborhood matrix for nodes in the same community
-    for community in communities:
-        # Iterate through all pairs of nodes in the same community
-        for node_i in community:
-            idx_i = node_index[node_i]
-            for node_j in community:
-                idx_j = node_index[node_j]
-                # Set them as neighbors (1) in the binary matrix
-                neighborhoods[idx_i, idx_j] = 1
-    return neighborhoods
-def calculate_label_propagation_neighborhoods(network: nx.Graph) -> np.ndarray:
-    """Apply Label Propagation to the network to detect communities.
-    Args:
-        network (nx.Graph): The network graph.
-    Returns:
-        np.ndarray: Binary neighborhood matrix on Label Propagation.
-    """
-    # Apply Label Propagation for community detection
-    communities = nx.algorithms.community.label_propagation.label_propagation_communities(network)
-    # Create a binary neighborhood matrix
-    num_nodes = network.number_of_nodes()
-    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    # Create a mapping from node to index in the matrix
-    node_index = {node: i for i, node in enumerate(network.nodes())}
-    # Assign neighborhoods based on community labels
-    for community in communities:
-        for node_i in community:
-            idx_i = node_index[node_i]
-            for node_j in community:
-                idx_j = node_index[node_j]
-                neighborhoods[idx_i, idx_j] = 1
-    return neighborhoods
-def calculate_louvain_neighborhoods(
-    network: nx.Graph, resolution: float, random_seed: int = 888
-) -> np.ndarray:
-    """Calculate neighborhoods using the Louvain method.
-    Args:
-        network (nx.Graph): The network graph.
-        resolution (float): Resolution parameter for the Louvain method.
-        random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
-    Returns:
-        np.ndarray: Binary neighborhood matrix on the Louvain method.
-    """
-    # Apply Louvain method to partition the network
-    partition = community_louvain.best_partition(
-        network, resolution=resolution, random_state=random_seed
-    )
-    # Create a binary neighborhood matrix
-    num_nodes = network.number_of_nodes()
-    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    # Create a mapping from node to index in the matrix
-    node_index = {node: i for i, node in enumerate(network.nodes())}
-    # Group nodes by community
-    community_groups = {}
-    for node, community in partition.items():
-        community_groups.setdefault(community, []).append(node)
-    # Assign neighborhoods based on community partitions
-    for community, nodes in community_groups.items():
-        for node_i in nodes:
-            idx_i = node_index[node_i]
-            for node_j in nodes:
-                idx_j = node_index[node_j]
-                neighborhoods[idx_i, idx_j] = 1
-    return neighborhoods
-def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
-    """Apply Markov Clustering (MCL) to the network.
-    Args:
-        network (nx.Graph): The network graph.
-    Returns:
-        np.ndarray: Binary neighborhood matrix on Markov Clustering.
-    """
-    # Convert the graph to an adjacency matrix
-    adjacency_matrix = nx.to_numpy_array(network)
-    # Run Markov Clustering (MCL)
-    result = mc.run_mcl(adjacency_matrix)  # MCL with default parameters
-    # Get clusters (communities) from MCL result
-    clusters = mc.get_clusters(result)
-    # Create a binary neighborhood matrix
-    num_nodes = network.number_of_nodes()
-    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    # Create a mapping from node to index in the matrix
-    node_index = {node: i for i, node in enumerate(network.nodes())}
-    # Assign neighborhoods based on MCL clusters
-    for cluster in clusters:
-        for node_i in cluster:
-            idx_i = node_index[node_i]
-            for node_j in cluster:
-                idx_j = node_index[node_j]
-                neighborhoods[idx_i, idx_j] = 1
-    return neighborhoods
-def calculate_spinglass_neighborhoods(network: nx.Graph) -> np.ndarray:
-    """Apply Spin Glass Community Detection to the network.
-    Args:
-        network (nx.Graph): The network graph.
-    Returns:
-        np.ndarray: Binary neighborhood matrix on Spin Glass communities.
-    """
-    # Apply Asynchronous Label Propagation (LPA)
-    communities = asyn_lpa_communities(network)
-    # Create a binary neighborhood matrix
-    num_nodes = network.number_of_nodes()
-    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    # Create a mapping from node to index in the matrix
-    node_index = {node: i for i, node in enumerate(network.nodes())}
-    # Assign neighborhoods based on community labels from LPA
-    for community in communities:
-        for node_i in community:
-            idx_i = node_index[node_i]
-            for node_j in community:
-                idx_j = node_index[node_j]
-                neighborhoods[idx_i, idx_j] = 1
-    return neighborhoods
-def calculate_walktrap_neighborhoods(network: nx.Graph) -> np.ndarray:
-    """Apply Walktrap Community Detection to the network.
-    Args:
-        network (nx.Graph): The network graph.
-    Returns:
-        np.ndarray: Binary neighborhood matrix on Walktrap communities.
-    """
-    # Apply Asynchronous Label Propagation (LPA)
-    communities = asyn_lpa_communities(network)
-    # Create a binary neighborhood matrix
-    num_nodes = network.number_of_nodes()
-    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    # Create a mapping from node to index in the matrix
-    node_index = {node: i for i, node in enumerate(network.nodes())}
-    # Assign neighborhoods based on community labels from LPA
-    for community in communities:
-        for node_i in community:
-            idx_i = node_index[node_i]
-            for node_j in community:
-                idx_j = node_index[node_j]
-                neighborhoods[idx_i, idx_j] = 1
-    return neighborhoods