PyPI - risk-network - Versions diffs - 0.0.9b26__py3-none-any.whl → 0.0.9b28__py3-none-any.whl - Mend

risk-network 0.0.9b26__py3-none-any.whl → 0.0.9b28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

risk/__init__.py +1 -1
risk/annotations/annotations.py +39 -38
risk/neighborhoods/api.py +1 -5
risk/neighborhoods/community.py +140 -95
risk/neighborhoods/neighborhoods.py +34 -18
risk/network/geometry.py +24 -27
risk/network/graph/api.py +6 -6
risk/network/graph/{network.py → graph.py} +7 -7
risk/network/graph/summary.py +3 -3
risk/network/io.py +39 -15
risk/network/plotter/__init__.py +2 -2
risk/network/plotter/api.py +12 -12
risk/network/plotter/canvas.py +7 -7
risk/network/plotter/contour.py +6 -6
risk/network/plotter/labels.py +5 -5
risk/network/plotter/network.py +6 -136
risk/network/plotter/plotter.py +143 -0
risk/network/plotter/utils/colors.py +11 -11
risk/network/plotter/utils/layout.py +2 -2
risk/stats/__init__.py +8 -6
risk/stats/{stats.py → significance.py} +2 -2
risk/stats/stat_tests.py +272 -0
{risk_network-0.0.9b26.dist-info → risk_network-0.0.9b28.dist-info}/METADATA +1 -1
risk_network-0.0.9b28.dist-info/RECORD +41 -0
risk/stats/binom.py +0 -51
risk/stats/chi2.py +0 -69
risk/stats/hypergeom.py +0 -64
risk/stats/poisson.py +0 -50
risk/stats/zscore.py +0 -68
risk_network-0.0.9b26.dist-info/RECORD +0 -44
{risk_network-0.0.9b26.dist-info → risk_network-0.0.9b28.dist-info}/LICENSE +0 -0
{risk_network-0.0.9b26.dist-info → risk_network-0.0.9b28.dist-info}/WHEEL +0 -0
{risk_network-0.0.9b26.dist-info → risk_network-0.0.9b28.dist-info}/top_level.txt +0 -0

risk/__init__.py CHANGED Viewed

@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
 from risk.risk import RISK
-__version__ = "0.0.9-beta.26"
+__version__ = "0.0.9-beta.28"

risk/annotations/annotations.py CHANGED Viewed

@@ -16,7 +16,7 @@ from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 from risk.log import logger
-from scipy.sparse import csr_matrix
+from scipy.sparse import coo_matrix
 def _setup_nltk():
@@ -41,14 +41,13 @@ stop_words = set(stopwords.words("english"))
 def load_annotations(
     network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
 ) -> Dict[str, Any]:
-    """Convert annotations input to a DataFrame and reindex based on the network's node labels.
+    """Convert annotations input to a sparse matrix and reindex based on the network's node labels.
     Args:
         network (nx.Graph): The network graph.
         annotations_input (Dict[str, Any]): A dictionary with annotations.
         min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
             term to be included. Defaults to 2.
-        use_sparse (bool, optional): Whether to return the annotations matrix as a sparse matrix. Defaults to True.
     Returns:
         Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
@@ -58,51 +57,53 @@ def load_annotations(
         ValueError: If no annotations are found for the nodes in the network.
         ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
     """
-    # Flatten the dictionary to a list of tuples for easier DataFrame creation
-    flattened_annotations = [
-        (node, annotation) for annotation, nodes in annotations_input.items() for node in nodes
-    ]
-    # Create a DataFrame from the flattened list
-    annotations = pd.DataFrame(flattened_annotations, columns=["node", "annotations"])
-    annotations["is_member"] = 1
-    # Pivot to create a binary matrix with nodes as rows and annotations as columns
-    annotations_pivot = annotations.pivot_table(
-        index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
-    )
-    # Reindex the annotations matrix based on the node labels from the network
-    node_label_order = (attr["label"] for _, attr in network.nodes(data=True) if "label" in attr)
-    annotations_pivot = annotations_pivot.reindex(index=node_label_order)
-    # Raise an error if no valid annotations are found for the nodes in the network
-    if annotations_pivot.notnull().sum().sum() == 0:
+    # Step 1: Map nodes and annotations to indices
+    node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
+    node_to_idx = {node: i for i, node in enumerate(node_label_order)}
+    annotation_to_idx = {annotation: i for i, annotation in enumerate(annotations_input)}
+    # Step 2: Construct a sparse binary matrix directly
+    row = []
+    col = []
+    data = []
+    for annotation, nodes in annotations_input.items():
+        for node in nodes:
+            if node in node_to_idx and annotation in annotation_to_idx:
+                row.append(node_to_idx[node])
+                col.append(annotation_to_idx[annotation])
+                data.append(1)
+    # Create a sparse binary matrix
+    num_nodes = len(node_to_idx)
+    num_annotations = len(annotation_to_idx)
+    annotations_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotations)).tocsr()
+    # Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
+    valid_annotations = annotations_pivot.sum(axis=0).A1 >= min_nodes_per_term
+    annotations_pivot = annotations_pivot[:, valid_annotations]
+    # Step 4: Raise errors for empty matrices
+    if annotations_pivot.nnz == 0:
         raise ValueError("No terms found in the annotation file for the nodes in the network.")
-    # Filter out annotations with fewer than min_nodes_per_term occurrences
-    num_terms_before_filtering = annotations_pivot.shape[1]
-    annotations_pivot = annotations_pivot.loc[
-        :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
-    ]
-    num_terms_after_filtering = annotations_pivot.shape[1]
-    # Log the number of annotations before and after filtering
-    logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
-    logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
-    logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
-    if num_terms_after_filtering == 0:
+    num_remaining_annotations = annotations_pivot.shape[1]
+    if num_remaining_annotations == 0:
         raise ValueError(
             f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )
-    # Extract ordered nodes and annotations
-    ordered_nodes = tuple(annotations_pivot.index)
-    ordered_annotations = tuple(annotations_pivot.columns)
-    # Convert the annotations_pivot matrix to a numpy array or sparse matrix
-    annotations_pivot_binary = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
-    # Convert the binary annotations matrix to a sparse matrix
-    annotations_pivot_binary = csr_matrix(annotations_pivot_binary)
+    # Step 5: Extract ordered nodes and annotations
+    ordered_nodes = tuple(node_label_order)
+    ordered_annotations = tuple(
+        annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotations) if is_valid
+    )
+    # Log the filtering details
+    logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
+    logger.info(f"Number of input annotation terms: {num_annotations}")
+    logger.info(f"Number of remaining annotation terms: {num_remaining_annotations}")
     return {
         "ordered_nodes": ordered_nodes,
         "ordered_annotations": ordered_annotations,
-        "matrix": annotations_pivot_binary,
+        "matrix": annotations_pivot,
     }

risk/neighborhoods/api.py CHANGED Viewed

@@ -368,7 +368,7 @@ class NeighborhoodsAPI:
         )
         # Make a copy of the network to avoid modifying the original
-        network = copy.deepcopy(network)
+        network = copy.copy(network)
         # Load neighborhoods based on the network and distance metric
         neighborhoods = self._load_neighborhoods(
             network,
@@ -438,9 +438,5 @@ class NeighborhoodsAPI:
             random_seed=random_seed,
         )
-        # Ensure the neighborhood matrix is in sparse format
-        if not isinstance(neighborhoods, csr_matrix):
-            neighborhoods = csr_matrix(neighborhoods)
         # Return the sparse neighborhood matrix
         return neighborhoods

risk/neighborhoods/community.py CHANGED Viewed

@@ -10,22 +10,23 @@ import networkx as nx
 import numpy as np
 from leidenalg import find_partition, RBConfigurationVertexPartition
 from networkx.algorithms.community import greedy_modularity_communities
+from scipy.sparse import csr_matrix
 from risk.log import logger
 def calculate_greedy_modularity_neighborhoods(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
-) -> np.ndarray:
-    """Calculate neighborhoods using the Greedy Modularity method.
+) -> csr_matrix:
+    """Calculate neighborhoods using the Greedy Modularity method with CSR matrix output.
     Args:
         network (nx.Graph): The network graph.
         fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
-            subgraphs before clustering.
+            subgraphs before clustering. Defaults to 1.0.
     Returns:
-        np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
+        csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -36,35 +37,36 @@ def calculate_greedy_modularity_neighborhoods(
     # Get the list of nodes in the original NetworkX graph
     nodes = list(network.nodes())
     node_index_map = {node: idx for idx, node in enumerate(nodes)}
-    # Create a binary neighborhood matrix
-    num_nodes = len(nodes)
-    # Initialize neighborhoods with zeros and set self-self entries to 1
-    neighborhoods = np.eye(num_nodes, dtype=int)
-    # Fill in the neighborhood matrix for nodes in the same community
+    # Prepare data for CSR matrix
+    row_indices = []
+    col_indices = []
     for community in communities:
-        # Iterate through all pairs of nodes in the same community
-        for node_i in community:
-            for node_j in community:
-                idx_i = node_index_map[node_i]
-                idx_j = node_index_map[node_j]
-                # Set them as neighbors (1) in the binary matrix
-                neighborhoods[idx_i, idx_j] = 1
+        mapped_indices = [node_index_map[node] for node in community]
+        for i in mapped_indices:
+            for j in mapped_indices:
+                row_indices.append(i)
+                col_indices.append(j)
+    # Create a CSR matrix
+    num_nodes = len(nodes)
+    data = np.ones(len(row_indices), dtype=int)
+    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
     return neighborhoods
 def calculate_label_propagation_neighborhoods(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
-) -> np.ndarray:
+) -> csr_matrix:
     """Apply Label Propagation to the network to detect communities.
     Args:
         network (nx.Graph): The network graph.
         fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
-            subgraphs before clustering.
+            subgraphs before clustering. Defaults to 1.0.
     Returns:
-        np.ndarray: A binary neighborhood matrix on Label Propagation.
+        csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -77,17 +79,21 @@ def calculate_label_propagation_neighborhoods(
     # Get the list of nodes in the network
     nodes = list(network.nodes())
     node_index_map = {node: idx for idx, node in enumerate(nodes)}
-    # Create a binary neighborhood matrix
-    num_nodes = len(nodes)
-    # Initialize neighborhoods with zeros and set self-self entries to 1
-    neighborhoods = np.eye(num_nodes, dtype=int)
+    # Prepare data for CSR matrix
+    row_indices = []
+    col_indices = []
     # Assign neighborhoods based on community labels using the mapped indices
     for community in communities:
-        for node_i in community:
-            for node_j in community:
-                idx_i = node_index_map[node_i]
-                idx_j = node_index_map[node_j]
-                neighborhoods[idx_i, idx_j] = 1
+        mapped_indices = [node_index_map[node] for node in community]
+        for i in mapped_indices:
+            for j in mapped_indices:
+                row_indices.append(i)
+                col_indices.append(j)
+    # Create a CSR matrix
+    num_nodes = len(nodes)
+    data = np.ones(len(row_indices), dtype=int)
+    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
     return neighborhoods
@@ -97,18 +103,18 @@ def calculate_leiden_neighborhoods(
     resolution: float = 1.0,
     fraction_shortest_edges: float = 1.0,
     random_seed: int = 888,
-) -> np.ndarray:
-    """Calculate neighborhoods using the Leiden method.
+) -> csr_matrix:
+    """Calculate neighborhoods using the Leiden method with CSR matrix output.
     Args:
         network (nx.Graph): The network graph.
         resolution (float, optional): Resolution parameter for the Leiden method. Defaults to 1.0.
         fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
-            subgraphs before clustering.
+            subgraphs before clustering. Defaults to 1.0.
         random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
     Returns:
-        np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
+        csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -126,17 +132,20 @@ def calculate_leiden_neighborhoods(
     # Get the list of nodes in the original NetworkX graph
     nodes = list(network.nodes())
     node_index_map = {node: idx for idx, node in enumerate(nodes)}
-    # Create a binary neighborhood matrix
-    num_nodes = len(nodes)
-    # Initialize neighborhoods with zeros and set self-self entries to 1
-    neighborhoods = np.eye(num_nodes, dtype=int)
-    # Assign neighborhoods based on community partitions using the mapped indices
+    # Prepare data for CSR matrix
+    row_indices = []
+    col_indices = []
     for community in partition:
-        for node_i in community:
-            for node_j in community:
-                idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
-                idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
-                neighborhoods[idx_i, idx_j] = 1
+        mapped_indices = [node_index_map[igraph_network.vs[node]["_nx_name"]] for node in community]
+        for i in mapped_indices:
+            for j in mapped_indices:
+                row_indices.append(i)
+                col_indices.append(j)
+    # Create a CSR matrix
+    num_nodes = len(nodes)
+    data = np.ones(len(row_indices), dtype=int)
+    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
     return neighborhoods
@@ -146,18 +155,18 @@ def calculate_louvain_neighborhoods(
     resolution: float = 0.1,
     fraction_shortest_edges: float = 1.0,
     random_seed: int = 888,
-) -> np.ndarray:
+) -> csr_matrix:
     """Calculate neighborhoods using the Louvain method.
     Args:
         network (nx.Graph): The network graph.
         resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 0.1.
         fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
-            subgraphs before clustering.
+            subgraphs before clustering. Defaults to 1.0.
         random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
     Returns:
-        np.ndarray: A binary neighborhood matrix on the Louvain method.
+        csr_matrix: A binary neighborhood matrix in CSR format.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -170,83 +179,110 @@ def calculate_louvain_neighborhoods(
     # Get the list of nodes in the network and create a mapping to indices
     nodes = list(network.nodes())
     node_index_map = {node: idx for idx, node in enumerate(nodes)}
-    # Create a binary neighborhood matrix
-    num_nodes = len(nodes)
-    # Initialize neighborhoods with zeros and set self-self entries to 1
-    neighborhoods = np.eye(num_nodes, dtype=int)
     # Group nodes by community
     community_groups = {}
     for node, community in partition.items():
         community_groups.setdefault(community, []).append(node)
-    # Assign neighborhoods based on community partitions using the mapped indices
-    for community, nodes in community_groups.items():
-        for node_i in nodes:
-            for node_j in nodes:
-                idx_i = node_index_map[node_i]
-                idx_j = node_index_map[node_j]
-                neighborhoods[idx_i, idx_j] = 1
+    # Prepare data for CSR matrix
+    row_indices = []
+    col_indices = []
+    for community_nodes in community_groups.values():
+        mapped_indices = [node_index_map[node] for node in community_nodes]
+        for i in mapped_indices:
+            for j in mapped_indices:
+                row_indices.append(i)
+                col_indices.append(j)
+    # Create a CSR matrix
+    num_nodes = len(nodes)
+    data = np.ones(len(row_indices), dtype=int)
+    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
     return neighborhoods
 def calculate_markov_clustering_neighborhoods(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
-) -> np.ndarray:
-    """Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix.
+) -> csr_matrix:
+    """Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix (CSR).
     Args:
         network (nx.Graph): The network graph.
         fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
-            subgraphs before clustering.
+            subgraphs before clustering. Defaults to 1.0.
     Returns:
-        np.ndarray: A binary neighborhood matrix on Markov Clustering.
+        csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.
+    Warning:
+        This function temporarily converts the adjacency matrix to a dense format, which may lead to
+        high memory consumption for large graphs.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
         network, fraction_shortest_edges=fraction_shortest_edges
     )
-    # Step 1: Convert the subnetwork to an adjacency matrix
+    # Check if the subgraph has edges
+    if subnetwork.number_of_edges() == 0:
+        raise ValueError("The subgraph has no edges. Adjust the fraction_shortest_edges parameter.")
+    # Step 1: Convert the subnetwork to an adjacency matrix (CSR)
     subnetwork_nodes = list(subnetwork.nodes())
-    adjacency_matrix = nx.to_numpy_array(subnetwork, nodelist=subnetwork_nodes)
-    # Step 2: Run Markov Clustering (MCL) on the subnetwork's adjacency matrix
-    result = mc.run_mcl(adjacency_matrix)
+    adjacency_matrix = nx.to_scipy_sparse_array(subnetwork, nodelist=subnetwork_nodes)
+    # Ensure the adjacency matrix is valid
+    if adjacency_matrix.shape[0] == 0 or adjacency_matrix.shape[1] == 0:
+        raise ValueError(
+            "The adjacency matrix is empty. Check the input graph or filtering criteria."
+        )
+    # Convert the sparse matrix to dense format for MCL
+    dense_matrix = adjacency_matrix.toarray()
+    # Step 2: Run Markov Clustering (MCL) on the dense adjacency matrix
+    try:
+        result = mc.run_mcl(dense_matrix)
+    except Exception as e:
+        raise RuntimeError(f"Markov Clustering failed: {e}")
     clusters = mc.get_clusters(result)
     # Step 3: Prepare the original network nodes and indices
     nodes = list(network.nodes())
     node_index_map = {node: idx for idx, node in enumerate(nodes)}
     num_nodes = len(nodes)
-    # Step 4: Initialize the neighborhood matrix for the original network
-    neighborhoods = np.eye(num_nodes, dtype=int)
-    # Step 5: Fill the neighborhoods matrix using the clusters from the subnetwork
+    # Step 4: Prepare data for CSR matrix
+    row_indices = []
+    col_indices = []
     for cluster in clusters:
         for node_i in cluster:
             for node_j in cluster:
                 # Map the indices back to the original network's node indices
                 original_node_i = subnetwork_nodes[node_i]
                 original_node_j = subnetwork_nodes[node_j]
                 if original_node_i in node_index_map and original_node_j in node_index_map:
                     idx_i = node_index_map[original_node_i]
                     idx_j = node_index_map[original_node_j]
-                    neighborhoods[idx_i, idx_j] = 1
+                    row_indices.append(idx_i)
+                    col_indices.append(idx_j)
+    # Step 5: Create a CSR matrix
+    data = np.ones(len(row_indices), dtype=int)
+    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
     return neighborhoods
 def calculate_spinglass_neighborhoods(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
-) -> np.ndarray:
+) -> csr_matrix:
     """Apply Spinglass Community Detection to the network, handling disconnected components.
     Args:
         network (nx.Graph): The network graph.
         fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
-            subgraphs before clustering.
+            subgraphs before clustering. Defaults to 1.0.
     Returns:
-        np.ndarray: A binary neighborhood matrix based on Spinglass communities.
+        csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -254,12 +290,11 @@ def calculate_spinglass_neighborhoods(
     )
     # Step 1: Find connected components in the graph
     components = list(nx.connected_components(subnetwork))
-    # Prepare to store community results
+    # Prepare data for CSR matrix
     nodes = list(network.nodes())
     node_index_map = {node: idx for idx, node in enumerate(nodes)}
-    num_nodes = len(nodes)
-    # Initialize neighborhoods with zeros and set self-self entries to 1
-    neighborhoods = np.eye(num_nodes, dtype=int)
+    row_indices = []
+    col_indices = []
     # Step 2: Run Spinglass on each connected component
     for component in components:
         # Extract the subgraph corresponding to the current component
@@ -280,27 +315,34 @@ def calculate_spinglass_neighborhoods(
         # Step 3: Assign neighborhoods based on community labels
         for community in communities:
-            for node_i in community:
-                for node_j in community:
-                    idx_i = node_index_map[igraph_subgraph.vs[node_i]["_nx_name"]]
-                    idx_j = node_index_map[igraph_subgraph.vs[node_j]["_nx_name"]]
-                    neighborhoods[idx_i, idx_j] = 1
+            mapped_indices = [
+                node_index_map[igraph_subgraph.vs[node]["_nx_name"]] for node in community
+            ]
+            for i in mapped_indices:
+                for j in mapped_indices:
+                    row_indices.append(i)
+                    col_indices.append(j)
+    # Step 4: Create a CSR matrix
+    num_nodes = len(nodes)
+    data = np.ones(len(row_indices), dtype=int)
+    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
     return neighborhoods
 def calculate_walktrap_neighborhoods(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
-) -> np.ndarray:
-    """Apply Walktrap Community Detection to the network.
+) -> csr_matrix:
+    """Apply Walktrap Community Detection to the network with CSR matrix output.
     Args:
         network (nx.Graph): The network graph.
         fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
-            subgraphs before clustering.
+            subgraphs before clustering. Defaults to 1.0.
     Returns:
-        np.ndarray: A binary neighborhood matrix on Walktrap communities.
+        csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -313,17 +355,20 @@ def calculate_walktrap_neighborhoods(
     # Get the list of nodes in the original NetworkX graph
     nodes = list(network.nodes())
     node_index_map = {node: idx for idx, node in enumerate(nodes)}
-    # Create a binary neighborhood matrix
-    num_nodes = len(nodes)
-    # Initialize neighborhoods with zeros and set self-self entries to 1
-    neighborhoods = np.eye(num_nodes, dtype=int)
-    # Assign neighborhoods based on community labels
+    # Prepare data for CSR matrix
+    row_indices = []
+    col_indices = []
     for community in communities:
-        for node_i in community:
-            for node_j in community:
-                idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
-                idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
-                neighborhoods[idx_i, idx_j] = 1
+        mapped_indices = [node_index_map[igraph_network.vs[node]["_nx_name"]] for node in community]
+        for i in mapped_indices:
+            for j in mapped_indices:
+                row_indices.append(i)
+                col_indices.append(j)
+    # Create a CSR matrix
+    num_nodes = len(nodes)
+    data = np.ones(len(row_indices), dtype=int)
+    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
     return neighborhoods

risk-network 0.0.9b26__py3-none-any.whl → 0.0.9b28__py3-none-any.whl

risk-network 0.0.9b26__py3-none-any.whl → 0.0.9b28__py3-none-any.whl