risk-network 0.0.9b26__py3-none-any.whl → 0.0.9b28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.9-beta.26"
10
+ __version__ = "0.0.9-beta.28"
@@ -16,7 +16,7 @@ from nltk.tokenize import word_tokenize
16
16
  from nltk.corpus import stopwords
17
17
 
18
18
  from risk.log import logger
19
- from scipy.sparse import csr_matrix
19
+ from scipy.sparse import coo_matrix
20
20
 
21
21
 
22
22
  def _setup_nltk():
@@ -41,14 +41,13 @@ stop_words = set(stopwords.words("english"))
41
41
  def load_annotations(
42
42
  network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
43
43
  ) -> Dict[str, Any]:
44
- """Convert annotations input to a DataFrame and reindex based on the network's node labels.
44
+ """Convert annotations input to a sparse matrix and reindex based on the network's node labels.
45
45
 
46
46
  Args:
47
47
  network (nx.Graph): The network graph.
48
48
  annotations_input (Dict[str, Any]): A dictionary with annotations.
49
49
  min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
50
50
  term to be included. Defaults to 2.
51
- use_sparse (bool, optional): Whether to return the annotations matrix as a sparse matrix. Defaults to True.
52
51
 
53
52
  Returns:
54
53
  Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
@@ -58,51 +57,53 @@ def load_annotations(
58
57
  ValueError: If no annotations are found for the nodes in the network.
59
58
  ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
60
59
  """
61
- # Flatten the dictionary to a list of tuples for easier DataFrame creation
62
- flattened_annotations = [
63
- (node, annotation) for annotation, nodes in annotations_input.items() for node in nodes
64
- ]
65
- # Create a DataFrame from the flattened list
66
- annotations = pd.DataFrame(flattened_annotations, columns=["node", "annotations"])
67
- annotations["is_member"] = 1
68
- # Pivot to create a binary matrix with nodes as rows and annotations as columns
69
- annotations_pivot = annotations.pivot_table(
70
- index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
71
- )
72
- # Reindex the annotations matrix based on the node labels from the network
73
- node_label_order = (attr["label"] for _, attr in network.nodes(data=True) if "label" in attr)
74
- annotations_pivot = annotations_pivot.reindex(index=node_label_order)
75
- # Raise an error if no valid annotations are found for the nodes in the network
76
- if annotations_pivot.notnull().sum().sum() == 0:
60
+ # Step 1: Map nodes and annotations to indices
61
+ node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
62
+ node_to_idx = {node: i for i, node in enumerate(node_label_order)}
63
+ annotation_to_idx = {annotation: i for i, annotation in enumerate(annotations_input)}
64
+ # Step 2: Construct a sparse binary matrix directly
65
+ row = []
66
+ col = []
67
+ data = []
68
+ for annotation, nodes in annotations_input.items():
69
+ for node in nodes:
70
+ if node in node_to_idx and annotation in annotation_to_idx:
71
+ row.append(node_to_idx[node])
72
+ col.append(annotation_to_idx[annotation])
73
+ data.append(1)
74
+
75
+ # Create a sparse binary matrix
76
+ num_nodes = len(node_to_idx)
77
+ num_annotations = len(annotation_to_idx)
78
+ annotations_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotations)).tocsr()
79
+ # Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
80
+ valid_annotations = annotations_pivot.sum(axis=0).A1 >= min_nodes_per_term
81
+ annotations_pivot = annotations_pivot[:, valid_annotations]
82
+ # Step 4: Raise errors for empty matrices
83
+ if annotations_pivot.nnz == 0:
77
84
  raise ValueError("No terms found in the annotation file for the nodes in the network.")
78
85
 
79
- # Filter out annotations with fewer than min_nodes_per_term occurrences
80
- num_terms_before_filtering = annotations_pivot.shape[1]
81
- annotations_pivot = annotations_pivot.loc[
82
- :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
83
- ]
84
- num_terms_after_filtering = annotations_pivot.shape[1]
85
- # Log the number of annotations before and after filtering
86
- logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
87
- logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
88
- logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
89
- if num_terms_after_filtering == 0:
86
+ num_remaining_annotations = annotations_pivot.shape[1]
87
+ if num_remaining_annotations == 0:
90
88
  raise ValueError(
91
89
  f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
92
90
  )
93
91
 
94
- # Extract ordered nodes and annotations
95
- ordered_nodes = tuple(annotations_pivot.index)
96
- ordered_annotations = tuple(annotations_pivot.columns)
97
- # Convert the annotations_pivot matrix to a numpy array or sparse matrix
98
- annotations_pivot_binary = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
99
- # Convert the binary annotations matrix to a sparse matrix
100
- annotations_pivot_binary = csr_matrix(annotations_pivot_binary)
92
+ # Step 5: Extract ordered nodes and annotations
93
+ ordered_nodes = tuple(node_label_order)
94
+ ordered_annotations = tuple(
95
+ annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotations) if is_valid
96
+ )
97
+
98
+ # Log the filtering details
99
+ logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
100
+ logger.info(f"Number of input annotation terms: {num_annotations}")
101
+ logger.info(f"Number of remaining annotation terms: {num_remaining_annotations}")
101
102
 
102
103
  return {
103
104
  "ordered_nodes": ordered_nodes,
104
105
  "ordered_annotations": ordered_annotations,
105
- "matrix": annotations_pivot_binary,
106
+ "matrix": annotations_pivot,
106
107
  }
107
108
 
108
109
 
risk/neighborhoods/api.py CHANGED
@@ -368,7 +368,7 @@ class NeighborhoodsAPI:
368
368
  )
369
369
 
370
370
  # Make a copy of the network to avoid modifying the original
371
- network = copy.deepcopy(network)
371
+ network = copy.copy(network)
372
372
  # Load neighborhoods based on the network and distance metric
373
373
  neighborhoods = self._load_neighborhoods(
374
374
  network,
@@ -438,9 +438,5 @@ class NeighborhoodsAPI:
438
438
  random_seed=random_seed,
439
439
  )
440
440
 
441
- # Ensure the neighborhood matrix is in sparse format
442
- if not isinstance(neighborhoods, csr_matrix):
443
- neighborhoods = csr_matrix(neighborhoods)
444
-
445
441
  # Return the sparse neighborhood matrix
446
442
  return neighborhoods
@@ -10,22 +10,23 @@ import networkx as nx
10
10
  import numpy as np
11
11
  from leidenalg import find_partition, RBConfigurationVertexPartition
12
12
  from networkx.algorithms.community import greedy_modularity_communities
13
+ from scipy.sparse import csr_matrix
13
14
 
14
15
  from risk.log import logger
15
16
 
16
17
 
17
18
  def calculate_greedy_modularity_neighborhoods(
18
19
  network: nx.Graph, fraction_shortest_edges: float = 1.0
19
- ) -> np.ndarray:
20
- """Calculate neighborhoods using the Greedy Modularity method.
20
+ ) -> csr_matrix:
21
+ """Calculate neighborhoods using the Greedy Modularity method with CSR matrix output.
21
22
 
22
23
  Args:
23
24
  network (nx.Graph): The network graph.
24
25
  fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
25
- subgraphs before clustering.
26
+ subgraphs before clustering. Defaults to 1.0.
26
27
 
27
28
  Returns:
28
- np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
29
+ csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
29
30
  """
30
31
  # Create a subgraph with the shortest edges based on the rank fraction
31
32
  subnetwork = _create_percentile_limited_subgraph(
@@ -36,35 +37,36 @@ def calculate_greedy_modularity_neighborhoods(
36
37
  # Get the list of nodes in the original NetworkX graph
37
38
  nodes = list(network.nodes())
38
39
  node_index_map = {node: idx for idx, node in enumerate(nodes)}
39
- # Create a binary neighborhood matrix
40
- num_nodes = len(nodes)
41
- # Initialize neighborhoods with zeros and set self-self entries to 1
42
- neighborhoods = np.eye(num_nodes, dtype=int)
43
- # Fill in the neighborhood matrix for nodes in the same community
40
+ # Prepare data for CSR matrix
41
+ row_indices = []
42
+ col_indices = []
44
43
  for community in communities:
45
- # Iterate through all pairs of nodes in the same community
46
- for node_i in community:
47
- for node_j in community:
48
- idx_i = node_index_map[node_i]
49
- idx_j = node_index_map[node_j]
50
- # Set them as neighbors (1) in the binary matrix
51
- neighborhoods[idx_i, idx_j] = 1
44
+ mapped_indices = [node_index_map[node] for node in community]
45
+ for i in mapped_indices:
46
+ for j in mapped_indices:
47
+ row_indices.append(i)
48
+ col_indices.append(j)
49
+
50
+ # Create a CSR matrix
51
+ num_nodes = len(nodes)
52
+ data = np.ones(len(row_indices), dtype=int)
53
+ neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
52
54
 
53
55
  return neighborhoods
54
56
 
55
57
 
56
58
  def calculate_label_propagation_neighborhoods(
57
59
  network: nx.Graph, fraction_shortest_edges: float = 1.0
58
- ) -> np.ndarray:
60
+ ) -> csr_matrix:
59
61
  """Apply Label Propagation to the network to detect communities.
60
62
 
61
63
  Args:
62
64
  network (nx.Graph): The network graph.
63
65
  fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
64
- subgraphs before clustering.
66
+ subgraphs before clustering. Defaults to 1.0.
65
67
 
66
68
  Returns:
67
- np.ndarray: A binary neighborhood matrix on Label Propagation.
69
+ csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
68
70
  """
69
71
  # Create a subgraph with the shortest edges based on the rank fraction
70
72
  subnetwork = _create_percentile_limited_subgraph(
@@ -77,17 +79,21 @@ def calculate_label_propagation_neighborhoods(
77
79
  # Get the list of nodes in the network
78
80
  nodes = list(network.nodes())
79
81
  node_index_map = {node: idx for idx, node in enumerate(nodes)}
80
- # Create a binary neighborhood matrix
81
- num_nodes = len(nodes)
82
- # Initialize neighborhoods with zeros and set self-self entries to 1
83
- neighborhoods = np.eye(num_nodes, dtype=int)
82
+ # Prepare data for CSR matrix
83
+ row_indices = []
84
+ col_indices = []
84
85
  # Assign neighborhoods based on community labels using the mapped indices
85
86
  for community in communities:
86
- for node_i in community:
87
- for node_j in community:
88
- idx_i = node_index_map[node_i]
89
- idx_j = node_index_map[node_j]
90
- neighborhoods[idx_i, idx_j] = 1
87
+ mapped_indices = [node_index_map[node] for node in community]
88
+ for i in mapped_indices:
89
+ for j in mapped_indices:
90
+ row_indices.append(i)
91
+ col_indices.append(j)
92
+
93
+ # Create a CSR matrix
94
+ num_nodes = len(nodes)
95
+ data = np.ones(len(row_indices), dtype=int)
96
+ neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
91
97
 
92
98
  return neighborhoods
93
99
 
@@ -97,18 +103,18 @@ def calculate_leiden_neighborhoods(
97
103
  resolution: float = 1.0,
98
104
  fraction_shortest_edges: float = 1.0,
99
105
  random_seed: int = 888,
100
- ) -> np.ndarray:
101
- """Calculate neighborhoods using the Leiden method.
106
+ ) -> csr_matrix:
107
+ """Calculate neighborhoods using the Leiden method with CSR matrix output.
102
108
 
103
109
  Args:
104
110
  network (nx.Graph): The network graph.
105
111
  resolution (float, optional): Resolution parameter for the Leiden method. Defaults to 1.0.
106
112
  fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
107
- subgraphs before clustering.
113
+ subgraphs before clustering. Defaults to 1.0.
108
114
  random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
109
115
 
110
116
  Returns:
111
- np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
117
+ csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
112
118
  """
113
119
  # Create a subgraph with the shortest edges based on the rank fraction
114
120
  subnetwork = _create_percentile_limited_subgraph(
@@ -126,17 +132,20 @@ def calculate_leiden_neighborhoods(
126
132
  # Get the list of nodes in the original NetworkX graph
127
133
  nodes = list(network.nodes())
128
134
  node_index_map = {node: idx for idx, node in enumerate(nodes)}
129
- # Create a binary neighborhood matrix
130
- num_nodes = len(nodes)
131
- # Initialize neighborhoods with zeros and set self-self entries to 1
132
- neighborhoods = np.eye(num_nodes, dtype=int)
133
- # Assign neighborhoods based on community partitions using the mapped indices
135
+ # Prepare data for CSR matrix
136
+ row_indices = []
137
+ col_indices = []
134
138
  for community in partition:
135
- for node_i in community:
136
- for node_j in community:
137
- idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
138
- idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
139
- neighborhoods[idx_i, idx_j] = 1
139
+ mapped_indices = [node_index_map[igraph_network.vs[node]["_nx_name"]] for node in community]
140
+ for i in mapped_indices:
141
+ for j in mapped_indices:
142
+ row_indices.append(i)
143
+ col_indices.append(j)
144
+
145
+ # Create a CSR matrix
146
+ num_nodes = len(nodes)
147
+ data = np.ones(len(row_indices), dtype=int)
148
+ neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
140
149
 
141
150
  return neighborhoods
142
151
 
@@ -146,18 +155,18 @@ def calculate_louvain_neighborhoods(
146
155
  resolution: float = 0.1,
147
156
  fraction_shortest_edges: float = 1.0,
148
157
  random_seed: int = 888,
149
- ) -> np.ndarray:
158
+ ) -> csr_matrix:
150
159
  """Calculate neighborhoods using the Louvain method.
151
160
 
152
161
  Args:
153
162
  network (nx.Graph): The network graph.
154
163
  resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 0.1.
155
164
  fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
156
- subgraphs before clustering.
165
+ subgraphs before clustering. Defaults to 1.0.
157
166
  random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
158
167
 
159
168
  Returns:
160
- np.ndarray: A binary neighborhood matrix on the Louvain method.
169
+ csr_matrix: A binary neighborhood matrix in CSR format.
161
170
  """
162
171
  # Create a subgraph with the shortest edges based on the rank fraction
163
172
  subnetwork = _create_percentile_limited_subgraph(
@@ -170,83 +179,110 @@ def calculate_louvain_neighborhoods(
170
179
  # Get the list of nodes in the network and create a mapping to indices
171
180
  nodes = list(network.nodes())
172
181
  node_index_map = {node: idx for idx, node in enumerate(nodes)}
173
- # Create a binary neighborhood matrix
174
- num_nodes = len(nodes)
175
- # Initialize neighborhoods with zeros and set self-self entries to 1
176
- neighborhoods = np.eye(num_nodes, dtype=int)
177
182
  # Group nodes by community
178
183
  community_groups = {}
179
184
  for node, community in partition.items():
180
185
  community_groups.setdefault(community, []).append(node)
181
186
 
182
- # Assign neighborhoods based on community partitions using the mapped indices
183
- for community, nodes in community_groups.items():
184
- for node_i in nodes:
185
- for node_j in nodes:
186
- idx_i = node_index_map[node_i]
187
- idx_j = node_index_map[node_j]
188
- neighborhoods[idx_i, idx_j] = 1
187
+ # Prepare data for CSR matrix
188
+ row_indices = []
189
+ col_indices = []
190
+ for community_nodes in community_groups.values():
191
+ mapped_indices = [node_index_map[node] for node in community_nodes]
192
+ for i in mapped_indices:
193
+ for j in mapped_indices:
194
+ row_indices.append(i)
195
+ col_indices.append(j)
196
+
197
+ # Create a CSR matrix
198
+ num_nodes = len(nodes)
199
+ data = np.ones(len(row_indices), dtype=int)
200
+ neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
189
201
 
190
202
  return neighborhoods
191
203
 
192
204
 
193
205
  def calculate_markov_clustering_neighborhoods(
194
206
  network: nx.Graph, fraction_shortest_edges: float = 1.0
195
- ) -> np.ndarray:
196
- """Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix.
207
+ ) -> csr_matrix:
208
+ """Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix (CSR).
197
209
 
198
210
  Args:
199
211
  network (nx.Graph): The network graph.
200
212
  fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
201
- subgraphs before clustering.
213
+ subgraphs before clustering. Defaults to 1.0.
202
214
 
203
215
  Returns:
204
- np.ndarray: A binary neighborhood matrix on Markov Clustering.
216
+ csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.
217
+
218
+ Warning:
219
+ This function temporarily converts the adjacency matrix to a dense format, which may lead to
220
+ high memory consumption for large graphs.
205
221
  """
206
222
  # Create a subgraph with the shortest edges based on the rank fraction
207
223
  subnetwork = _create_percentile_limited_subgraph(
208
224
  network, fraction_shortest_edges=fraction_shortest_edges
209
225
  )
210
- # Step 1: Convert the subnetwork to an adjacency matrix
226
+ # Check if the subgraph has edges
227
+ if subnetwork.number_of_edges() == 0:
228
+ raise ValueError("The subgraph has no edges. Adjust the fraction_shortest_edges parameter.")
229
+
230
+ # Step 1: Convert the subnetwork to an adjacency matrix (CSR)
211
231
  subnetwork_nodes = list(subnetwork.nodes())
212
- adjacency_matrix = nx.to_numpy_array(subnetwork, nodelist=subnetwork_nodes)
213
- # Step 2: Run Markov Clustering (MCL) on the subnetwork's adjacency matrix
214
- result = mc.run_mcl(adjacency_matrix)
232
+ adjacency_matrix = nx.to_scipy_sparse_array(subnetwork, nodelist=subnetwork_nodes)
233
+ # Ensure the adjacency matrix is valid
234
+ if adjacency_matrix.shape[0] == 0 or adjacency_matrix.shape[1] == 0:
235
+ raise ValueError(
236
+ "The adjacency matrix is empty. Check the input graph or filtering criteria."
237
+ )
238
+
239
+ # Convert the sparse matrix to dense format for MCL
240
+ dense_matrix = adjacency_matrix.toarray()
241
+ # Step 2: Run Markov Clustering (MCL) on the dense adjacency matrix
242
+ try:
243
+ result = mc.run_mcl(dense_matrix)
244
+ except Exception as e:
245
+ raise RuntimeError(f"Markov Clustering failed: {e}")
246
+
215
247
  clusters = mc.get_clusters(result)
216
248
  # Step 3: Prepare the original network nodes and indices
217
249
  nodes = list(network.nodes())
218
250
  node_index_map = {node: idx for idx, node in enumerate(nodes)}
219
251
  num_nodes = len(nodes)
220
- # Step 4: Initialize the neighborhood matrix for the original network
221
- neighborhoods = np.eye(num_nodes, dtype=int)
222
- # Step 5: Fill the neighborhoods matrix using the clusters from the subnetwork
252
+ # Step 4: Prepare data for CSR matrix
253
+ row_indices = []
254
+ col_indices = []
223
255
  for cluster in clusters:
224
256
  for node_i in cluster:
225
257
  for node_j in cluster:
226
258
  # Map the indices back to the original network's node indices
227
259
  original_node_i = subnetwork_nodes[node_i]
228
260
  original_node_j = subnetwork_nodes[node_j]
229
-
230
261
  if original_node_i in node_index_map and original_node_j in node_index_map:
231
262
  idx_i = node_index_map[original_node_i]
232
263
  idx_j = node_index_map[original_node_j]
233
- neighborhoods[idx_i, idx_j] = 1
264
+ row_indices.append(idx_i)
265
+ col_indices.append(idx_j)
266
+
267
+ # Step 5: Create a CSR matrix
268
+ data = np.ones(len(row_indices), dtype=int)
269
+ neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
234
270
 
235
271
  return neighborhoods
236
272
 
237
273
 
238
274
  def calculate_spinglass_neighborhoods(
239
275
  network: nx.Graph, fraction_shortest_edges: float = 1.0
240
- ) -> np.ndarray:
276
+ ) -> csr_matrix:
241
277
  """Apply Spinglass Community Detection to the network, handling disconnected components.
242
278
 
243
279
  Args:
244
280
  network (nx.Graph): The network graph.
245
281
  fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
246
- subgraphs before clustering.
282
+ subgraphs before clustering. Defaults to 1.0.
247
283
 
248
284
  Returns:
249
- np.ndarray: A binary neighborhood matrix based on Spinglass communities.
285
+ csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
250
286
  """
251
287
  # Create a subgraph with the shortest edges based on the rank fraction
252
288
  subnetwork = _create_percentile_limited_subgraph(
@@ -254,12 +290,11 @@ def calculate_spinglass_neighborhoods(
254
290
  )
255
291
  # Step 1: Find connected components in the graph
256
292
  components = list(nx.connected_components(subnetwork))
257
- # Prepare to store community results
293
+ # Prepare data for CSR matrix
258
294
  nodes = list(network.nodes())
259
295
  node_index_map = {node: idx for idx, node in enumerate(nodes)}
260
- num_nodes = len(nodes)
261
- # Initialize neighborhoods with zeros and set self-self entries to 1
262
- neighborhoods = np.eye(num_nodes, dtype=int)
296
+ row_indices = []
297
+ col_indices = []
263
298
  # Step 2: Run Spinglass on each connected component
264
299
  for component in components:
265
300
  # Extract the subgraph corresponding to the current component
@@ -280,27 +315,34 @@ def calculate_spinglass_neighborhoods(
280
315
 
281
316
  # Step 3: Assign neighborhoods based on community labels
282
317
  for community in communities:
283
- for node_i in community:
284
- for node_j in community:
285
- idx_i = node_index_map[igraph_subgraph.vs[node_i]["_nx_name"]]
286
- idx_j = node_index_map[igraph_subgraph.vs[node_j]["_nx_name"]]
287
- neighborhoods[idx_i, idx_j] = 1
318
+ mapped_indices = [
319
+ node_index_map[igraph_subgraph.vs[node]["_nx_name"]] for node in community
320
+ ]
321
+ for i in mapped_indices:
322
+ for j in mapped_indices:
323
+ row_indices.append(i)
324
+ col_indices.append(j)
325
+
326
+ # Step 4: Create a CSR matrix
327
+ num_nodes = len(nodes)
328
+ data = np.ones(len(row_indices), dtype=int)
329
+ neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
288
330
 
289
331
  return neighborhoods
290
332
 
291
333
 
292
334
  def calculate_walktrap_neighborhoods(
293
335
  network: nx.Graph, fraction_shortest_edges: float = 1.0
294
- ) -> np.ndarray:
295
- """Apply Walktrap Community Detection to the network.
336
+ ) -> csr_matrix:
337
+ """Apply Walktrap Community Detection to the network with CSR matrix output.
296
338
 
297
339
  Args:
298
340
  network (nx.Graph): The network graph.
299
341
  fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
300
- subgraphs before clustering.
342
+ subgraphs before clustering. Defaults to 1.0.
301
343
 
302
344
  Returns:
303
- np.ndarray: A binary neighborhood matrix on Walktrap communities.
345
+ csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
304
346
  """
305
347
  # Create a subgraph with the shortest edges based on the rank fraction
306
348
  subnetwork = _create_percentile_limited_subgraph(
@@ -313,17 +355,20 @@ def calculate_walktrap_neighborhoods(
313
355
  # Get the list of nodes in the original NetworkX graph
314
356
  nodes = list(network.nodes())
315
357
  node_index_map = {node: idx for idx, node in enumerate(nodes)}
316
- # Create a binary neighborhood matrix
317
- num_nodes = len(nodes)
318
- # Initialize neighborhoods with zeros and set self-self entries to 1
319
- neighborhoods = np.eye(num_nodes, dtype=int)
320
- # Assign neighborhoods based on community labels
358
+ # Prepare data for CSR matrix
359
+ row_indices = []
360
+ col_indices = []
321
361
  for community in communities:
322
- for node_i in community:
323
- for node_j in community:
324
- idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
325
- idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
326
- neighborhoods[idx_i, idx_j] = 1
362
+ mapped_indices = [node_index_map[igraph_network.vs[node]["_nx_name"]] for node in community]
363
+ for i in mapped_indices:
364
+ for j in mapped_indices:
365
+ row_indices.append(i)
366
+ col_indices.append(j)
367
+
368
+ # Create a CSR matrix
369
+ num_nodes = len(nodes)
370
+ data = np.ones(len(row_indices), dtype=int)
371
+ neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
327
372
 
328
373
  return neighborhoods
329
374