risk-network 0.0.9b9__tar.gz → 0.0.9b13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/PKG-INFO +3 -1
  2. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/pyproject.toml +2 -0
  3. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/__init__.py +1 -1
  4. risk_network-0.0.9b13/risk/neighborhoods/community.py +367 -0
  5. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/neighborhoods/neighborhoods.py +53 -70
  6. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/plot/labels.py +1 -2
  7. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/risk.py +33 -16
  8. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk_network.egg-info/PKG-INFO +3 -1
  9. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk_network.egg-info/requires.txt +2 -0
  10. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/setup.py +2 -1
  11. risk_network-0.0.9b9/risk/neighborhoods/community.py +0 -189
  12. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/LICENSE +0 -0
  13. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/MANIFEST.in +0 -0
  14. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/README.md +0 -0
  15. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/annotations/__init__.py +0 -0
  16. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/annotations/annotations.py +0 -0
  17. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/annotations/io.py +0 -0
  18. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/constants.py +0 -0
  19. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/log/__init__.py +0 -0
  20. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/log/console.py +0 -0
  21. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/log/parameters.py +0 -0
  22. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/neighborhoods/__init__.py +0 -0
  23. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/neighborhoods/domains.py +0 -0
  24. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/__init__.py +0 -0
  25. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/geometry.py +0 -0
  26. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/graph/__init__.py +0 -0
  27. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/graph/network.py +0 -0
  28. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/graph/summary.py +0 -0
  29. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/io.py +0 -0
  30. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/plot/__init__.py +0 -0
  31. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/plot/canvas.py +0 -0
  32. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/plot/contour.py +0 -0
  33. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/plot/network.py +0 -0
  34. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/plot/plotter.py +0 -0
  35. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/plot/utils/colors.py +0 -0
  36. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/network/plot/utils/layout.py +0 -0
  37. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/stats/__init__.py +0 -0
  38. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/stats/hypergeom.py +0 -0
  39. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/stats/permutation/__init__.py +0 -0
  40. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/stats/permutation/permutation.py +0 -0
  41. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/stats/permutation/test_functions.py +0 -0
  42. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/stats/poisson.py +0 -0
  43. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk/stats/stats.py +0 -0
  44. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk_network.egg-info/SOURCES.txt +0 -0
  45. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk_network.egg-info/dependency_links.txt +0 -0
  46. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/risk_network.egg-info/top_level.txt +0 -0
  47. {risk_network-0.0.9b9 → risk_network-0.0.9b13}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.9b9
3
+ Version: 0.0.9b13
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -695,6 +695,7 @@ Requires-Python: >=3.8
695
695
  Description-Content-Type: text/markdown
696
696
  License-File: LICENSE
697
697
  Requires-Dist: ipywidgets
698
+ Requires-Dist: leidenalg
698
699
  Requires-Dist: markov_clustering
699
700
  Requires-Dist: matplotlib
700
701
  Requires-Dist: networkx
@@ -702,6 +703,7 @@ Requires-Dist: nltk==3.8.1
702
703
  Requires-Dist: numpy
703
704
  Requires-Dist: openpyxl
704
705
  Requires-Dist: pandas
706
+ Requires-Dist: python-igraph
705
707
  Requires-Dist: python-louvain
706
708
  Requires-Dist: scikit-learn
707
709
  Requires-Dist: scipy
@@ -27,6 +27,7 @@ classifiers = [
27
27
  ]
28
28
  dependencies = [
29
29
  "ipywidgets",
30
+ "leidenalg",
30
31
  "markov_clustering",
31
32
  "matplotlib",
32
33
  "networkx",
@@ -34,6 +35,7 @@ dependencies = [
34
35
  "numpy",
35
36
  "openpyxl",
36
37
  "pandas",
38
+ "python-igraph",
37
39
  "python-louvain",
38
40
  "scikit-learn",
39
41
  "scipy",
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.9-beta.9"
10
+ __version__ = "0.0.9-beta.13"
@@ -0,0 +1,367 @@
1
+ """
2
+ risk/neighborhoods/community
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ import community as community_louvain
7
+ import igraph as ig
8
+ import markov_clustering as mc
9
+ import networkx as nx
10
+ import numpy as np
11
+ from leidenalg import find_partition, RBConfigurationVertexPartition
12
+ from networkx.algorithms.community import greedy_modularity_communities
13
+
14
+
15
+ def calculate_greedy_modularity_neighborhoods(
16
+ network: nx.Graph, edge_rank_percentile: float = 1.0
17
+ ) -> np.ndarray:
18
+ """Calculate neighborhoods using the Greedy Modularity method.
19
+
20
+ Args:
21
+ network (nx.Graph): The network graph.
22
+ edge_rank_percentile (float, optional): Shortest edge rank percentile threshold for creating
23
+ subgraphs before clustering.
24
+
25
+ Returns:
26
+ np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
27
+ """
28
+ # Create a subgraph with the shortest edges based on the rank percentile
29
+ subnetwork = _create_percentile_limited_subgraph(
30
+ network, edge_rank_percentile=edge_rank_percentile
31
+ )
32
+ # Detect communities using the Greedy Modularity method
33
+ communities = greedy_modularity_communities(subnetwork)
34
+ # Get the list of nodes in the original NetworkX graph
35
+ nodes = list(network.nodes())
36
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
37
+ # Create a binary neighborhood matrix
38
+ num_nodes = len(nodes)
39
+ # Initialize neighborhoods with zeros and set self-self entries to 1
40
+ neighborhoods = np.eye(num_nodes, dtype=int)
41
+ # Fill in the neighborhood matrix for nodes in the same community
42
+ for community in communities:
43
+ # Iterate through all pairs of nodes in the same community
44
+ for node_i in community:
45
+ for node_j in community:
46
+ idx_i = node_index_map[node_i]
47
+ idx_j = node_index_map[node_j]
48
+ # Set them as neighbors (1) in the binary matrix
49
+ neighborhoods[idx_i, idx_j] = 1
50
+
51
+ return neighborhoods
52
+
53
+
54
+ def calculate_label_propagation_neighborhoods(
55
+ network: nx.Graph, edge_rank_percentile: float = 1.0
56
+ ) -> np.ndarray:
57
+ """Apply Label Propagation to the network to detect communities.
58
+
59
+ Args:
60
+ network (nx.Graph): The network graph.
61
+ edge_rank_percentile (float, optional): Shortest edge rank percentile threshold for creating
62
+ subgraphs before clustering.
63
+
64
+ Returns:
65
+ np.ndarray: A binary neighborhood matrix based on Label Propagation communities.
66
+ """
67
+ # Create a subgraph with the shortest edges based on the rank percentile
68
+ subnetwork = _create_percentile_limited_subgraph(
69
+ network, edge_rank_percentile=edge_rank_percentile
70
+ )
71
+ # Apply Label Propagation for community detection
72
+ communities = nx.algorithms.community.label_propagation.label_propagation_communities(
73
+ subnetwork
74
+ )
75
+ # Get the list of nodes in the network
76
+ nodes = list(network.nodes())
77
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
78
+ # Create a binary neighborhood matrix
79
+ num_nodes = len(nodes)
80
+ # Initialize neighborhoods with zeros and set self-self entries to 1
81
+ neighborhoods = np.eye(num_nodes, dtype=int)
82
+ # Assign neighborhoods based on community labels using the mapped indices
83
+ for community in communities:
84
+ for node_i in community:
85
+ for node_j in community:
86
+ idx_i = node_index_map[node_i]
87
+ idx_j = node_index_map[node_j]
88
+ neighborhoods[idx_i, idx_j] = 1
89
+
90
+ return neighborhoods
91
+
92
+
93
+ def calculate_leiden_neighborhoods(
94
+ network: nx.Graph,
95
+ resolution: float = 1.0,
96
+ edge_rank_percentile: float = 1.0,
97
+ random_seed: int = 888,
98
+ ) -> np.ndarray:
99
+ """Calculate neighborhoods using the Leiden method.
100
+
101
+ Args:
102
+ network (nx.Graph): The network graph.
103
+ resolution (float, optional): Resolution parameter for the Leiden method. Defaults to 1.0.
104
+ edge_rank_percentile (float, optional): Shortest edge rank percentile threshold for creating
105
+ subgraphs before clustering.
106
+ random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
107
+
108
+ Returns:
109
+ np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
110
+ """
111
+ # Create a subgraph with the shortest edges based on the rank percentile
112
+ subnetwork = _create_percentile_limited_subgraph(
113
+ network, edge_rank_percentile=edge_rank_percentile
114
+ )
115
+ # Convert NetworkX graph to iGraph
116
+ igraph_network = ig.Graph.from_networkx(subnetwork)
117
+ # Apply Leiden algorithm using RBConfigurationVertexPartition, which supports resolution
118
+ partition = find_partition(
119
+ igraph_network,
120
+ partition_type=RBConfigurationVertexPartition,
121
+ resolution_parameter=resolution,
122
+ seed=random_seed,
123
+ )
124
+ # Get the list of nodes in the original NetworkX graph
125
+ nodes = list(network.nodes())
126
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
127
+ # Create a binary neighborhood matrix
128
+ num_nodes = len(nodes)
129
+ # Initialize neighborhoods with zeros and set self-self entries to 1
130
+ neighborhoods = np.eye(num_nodes, dtype=int)
131
+ # Assign neighborhoods based on community partitions using the mapped indices
132
+ for community in partition:
133
+ for node_i in community:
134
+ for node_j in community:
135
+ idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
136
+ idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
137
+ neighborhoods[idx_i, idx_j] = 1
138
+
139
+ return neighborhoods
140
+
141
+
142
+ def calculate_louvain_neighborhoods(
143
+ network: nx.Graph,
144
+ resolution: float = 0.1,
145
+ edge_rank_percentile: float = 1.0,
146
+ random_seed: int = 888,
147
+ ) -> np.ndarray:
148
+ """Calculate neighborhoods using the Louvain method.
149
+
150
+ Args:
151
+ network (nx.Graph): The network graph.
152
+ resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 0.1.
153
+ edge_rank_percentile (float, optional): Shortest edge rank percentile threshold for creating
154
+ subgraphs before clustering.
155
+ random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
156
+
157
+ Returns:
158
+ np.ndarray: A binary neighborhood matrix based on the Louvain method.
159
+ """
160
+ # Create a subgraph with the shortest edges based on the rank percentile
161
+ subnetwork = _create_percentile_limited_subgraph(
162
+ network, edge_rank_percentile=edge_rank_percentile
163
+ )
164
+ # Apply Louvain method to partition the network
165
+ partition = community_louvain.best_partition(
166
+ subnetwork, resolution=resolution, random_state=random_seed
167
+ )
168
+ # Get the list of nodes in the network and create a mapping to indices
169
+ nodes = list(network.nodes())
170
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
171
+ # Create a binary neighborhood matrix
172
+ num_nodes = len(nodes)
173
+ # Initialize neighborhoods with zeros and set self-self entries to 1
174
+ neighborhoods = np.eye(num_nodes, dtype=int)
175
+ # Group nodes by community
176
+ community_groups = {}
177
+ for node, community in partition.items():
178
+ community_groups.setdefault(community, []).append(node)
179
+
180
+ # Assign neighborhoods based on community partitions using the mapped indices
181
+ for community, nodes in community_groups.items():
182
+ for node_i in nodes:
183
+ for node_j in nodes:
184
+ idx_i = node_index_map[node_i]
185
+ idx_j = node_index_map[node_j]
186
+ neighborhoods[idx_i, idx_j] = 1
187
+
188
+ return neighborhoods
189
+
190
+
191
+ def calculate_markov_clustering_neighborhoods(
192
+ network: nx.Graph, edge_rank_percentile: float = 1.0
193
+ ) -> np.ndarray:
194
+ """Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix.
195
+
196
+ Args:
197
+ network (nx.Graph): The network graph.
198
+ edge_rank_percentile (float, optional): Shortest edge rank percentile threshold for creating
199
+ subgraphs before clustering.
200
+
201
+ Returns:
202
+ np.ndarray: A binary neighborhood matrix based on Markov Clustering.
203
+ """
204
+ # Create a subgraph with the shortest edges based on the rank percentile
205
+ subnetwork = _create_percentile_limited_subgraph(
206
+ network, edge_rank_percentile=edge_rank_percentile
207
+ )
208
+ # Step 1: Convert the subnetwork to an adjacency matrix
209
+ subnetwork_nodes = list(subnetwork.nodes())
210
+ adjacency_matrix = nx.to_numpy_array(subnetwork, nodelist=subnetwork_nodes)
211
+ # Step 2: Run Markov Clustering (MCL) on the subnetwork's adjacency matrix
212
+ result = mc.run_mcl(adjacency_matrix)
213
+ clusters = mc.get_clusters(result)
214
+ # Step 3: Prepare the original network nodes and indices
215
+ nodes = list(network.nodes())
216
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
217
+ num_nodes = len(nodes)
218
+ # Step 4: Initialize the neighborhood matrix for the original network
219
+ neighborhoods = np.eye(num_nodes, dtype=int)
220
+ # Step 5: Fill the neighborhoods matrix using the clusters from the subnetwork
221
+ for cluster in clusters:
222
+ for node_i in cluster:
223
+ for node_j in cluster:
224
+ # Map the indices back to the original network's node indices
225
+ original_node_i = subnetwork_nodes[node_i]
226
+ original_node_j = subnetwork_nodes[node_j]
227
+
228
+ if original_node_i in node_index_map and original_node_j in node_index_map:
229
+ idx_i = node_index_map[original_node_i]
230
+ idx_j = node_index_map[original_node_j]
231
+ neighborhoods[idx_i, idx_j] = 1
232
+
233
+ return neighborhoods
234
+
235
+
236
+ def calculate_spinglass_neighborhoods(
237
+ network: nx.Graph, edge_rank_percentile: float = 1.0
238
+ ) -> np.ndarray:
239
+ """Apply Spinglass Community Detection to the network, handling disconnected components.
240
+
241
+ Args:
242
+ network (nx.Graph): The network graph.
243
+ edge_rank_percentile (float, optional): Shortest edge rank percentile threshold for creating
244
+ subgraphs before clustering.
245
+
246
+ Returns:
247
+ np.ndarray: A binary neighborhood matrix based on Spinglass communities.
248
+ """
249
+ # Create a subgraph with the shortest edges based on the rank percentile
250
+ subnetwork = _create_percentile_limited_subgraph(
251
+ network, edge_rank_percentile=edge_rank_percentile
252
+ )
253
+ # Step 1: Find connected components in the graph
254
+ components = list(nx.connected_components(subnetwork))
255
+ # Prepare to store community results
256
+ nodes = list(network.nodes())
257
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
258
+ num_nodes = len(nodes)
259
+ # Initialize neighborhoods with zeros and set self-self entries to 1
260
+ neighborhoods = np.eye(num_nodes, dtype=int)
261
+ # Step 2: Run Spinglass on each connected component
262
+ for component in components:
263
+ # Extract the subgraph corresponding to the current component
264
+ subgraph = network.subgraph(component)
265
+ # Convert the subgraph to an iGraph object
266
+ igraph_subgraph = ig.Graph.from_networkx(subgraph)
267
+ # Ensure the subgraph is connected before running Spinglass
268
+ if not igraph_subgraph.is_connected():
269
+ print("Warning: Subgraph is not connected. Skipping...")
270
+ continue
271
+
272
+ # Apply Spinglass community detection
273
+ try:
274
+ communities = igraph_subgraph.community_spinglass()
275
+ except Exception as e:
276
+ print(f"Error running Spinglass on component: {e}")
277
+ continue
278
+
279
+ # Step 3: Assign neighborhoods based on community labels
280
+ for community in communities:
281
+ for node_i in community:
282
+ for node_j in community:
283
+ idx_i = node_index_map[igraph_subgraph.vs[node_i]["_nx_name"]]
284
+ idx_j = node_index_map[igraph_subgraph.vs[node_j]["_nx_name"]]
285
+ neighborhoods[idx_i, idx_j] = 1
286
+
287
+ return neighborhoods
288
+
289
+
290
+ def calculate_walktrap_neighborhoods(
291
+ network: nx.Graph, edge_rank_percentile: float = 1.0
292
+ ) -> np.ndarray:
293
+ """Apply Walktrap Community Detection to the network.
294
+
295
+ Args:
296
+ network (nx.Graph): The network graph.
297
+ edge_rank_percentile (float, optional): Shortest edge rank percentile threshold for creating
298
+ subgraphs before clustering.
299
+
300
+ Returns:
301
+ np.ndarray: A binary neighborhood matrix based on Walktrap communities.
302
+ """
303
+ # Create a subgraph with the shortest edges based on the rank percentile
304
+ subnetwork = _create_percentile_limited_subgraph(
305
+ network, edge_rank_percentile=edge_rank_percentile
306
+ )
307
+ # Convert NetworkX graph to iGraph
308
+ igraph_network = ig.Graph.from_networkx(subnetwork)
309
+ # Apply Walktrap community detection
310
+ communities = igraph_network.community_walktrap().as_clustering()
311
+ # Get the list of nodes in the original NetworkX graph
312
+ nodes = list(network.nodes())
313
+ node_index_map = {node: idx for idx, node in enumerate(nodes)}
314
+ # Create a binary neighborhood matrix
315
+ num_nodes = len(nodes)
316
+ # Initialize neighborhoods with zeros and set self-self entries to 1
317
+ neighborhoods = np.eye(num_nodes, dtype=int)
318
+ # Assign neighborhoods based on community labels
319
+ for community in communities:
320
+ for node_i in community:
321
+ for node_j in community:
322
+ idx_i = node_index_map[igraph_network.vs[node_i]["_nx_name"]]
323
+ idx_j = node_index_map[igraph_network.vs[node_j]["_nx_name"]]
324
+ neighborhoods[idx_i, idx_j] = 1
325
+
326
+ return neighborhoods
327
+
328
+
329
+ def _create_percentile_limited_subgraph(G: nx.Graph, edge_rank_percentile: float) -> nx.Graph:
330
+ """Create a subgraph containing the shortest edges based on the specified rank percentile
331
+ of all edge lengths in the input graph.
332
+
333
+ Args:
334
+ G (nx.Graph): The input graph with 'length' attributes on edges.
335
+ edge_rank_percentile (float): The rank percentile (between 0 and 1) to filter edges.
336
+
337
+ Returns:
338
+ nx.Graph: A subgraph with nodes and edges where the edges are within the shortest
339
+ specified rank percentile.
340
+ """
341
+ # Step 1: Extract edges with their lengths
342
+ edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
343
+ if not edges_with_length:
344
+ raise ValueError(
345
+ "No edge lengths found in the graph. Ensure edges have 'length' attributes."
346
+ )
347
+
348
+ # Step 2: Sort edges by length in ascending order
349
+ edges_with_length.sort(key=lambda x: x[2]["length"])
350
+ # Step 3: Calculate the cutoff index for the given rank percentile
351
+ cutoff_index = int(edge_rank_percentile * len(edges_with_length))
352
+ if cutoff_index == 0:
353
+ raise ValueError("The rank percentile is too low, resulting in no edges being included.")
354
+
355
+ # Step 4: Create the subgraph by selecting only the shortest edges within the rank percentile
356
+ subgraph = nx.Graph()
357
+ subgraph.add_nodes_from(G.nodes(data=True)) # Retain all nodes from the original graph
358
+ subgraph.add_edges_from(edges_with_length[:cutoff_index])
359
+ # Step 5: Remove nodes with no edges
360
+ subgraph.remove_nodes_from(list(nx.isolates(subgraph)))
361
+ # Step 6: Raise a Warning exception if the resulting subgraph has no edges
362
+ if subgraph.number_of_edges() == 0:
363
+ raise Warning(
364
+ "The resulting subgraph has no edges. Consider adjusting the rank percentile."
365
+ )
366
+
367
+ return subgraph
@@ -15,6 +15,7 @@ from sklearn.metrics.pairwise import cosine_similarity
15
15
  from risk.neighborhoods.community import (
16
16
  calculate_greedy_modularity_neighborhoods,
17
17
  calculate_label_propagation_neighborhoods,
18
+ calculate_leiden_neighborhoods,
18
19
  calculate_louvain_neighborhoods,
19
20
  calculate_markov_clustering_neighborhoods,
20
21
  calculate_spinglass_neighborhoods,
@@ -29,22 +30,20 @@ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
29
30
  def get_network_neighborhoods(
30
31
  network: nx.Graph,
31
32
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
32
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 1.0,
33
- louvain_resolution: float = 1.0,
33
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 1.0,
34
+ louvain_resolution: float = 0.1,
35
+ leiden_resolution: float = 1.0,
34
36
  random_seed: int = 888,
35
37
  ) -> np.ndarray:
36
38
  """Calculate the combined neighborhoods for each node based on the specified community detection algorithm(s).
37
39
 
38
40
  Args:
39
41
  network (nx.Graph): The network graph.
40
- distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
41
- metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
42
- 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
43
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
44
- Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
45
- Defaults to 1.0.
46
- louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
47
- random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
42
+ distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
43
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
44
+ louvain_resolution (float, optional): Resolution parameter for the Louvain method.
45
+ leiden_resolution (float, optional): Resolution parameter for the Leiden method.
46
+ random_seed (int, optional): Random seed for methods requiring random initialization.
48
47
 
49
48
  Returns:
50
49
  np.ndarray: Summed neighborhood matrix from all selected algorithms.
@@ -56,11 +55,11 @@ def get_network_neighborhoods(
56
55
  # Ensure distance_metric is a list/tuple for multi-algorithm handling
57
56
  if isinstance(distance_metric, (str, np.ndarray)):
58
57
  distance_metric = [distance_metric]
59
- # Ensure edge_length_threshold is a list/tuple for multi-threshold handling
60
- if isinstance(edge_length_threshold, (float, int)):
61
- edge_length_threshold = [edge_length_threshold] * len(distance_metric)
58
+ # Ensure edge_rank_percentile is a list/tuple for multi-threshold handling
59
+ if isinstance(edge_rank_percentile, (float, int)):
60
+ edge_rank_percentile = [edge_rank_percentile] * len(distance_metric)
62
61
  # Check that the number of distance metrics matches the number of edge rank percentiles
63
- if len(distance_metric) != len(edge_length_threshold):
62
+ if len(distance_metric) != len(edge_rank_percentile):
64
63
  raise ValueError(
65
64
  "The number of distance metrics must match the number of edge length thresholds."
66
65
  )
@@ -69,29 +68,47 @@ def get_network_neighborhoods(
69
68
  num_nodes = network.number_of_nodes()
70
69
  combined_neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
71
70
 
72
- # Loop through each distance metric and corresponding edge length threshold
73
- for metric, threshold in zip(distance_metric, edge_length_threshold):
74
- # Create a subgraph based on the specific edge length threshold for this algorithm
75
- subgraph = _create_percentile_limited_subgraph(network, edge_length_percentile=threshold)
71
+ # Loop through each distance metric and corresponding edge rank percentile
72
+ for metric, percentile in zip(distance_metric, edge_rank_percentile):
76
73
  # Call the appropriate neighborhood function based on the metric
77
- if metric == "louvain":
78
- neighborhoods = calculate_louvain_neighborhoods(
79
- subgraph, louvain_resolution, random_seed=random_seed
74
+ if metric == "greedy_modularity":
75
+ neighborhoods = calculate_greedy_modularity_neighborhoods(
76
+ network, edge_rank_percentile=percentile
80
77
  )
81
- elif metric == "greedy_modularity":
82
- neighborhoods = calculate_greedy_modularity_neighborhoods(subgraph)
83
78
  elif metric == "label_propagation":
84
- neighborhoods = calculate_label_propagation_neighborhoods(subgraph)
79
+ neighborhoods = calculate_label_propagation_neighborhoods(
80
+ network, edge_rank_percentile=percentile
81
+ )
82
+ elif metric == "leiden":
83
+ neighborhoods = calculate_leiden_neighborhoods(
84
+ network,
85
+ resolution=leiden_resolution,
86
+ edge_rank_percentile=percentile,
87
+ random_seed=random_seed,
88
+ )
89
+ elif metric == "louvain":
90
+ neighborhoods = calculate_louvain_neighborhoods(
91
+ network,
92
+ resolution=louvain_resolution,
93
+ edge_rank_percentile=percentile,
94
+ random_seed=random_seed,
95
+ )
85
96
  elif metric == "markov_clustering":
86
- neighborhoods = calculate_markov_clustering_neighborhoods(subgraph)
87
- elif metric == "walktrap":
88
- neighborhoods = calculate_walktrap_neighborhoods(subgraph)
97
+ neighborhoods = calculate_markov_clustering_neighborhoods(
98
+ network, edge_rank_percentile=percentile
99
+ )
89
100
  elif metric == "spinglass":
90
- neighborhoods = calculate_spinglass_neighborhoods(subgraph)
101
+ neighborhoods = calculate_spinglass_neighborhoods(
102
+ network, edge_rank_percentile=percentile
103
+ )
104
+ elif metric == "walktrap":
105
+ neighborhoods = calculate_walktrap_neighborhoods(
106
+ network, edge_rank_percentile=percentile
107
+ )
91
108
  else:
92
109
  raise ValueError(
93
- "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'louvain',"
94
- "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
110
+ "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'label_propagation',"
111
+ "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
95
112
  )
96
113
 
97
114
  # Sum the neighborhood matrices
@@ -100,50 +117,16 @@ def get_network_neighborhoods(
100
117
  # Ensure that the maximum value in each row is set to 1
101
118
  # This ensures that for each row, only the strongest relationship (the maximum value) is retained,
102
119
  # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
103
- # focusing on the most significant connection per row.
104
- combined_neighborhoods = _set_max_to_one(combined_neighborhoods)
120
+ # focusing on the most significant connection per row (i.e., per node).
121
+ combined_neighborhoods = _set_max_row_value_to_one(combined_neighborhoods)
105
122
 
106
123
  return combined_neighborhoods
107
124
 
108
125
 
109
- def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
110
- """Create a subgraph containing all nodes and edges where the edge length is below the
111
- specified percentile of all edge lengths in the input graph.
112
-
113
- Args:
114
- G (nx.Graph): The input graph with 'length' attributes on edges.
115
- edge_length_percentile (float): The percentile (between 0 and 1) to filter edges by length.
116
-
117
- Returns:
118
- nx.Graph: A subgraph with all nodes and edges where the edge length is below the
119
- calculated threshold length.
120
- """
121
- # Extract edge lengths and handle missing lengths
122
- edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
123
- if not edge_lengths:
124
- raise ValueError(
125
- "No edge lengths found in the graph. Ensure edges have 'length' attributes."
126
- )
127
-
128
- # Calculate the specific edge length for the given percentile
129
- percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
130
- # Create the subgraph by directly filtering edges during iteration
131
- subgraph = nx.Graph()
132
- subgraph.add_nodes_from(G.nodes(data=True)) # Retain all nodes from the original graph
133
- # Add edges below the specified percentile length in a single pass
134
- for u, v, d in G.edges(data=True):
135
- if d.get("length", 1) <= percentile_length:
136
- subgraph.add_edge(u, v, **d)
137
-
138
- # Return the subgraph; optionally check if it's too sparse
139
- if subgraph.number_of_edges() == 0:
140
- raise Warning("The resulting subgraph has no edges. Consider adjusting the percentile.")
141
-
142
- return subgraph
143
-
144
-
145
- def _set_max_to_one(matrix: np.ndarray) -> np.ndarray:
146
- """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0.
126
+ def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
127
+ """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
128
+ useful for neighborhood matrices that combine the results of multiple neighborhood detection algorithms, where the
129
+ maximum value in each row represents the most significant relationship per node in the combined neighborhoods.
147
130
 
148
131
  Args:
149
132
  matrix (np.ndarray): A 2D numpy array representing the neighborhood matrix.
@@ -617,8 +617,7 @@ class Labels:
617
617
  """
618
618
  # Return custom labels if domain is in ids_to_labels
619
619
  if ids_to_labels and domain in ids_to_labels:
620
- terms = ids_to_labels[domain].replace(" ", TERM_DELIMITER)
621
- return terms
620
+ return ids_to_labels[domain]
622
621
 
623
622
  else:
624
623
  terms = self.graph.domain_id_to_domain_terms_map[domain].split(" ")
@@ -52,7 +52,8 @@ class RISK(NetworkIO, AnnotationsIO):
52
52
  annotations: Dict[str, Any],
53
53
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
54
54
  louvain_resolution: float = 0.1,
55
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
55
+ leiden_resolution: float = 1.0,
56
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
56
57
  null_distribution: str = "network",
57
58
  random_seed: int = 888,
58
59
  ) -> Dict[str, Any]:
@@ -65,7 +66,8 @@ class RISK(NetworkIO, AnnotationsIO):
65
66
  metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
66
67
  'leiden', 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
67
68
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
68
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
69
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
70
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
69
71
  Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
70
72
  Defaults to 0.5.
71
73
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
@@ -79,7 +81,8 @@ class RISK(NetworkIO, AnnotationsIO):
79
81
  params.log_neighborhoods(
80
82
  distance_metric=distance_metric,
81
83
  louvain_resolution=louvain_resolution,
82
- edge_length_threshold=edge_length_threshold,
84
+ leiden_resolution=leiden_resolution,
85
+ edge_rank_percentile=edge_rank_percentile,
83
86
  statistical_test_function="hypergeom",
84
87
  null_distribution=null_distribution,
85
88
  random_seed=random_seed,
@@ -93,7 +96,8 @@ class RISK(NetworkIO, AnnotationsIO):
93
96
  network,
94
97
  distance_metric,
95
98
  louvain_resolution=louvain_resolution,
96
- edge_length_threshold=edge_length_threshold,
99
+ leiden_resolution=leiden_resolution,
100
+ edge_rank_percentile=edge_rank_percentile,
97
101
  random_seed=random_seed,
98
102
  )
99
103
  # Run hypergeometric test to compute neighborhood significance
@@ -112,7 +116,8 @@ class RISK(NetworkIO, AnnotationsIO):
112
116
  annotations: Dict[str, Any],
113
117
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
114
118
  louvain_resolution: float = 0.1,
115
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
119
+ leiden_resolution: float = 1.0,
120
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
116
121
  null_distribution: str = "network",
117
122
  random_seed: int = 888,
118
123
  ) -> Dict[str, Any]:
@@ -125,7 +130,8 @@ class RISK(NetworkIO, AnnotationsIO):
125
130
  metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
126
131
  'leiden', 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
127
132
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
128
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
133
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
134
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
129
135
  Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
130
136
  Defaults to 0.5.
131
137
  null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
@@ -139,7 +145,8 @@ class RISK(NetworkIO, AnnotationsIO):
139
145
  params.log_neighborhoods(
140
146
  distance_metric=distance_metric,
141
147
  louvain_resolution=louvain_resolution,
142
- edge_length_threshold=edge_length_threshold,
148
+ leiden_resolution=leiden_resolution,
149
+ edge_rank_percentile=edge_rank_percentile,
143
150
  statistical_test_function="poisson",
144
151
  null_distribution=null_distribution,
145
152
  random_seed=random_seed,
@@ -153,7 +160,8 @@ class RISK(NetworkIO, AnnotationsIO):
153
160
  network,
154
161
  distance_metric,
155
162
  louvain_resolution=louvain_resolution,
156
- edge_length_threshold=edge_length_threshold,
163
+ leiden_resolution=leiden_resolution,
164
+ edge_rank_percentile=edge_rank_percentile,
157
165
  random_seed=random_seed,
158
166
  )
159
167
  # Run Poisson test to compute neighborhood significance
@@ -172,7 +180,8 @@ class RISK(NetworkIO, AnnotationsIO):
172
180
  annotations: Dict[str, Any],
173
181
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
174
182
  louvain_resolution: float = 0.1,
175
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
183
+ leiden_resolution: float = 1.0,
184
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
176
185
  score_metric: str = "sum",
177
186
  null_distribution: str = "network",
178
187
  num_permutations: int = 1000,
@@ -188,7 +197,8 @@ class RISK(NetworkIO, AnnotationsIO):
188
197
  metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
189
198
  'leiden', 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
190
199
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
191
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
200
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
201
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
192
202
  Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
193
203
  Defaults to 0.5.
194
204
  score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
@@ -205,7 +215,8 @@ class RISK(NetworkIO, AnnotationsIO):
205
215
  params.log_neighborhoods(
206
216
  distance_metric=distance_metric,
207
217
  louvain_resolution=louvain_resolution,
208
- edge_length_threshold=edge_length_threshold,
218
+ leiden_resolution=leiden_resolution,
219
+ edge_rank_percentile=edge_rank_percentile,
209
220
  statistical_test_function="permutation",
210
221
  score_metric=score_metric,
211
222
  null_distribution=null_distribution,
@@ -222,7 +233,8 @@ class RISK(NetworkIO, AnnotationsIO):
222
233
  network,
223
234
  distance_metric,
224
235
  louvain_resolution=louvain_resolution,
225
- edge_length_threshold=edge_length_threshold,
236
+ leiden_resolution=leiden_resolution,
237
+ edge_rank_percentile=edge_rank_percentile,
226
238
  random_seed=random_seed,
227
239
  )
228
240
 
@@ -408,7 +420,8 @@ class RISK(NetworkIO, AnnotationsIO):
408
420
  network: nx.Graph,
409
421
  distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
410
422
  louvain_resolution: float = 0.1,
411
- edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
423
+ leiden_resolution: float = 1.0,
424
+ edge_rank_percentile: Union[float, List, Tuple, np.ndarray] = 0.5,
412
425
  random_seed: int = 888,
413
426
  ) -> np.ndarray:
414
427
  """Load significant neighborhoods for the network.
@@ -420,7 +433,8 @@ class RISK(NetworkIO, AnnotationsIO):
420
433
  metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
421
434
  'leiden', 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
422
435
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
423
- edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
436
+ leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
437
+ edge_rank_percentile (float, List, Tuple, or np.ndarray, optional): Shortest edge rank percentile threshold(s) for creating subgraphs.
424
438
  Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
425
439
  Defaults to 0.5.
426
440
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
@@ -431,19 +445,22 @@ class RISK(NetworkIO, AnnotationsIO):
431
445
  # Display the chosen distance metric
432
446
  if distance_metric == "louvain":
433
447
  for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
448
+ elif distance_metric == "leiden":
449
+ for_print_distance_metric = f"leiden (resolution={leiden_resolution})"
434
450
  else:
435
451
  for_print_distance_metric = distance_metric
436
452
  # Log and display neighborhood settings
437
453
  logger.debug(f"Distance metric: '{for_print_distance_metric}'")
438
- logger.debug(f"Edge length threshold: {edge_length_threshold}")
454
+ logger.debug(f"Edge rank percentile: {edge_rank_percentile}")
439
455
  logger.debug(f"Random seed: {random_seed}")
440
456
 
441
457
  # Compute neighborhoods based on the network and distance metric
442
458
  neighborhoods = get_network_neighborhoods(
443
459
  network,
444
460
  distance_metric,
445
- edge_length_threshold,
461
+ edge_rank_percentile,
446
462
  louvain_resolution=louvain_resolution,
463
+ leiden_resolution=leiden_resolution,
447
464
  random_seed=random_seed,
448
465
  )
449
466
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.9b9
3
+ Version: 0.0.9b13
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -695,6 +695,7 @@ Requires-Python: >=3.8
695
695
  Description-Content-Type: text/markdown
696
696
  License-File: LICENSE
697
697
  Requires-Dist: ipywidgets
698
+ Requires-Dist: leidenalg
698
699
  Requires-Dist: markov_clustering
699
700
  Requires-Dist: matplotlib
700
701
  Requires-Dist: networkx
@@ -702,6 +703,7 @@ Requires-Dist: nltk==3.8.1
702
703
  Requires-Dist: numpy
703
704
  Requires-Dist: openpyxl
704
705
  Requires-Dist: pandas
706
+ Requires-Dist: python-igraph
705
707
  Requires-Dist: python-louvain
706
708
  Requires-Dist: scikit-learn
707
709
  Requires-Dist: scipy
@@ -1,4 +1,5 @@
1
1
  ipywidgets
2
+ leidenalg
2
3
  markov_clustering
3
4
  matplotlib
4
5
  networkx
@@ -6,6 +7,7 @@ nltk==3.8.1
6
7
  numpy
7
8
  openpyxl
8
9
  pandas
10
+ python-igraph
9
11
  python-louvain
10
12
  scikit-learn
11
13
  scipy
@@ -31,13 +31,14 @@ setup(
31
31
  include_package_data=True,
32
32
  install_requires=[
33
33
  "ipywidgets",
34
+ "leidenalg",
34
35
  "markov_clustering",
35
36
  "matplotlib",
36
37
  "networkx",
37
38
  "nltk==3.8.1",
38
39
  "numpy",
39
40
  "openpyxl",
40
- "pandas",
41
+ "pandas", "python-igraph",
41
42
  "python-louvain",
42
43
  "scikit-learn",
43
44
  "scipy",
@@ -1,189 +0,0 @@
1
- """
2
- risk/neighborhoods/community
3
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
- """
5
-
6
- import community as community_louvain
7
- import networkx as nx
8
- import numpy as np
9
- import markov_clustering as mc
10
- from networkx.algorithms.community import asyn_lpa_communities, greedy_modularity_communities
11
-
12
-
13
- def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
14
- """Calculate neighborhoods using the Greedy Modularity method.
15
-
16
- Args:
17
- network (nx.Graph): The network graph to analyze for community structure.
18
-
19
- Returns:
20
- np.ndarray: A binary neighborhood matrix where nodes in the same community have 1, and others have 0.
21
- """
22
- # Detect communities using the Greedy Modularity method
23
- communities = greedy_modularity_communities(network)
24
- # Create a binary neighborhood matrix
25
- n_nodes = network.number_of_nodes()
26
- neighborhoods = np.zeros((n_nodes, n_nodes), dtype=int)
27
- # Create a mapping from node to index in the matrix
28
- node_index = {node: i for i, node in enumerate(network.nodes())}
29
- # Fill in the neighborhood matrix for nodes in the same community
30
- for community in communities:
31
- # Iterate through all pairs of nodes in the same community
32
- for node_i in community:
33
- idx_i = node_index[node_i]
34
- for node_j in community:
35
- idx_j = node_index[node_j]
36
- # Set them as neighbors (1) in the binary matrix
37
- neighborhoods[idx_i, idx_j] = 1
38
-
39
- return neighborhoods
40
-
41
-
42
- def calculate_label_propagation_neighborhoods(network: nx.Graph) -> np.ndarray:
43
- """Apply Label Propagation to the network to detect communities.
44
-
45
- Args:
46
- network (nx.Graph): The network graph.
47
-
48
- Returns:
49
- np.ndarray: Binary neighborhood matrix on Label Propagation.
50
- """
51
- # Apply Label Propagation for community detection
52
- communities = nx.algorithms.community.label_propagation.label_propagation_communities(network)
53
- # Create a binary neighborhood matrix
54
- num_nodes = network.number_of_nodes()
55
- neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
56
- # Create a mapping from node to index in the matrix
57
- node_index = {node: i for i, node in enumerate(network.nodes())}
58
- # Assign neighborhoods based on community labels
59
- for community in communities:
60
- for node_i in community:
61
- idx_i = node_index[node_i]
62
- for node_j in community:
63
- idx_j = node_index[node_j]
64
- neighborhoods[idx_i, idx_j] = 1
65
-
66
- return neighborhoods
67
-
68
-
69
- def calculate_louvain_neighborhoods(
70
- network: nx.Graph, resolution: float, random_seed: int = 888
71
- ) -> np.ndarray:
72
- """Calculate neighborhoods using the Louvain method.
73
-
74
- Args:
75
- network (nx.Graph): The network graph.
76
- resolution (float): Resolution parameter for the Louvain method.
77
- random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
78
-
79
- Returns:
80
- np.ndarray: Binary neighborhood matrix on the Louvain method.
81
- """
82
- # Apply Louvain method to partition the network
83
- partition = community_louvain.best_partition(
84
- network, resolution=resolution, random_state=random_seed
85
- )
86
- # Create a binary neighborhood matrix
87
- num_nodes = network.number_of_nodes()
88
- neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
89
- # Create a mapping from node to index in the matrix
90
- node_index = {node: i for i, node in enumerate(network.nodes())}
91
- # Group nodes by community
92
- community_groups = {}
93
- for node, community in partition.items():
94
- community_groups.setdefault(community, []).append(node)
95
-
96
- # Assign neighborhoods based on community partitions
97
- for community, nodes in community_groups.items():
98
- for node_i in nodes:
99
- idx_i = node_index[node_i]
100
- for node_j in nodes:
101
- idx_j = node_index[node_j]
102
- neighborhoods[idx_i, idx_j] = 1
103
-
104
- return neighborhoods
105
-
106
-
107
- def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
108
- """Apply Markov Clustering (MCL) to the network.
109
-
110
- Args:
111
- network (nx.Graph): The network graph.
112
-
113
- Returns:
114
- np.ndarray: Binary neighborhood matrix on Markov Clustering.
115
- """
116
- # Convert the graph to an adjacency matrix
117
- adjacency_matrix = nx.to_numpy_array(network)
118
- # Run Markov Clustering (MCL)
119
- result = mc.run_mcl(adjacency_matrix) # MCL with default parameters
120
- # Get clusters (communities) from MCL result
121
- clusters = mc.get_clusters(result)
122
- # Create a binary neighborhood matrix
123
- num_nodes = network.number_of_nodes()
124
- neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
125
- # Create a mapping from node to index in the matrix
126
- node_index = {node: i for i, node in enumerate(network.nodes())}
127
- # Assign neighborhoods based on MCL clusters
128
- for cluster in clusters:
129
- for node_i in cluster:
130
- idx_i = node_index[node_i]
131
- for node_j in cluster:
132
- idx_j = node_index[node_j]
133
- neighborhoods[idx_i, idx_j] = 1
134
-
135
- return neighborhoods
136
-
137
-
138
- def calculate_spinglass_neighborhoods(network: nx.Graph) -> np.ndarray:
139
- """Apply Spin Glass Community Detection to the network.
140
-
141
- Args:
142
- network (nx.Graph): The network graph.
143
-
144
- Returns:
145
- np.ndarray: Binary neighborhood matrix on Spin Glass communities.
146
- """
147
- # Apply Asynchronous Label Propagation (LPA)
148
- communities = asyn_lpa_communities(network)
149
- # Create a binary neighborhood matrix
150
- num_nodes = network.number_of_nodes()
151
- neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
152
- # Create a mapping from node to index in the matrix
153
- node_index = {node: i for i, node in enumerate(network.nodes())}
154
- # Assign neighborhoods based on community labels from LPA
155
- for community in communities:
156
- for node_i in community:
157
- idx_i = node_index[node_i]
158
- for node_j in community:
159
- idx_j = node_index[node_j]
160
- neighborhoods[idx_i, idx_j] = 1
161
-
162
- return neighborhoods
163
-
164
-
165
- def calculate_walktrap_neighborhoods(network: nx.Graph) -> np.ndarray:
166
- """Apply Walktrap Community Detection to the network.
167
-
168
- Args:
169
- network (nx.Graph): The network graph.
170
-
171
- Returns:
172
- np.ndarray: Binary neighborhood matrix on Walktrap communities.
173
- """
174
- # Apply Asynchronous Label Propagation (LPA)
175
- communities = asyn_lpa_communities(network)
176
- # Create a binary neighborhood matrix
177
- num_nodes = network.number_of_nodes()
178
- neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
179
- # Create a mapping from node to index in the matrix
180
- node_index = {node: i for i, node in enumerate(network.nodes())}
181
- # Assign neighborhoods based on community labels from LPA
182
- for community in communities:
183
- for node_i in community:
184
- idx_i = node_index[node_i]
185
- for node_j in community:
186
- idx_j = node_index[node_j]
187
- neighborhoods[idx_i, idx_j] = 1
188
-
189
- return neighborhoods
File without changes