risk-network 0.0.12b0__py3-none-any.whl → 0.0.12b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. risk/__init__.py +1 -1
  2. risk/annotations/__init__.py +10 -0
  3. risk/annotations/annotations.py +354 -0
  4. risk/annotations/io.py +241 -0
  5. risk/annotations/nltk_setup.py +86 -0
  6. risk/log/__init__.py +11 -0
  7. risk/log/console.py +141 -0
  8. risk/log/parameters.py +171 -0
  9. risk/neighborhoods/__init__.py +7 -0
  10. risk/neighborhoods/api.py +442 -0
  11. risk/neighborhoods/community.py +441 -0
  12. risk/neighborhoods/domains.py +360 -0
  13. risk/neighborhoods/neighborhoods.py +514 -0
  14. risk/neighborhoods/stats/__init__.py +13 -0
  15. risk/neighborhoods/stats/permutation/__init__.py +6 -0
  16. risk/neighborhoods/stats/permutation/permutation.py +240 -0
  17. risk/neighborhoods/stats/permutation/test_functions.py +70 -0
  18. risk/neighborhoods/stats/tests.py +275 -0
  19. risk/network/__init__.py +4 -0
  20. risk/network/graph/__init__.py +4 -0
  21. risk/network/graph/api.py +200 -0
  22. risk/network/graph/graph.py +268 -0
  23. risk/network/graph/stats.py +166 -0
  24. risk/network/graph/summary.py +253 -0
  25. risk/network/io.py +693 -0
  26. risk/network/plotter/__init__.py +4 -0
  27. risk/network/plotter/api.py +54 -0
  28. risk/network/plotter/canvas.py +291 -0
  29. risk/network/plotter/contour.py +329 -0
  30. risk/network/plotter/labels.py +935 -0
  31. risk/network/plotter/network.py +294 -0
  32. risk/network/plotter/plotter.py +141 -0
  33. risk/network/plotter/utils/colors.py +419 -0
  34. risk/network/plotter/utils/layout.py +94 -0
  35. risk_network-0.0.12b1.dist-info/METADATA +122 -0
  36. risk_network-0.0.12b1.dist-info/RECORD +40 -0
  37. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/WHEEL +1 -1
  38. risk_network-0.0.12b0.dist-info/METADATA +0 -796
  39. risk_network-0.0.12b0.dist-info/RECORD +0 -7
  40. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/licenses/LICENSE +0 -0
  41. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,441 @@
1
+ """
2
+ risk/neighborhoods/community
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ import community as community_louvain
7
+ import igraph as ig
8
+ import markov_clustering as mc
9
+ import networkx as nx
10
+ import numpy as np
11
+ from leidenalg import RBConfigurationVertexPartition, find_partition
12
+ from networkx.algorithms.community import greedy_modularity_communities
13
+ from scipy.sparse import csr_matrix
14
+
15
+ from risk.log import logger
16
+
17
+
18
def calculate_greedy_modularity_neighborhoods(
    network: nx.Graph, fraction_shortest_edges: float = 1.0
) -> csr_matrix:
    """Build a community co-membership matrix using Greedy Modularity clustering.

    Args:
        network (nx.Graph): The network graph.
        fraction_shortest_edges (float, optional): Rank fraction of the shortest edges kept in the
            subgraph that is clustered. Defaults to 1.0.

    Returns:
        csr_matrix: A square matrix indexed by the node order of `network`, storing 1 for every
            pair of nodes assigned to the same community. Nodes dropped from the filtered
            subgraph have no stored entries.

    Raises:
        ValueError: If no edge carries a 'length' attribute or the fraction selects no edges.
        Warning: If the resulting subgraph has no edges after filtering.
    """
    # Only the subgraph made of the shortest edges is clustered
    filtered = _create_percentile_limited_subgraph(
        network, fraction_shortest_edges=fraction_shortest_edges
    )
    detected = greedy_modularity_communities(filtered)
    # Matrix positions follow the node order of the full network, not the filtered subgraph
    position = {node: idx for idx, node in enumerate(network.nodes())}
    size = len(position)
    # Each community contributes the full cross product of its member indices
    row_parts = []
    col_parts = []
    for members in detected:
        idx = np.fromiter((position[member] for member in members), dtype=int)
        row_parts.append(np.repeat(idx, idx.size))
        col_parts.append(np.tile(idx, idx.size))

    if row_parts:
        row_indices = np.concatenate(row_parts)
        col_indices = np.concatenate(col_parts)
    else:
        row_indices = np.empty(0, dtype=int)
        col_indices = np.empty(0, dtype=int)

    values = np.ones(row_indices.size, dtype=int)
    return csr_matrix((values, (row_indices, col_indices)), shape=(size, size))
60
+
61
+
62
def calculate_label_propagation_neighborhoods(
    network: nx.Graph, fraction_shortest_edges: float = 1.0
) -> csr_matrix:
    """Build a community co-membership matrix using Label Propagation.

    Args:
        network (nx.Graph): The network graph.
        fraction_shortest_edges (float, optional): Rank fraction of the shortest edges kept in the
            subgraph that is clustered. Defaults to 1.0.

    Returns:
        csr_matrix: A square matrix indexed by the node order of `network`, storing 1 for every
            pair of nodes that share a Label Propagation community.

    Raises:
        ValueError: If no edge carries a 'length' attribute or the fraction selects no edges.
        Warning: If the resulting subgraph has no edges after filtering.
    """
    # Only the subgraph made of the shortest edges is clustered
    filtered = _create_percentile_limited_subgraph(
        network, fraction_shortest_edges=fraction_shortest_edges
    )
    detected = nx.algorithms.community.label_propagation.label_propagation_communities(filtered)
    # Matrix positions follow the node order of the full network
    position = {node: idx for idx, node in enumerate(network.nodes())}
    size = len(position)
    # Each community contributes the full cross product of its member indices
    row_parts = []
    col_parts = []
    for members in detected:
        idx = np.fromiter((position[member] for member in members), dtype=int)
        row_parts.append(np.repeat(idx, idx.size))
        col_parts.append(np.tile(idx, idx.size))

    if row_parts:
        row_indices = np.concatenate(row_parts)
        col_indices = np.concatenate(col_parts)
    else:
        row_indices = np.empty(0, dtype=int)
        col_indices = np.empty(0, dtype=int)

    values = np.ones(row_indices.size, dtype=int)
    return csr_matrix((values, (row_indices, col_indices)), shape=(size, size))
107
+
108
+
109
def calculate_leiden_neighborhoods(
    network: nx.Graph,
    resolution: float = 1.0,
    fraction_shortest_edges: float = 1.0,
    random_seed: int = 888,
) -> csr_matrix:
    """Build a community co-membership matrix using the Leiden method.

    Args:
        network (nx.Graph): The network graph.
        resolution (float, optional): Resolution parameter passed to the Leiden partition.
            Defaults to 1.0.
        fraction_shortest_edges (float, optional): Rank fraction of the shortest edges kept in the
            subgraph that is clustered. Defaults to 1.0.
        random_seed (int, optional): Seed passed to the Leiden optimiser. Defaults to 888.

    Returns:
        csr_matrix: A square matrix indexed by the node order of `network`, storing 1 for every
            pair of nodes assigned to the same community.

    Raises:
        ValueError: If no edge carries a 'length' attribute or the fraction selects no edges.
        Warning: If the resulting subgraph has no edges after filtering.
    """
    # Only the subgraph made of the shortest edges is clustered
    filtered = _create_percentile_limited_subgraph(
        network, fraction_shortest_edges=fraction_shortest_edges
    )
    ig_graph = ig.Graph.from_networkx(filtered)
    # RBConfigurationVertexPartition is used because it accepts a resolution parameter
    clustering = find_partition(
        ig_graph,
        partition_type=RBConfigurationVertexPartition,
        resolution_parameter=resolution,
        seed=random_seed,
    )
    # Matrix positions follow the node order of the full network
    position = {node: idx for idx, node in enumerate(network.nodes())}
    size = len(position)
    # iGraph vertex ids are translated back to NetworkX node names via "_nx_name"
    vertex_names = ig_graph.vs["_nx_name"]
    row_parts = []
    col_parts = []
    for members in clustering:
        idx = np.fromiter((position[vertex_names[vertex]] for vertex in members), dtype=int)
        row_parts.append(np.repeat(idx, idx.size))
        col_parts.append(np.tile(idx, idx.size))

    if row_parts:
        row_indices = np.concatenate(row_parts)
        col_indices = np.concatenate(col_parts)
    else:
        row_indices = np.empty(0, dtype=int)
        col_indices = np.empty(0, dtype=int)

    values = np.ones(row_indices.size, dtype=int)
    return csr_matrix((values, (row_indices, col_indices)), shape=(size, size))
163
+
164
+
165
def calculate_louvain_neighborhoods(
    network: nx.Graph,
    resolution: float = 0.1,
    fraction_shortest_edges: float = 1.0,
    random_seed: int = 888,
) -> csr_matrix:
    """Build a community co-membership matrix using the Louvain method.

    Args:
        network (nx.Graph): The network graph.
        resolution (float, optional): Resolution parameter passed to the Louvain partitioner.
            Defaults to 0.1.
        fraction_shortest_edges (float, optional): Rank fraction of the shortest edges kept in the
            subgraph that is clustered. Defaults to 1.0.
        random_seed (int, optional): Random state passed to the Louvain partitioner.
            Defaults to 888.

    Returns:
        csr_matrix: A square matrix indexed by the node order of `network`, storing 1 for every
            pair of nodes assigned to the same community.

    Raises:
        ValueError: If no edge carries a 'length' attribute or the fraction selects no edges.
        Warning: If the resulting subgraph has no edges after filtering.
    """
    # Only the subgraph made of the shortest edges is clustered
    filtered = _create_percentile_limited_subgraph(
        network, fraction_shortest_edges=fraction_shortest_edges
    )
    # best_partition returns a node -> community label mapping
    labels = community_louvain.best_partition(
        filtered, resolution=resolution, random_state=random_seed
    )
    # Matrix positions follow the node order of the full network
    position = {node: idx for idx, node in enumerate(network.nodes())}
    size = len(position)
    # Invert the mapping: community label -> matrix indices of its members
    members_by_label = {}
    for node, label in labels.items():
        if label not in members_by_label:
            members_by_label[label] = []
        members_by_label[label].append(position[node])

    # Each community contributes the full cross product of its member indices
    row_parts = []
    col_parts = []
    for member_indices in members_by_label.values():
        idx = np.asarray(member_indices, dtype=int)
        row_parts.append(np.repeat(idx, idx.size))
        col_parts.append(np.tile(idx, idx.size))

    if row_parts:
        row_indices = np.concatenate(row_parts)
        col_indices = np.concatenate(col_parts)
    else:
        row_indices = np.empty(0, dtype=int)
        col_indices = np.empty(0, dtype=int)

    values = np.ones(row_indices.size, dtype=int)
    return csr_matrix((values, (row_indices, col_indices)), shape=(size, size))
219
+
220
+
221
def calculate_markov_clustering_neighborhoods(
    network: nx.Graph, fraction_shortest_edges: float = 1.0
) -> csr_matrix:
    """Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix (CSR).

    Args:
        network (nx.Graph): The network graph.
        fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
            subgraphs before clustering. Defaults to 1.0.

    Returns:
        csr_matrix: A square matrix indexed by the node order of `network`, storing 1 for every
            pair of nodes that share at least one MCL cluster.

    Raises:
        ValueError: If the subgraph has no edges after filtering or its adjacency matrix is empty.
        RuntimeError: If MCL fails to run (the original exception is chained as the cause).
        Warning: If the resulting subgraph has no edges after filtering.
    """
    # Create a subgraph with the shortest edges based on the rank fraction
    subnetwork = _create_percentile_limited_subgraph(
        network, fraction_shortest_edges=fraction_shortest_edges
    )
    # Check if the subgraph has edges
    if subnetwork.number_of_edges() == 0:
        raise ValueError("The subgraph has no edges. Adjust the fraction_shortest_edges parameter.")

    # Step 1: Convert the subnetwork to an adjacency matrix; the explicit nodelist pins the
    # row/column order so MCL indices can be mapped back to node names below
    subnetwork_nodes = list(subnetwork.nodes())
    adjacency_matrix = nx.to_scipy_sparse_array(subnetwork, nodelist=subnetwork_nodes)
    # Ensure the adjacency matrix is valid
    if 0 in adjacency_matrix.shape:
        raise ValueError(
            "The adjacency matrix is empty. Check the input graph or filtering criteria."
        )

    # Step 2: Run Markov Clustering (MCL) on the dense adjacency matrix
    dense_matrix = adjacency_matrix.toarray()
    try:
        result = mc.run_mcl(dense_matrix)
    except Exception as e:
        # Chain the cause so the underlying MCL traceback is not lost
        raise RuntimeError(f"Markov Clustering failed: {e}") from e

    clusters = mc.get_clusters(result)
    # Step 3: Translate subnetwork row positions to indices in the original network once,
    # instead of per pair. Every subnetwork node originates from `network`, so the lookup
    # cannot miss.
    node_index_map = {node: idx for idx, node in enumerate(network.nodes())}
    num_nodes = len(node_index_map)
    sub_to_full = np.fromiter(
        (node_index_map[node] for node in subnetwork_nodes),
        dtype=int,
        count=len(subnetwork_nodes),
    )
    # Step 4: Each cluster contributes the full cross product of its member indices
    row_parts = []
    col_parts = []
    for cluster in clusters:
        idx = sub_to_full[np.asarray(cluster, dtype=int)]
        row_parts.append(np.repeat(idx, idx.size))
        col_parts.append(np.tile(idx, idx.size))

    if row_parts:
        row_indices = np.concatenate(row_parts)
        col_indices = np.concatenate(col_parts)
    else:
        row_indices = np.empty(0, dtype=int)
        col_indices = np.empty(0, dtype=int)

    # Step 5: Create a CSR matrix
    data = np.ones(row_indices.size, dtype=int)
    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
    # Clusters may overlap; the CSR constructor sums duplicate (i, j) entries, so reset
    # every stored value to 1 to keep the matrix binary
    neighborhoods.data[:] = 1

    return neighborhoods
289
+
290
+
291
def calculate_spinglass_neighborhoods(
    network: nx.Graph, fraction_shortest_edges: float = 1.0
) -> csr_matrix:
    """Build a community co-membership matrix using Spinglass, one connected component at a time.

    Components for which Spinglass fails are logged and skipped, so their nodes receive no
    entries in the returned matrix.

    Args:
        network (nx.Graph): The network graph.
        fraction_shortest_edges (float, optional): Rank fraction of the shortest edges kept in the
            subgraph used to determine connected components. Defaults to 1.0.

    Returns:
        csr_matrix: A square matrix indexed by the node order of `network`, storing 1 for every
            pair of nodes assigned to the same Spinglass community.

    Raises:
        ValueError: If no edge carries a 'length' attribute or the fraction selects no edges.
        Warning: If the resulting subgraph has no edges after filtering.
    """
    # Components are derived from the subgraph made of the shortest edges
    filtered = _create_percentile_limited_subgraph(
        network, fraction_shortest_edges=fraction_shortest_edges
    )
    # Matrix positions follow the node order of the full network
    position = {node: idx for idx, node in enumerate(network.nodes())}
    size = len(position)
    row_parts = []
    col_parts = []
    for component in nx.connected_components(filtered):
        # NOTE(review): the induced subgraph is taken from the unfiltered `network`, so edges
        # removed by the rank-fraction filter are present again here - confirm this is intended
        ig_component = ig.Graph.from_networkx(network.subgraph(component))
        # Guard against disconnected input before running Spinglass
        if not ig_component.is_connected():
            logger.error("Warning: Subgraph is not connected. Skipping...")
            continue

        try:
            communities = ig_component.community_spinglass()
        except Exception as e:
            logger.error(f"Error running Spinglass on component: {e}")
            continue

        # iGraph vertex ids are translated back to NetworkX node names via "_nx_name"
        vertex_names = ig_component.vs["_nx_name"]
        for members in communities:
            idx = np.fromiter((position[vertex_names[vertex]] for vertex in members), dtype=int)
            row_parts.append(np.repeat(idx, idx.size))
            col_parts.append(np.tile(idx, idx.size))

    # Every component may have been skipped, so the empty case must be handled
    if row_parts:
        row_indices = np.concatenate(row_parts)
        col_indices = np.concatenate(col_parts)
    else:
        row_indices = np.empty(0, dtype=int)
        col_indices = np.empty(0, dtype=int)

    values = np.ones(row_indices.size, dtype=int)
    return csr_matrix((values, (row_indices, col_indices)), shape=(size, size))
353
+
354
+
355
def calculate_walktrap_neighborhoods(
    network: nx.Graph, fraction_shortest_edges: float = 1.0
) -> csr_matrix:
    """Build a community co-membership matrix using Walktrap community detection.

    Args:
        network (nx.Graph): The network graph.
        fraction_shortest_edges (float, optional): Rank fraction of the shortest edges kept in the
            subgraph that is clustered. Defaults to 1.0.

    Returns:
        csr_matrix: A square matrix indexed by the node order of `network`, storing 1 for every
            pair of nodes assigned to the same Walktrap community.

    Raises:
        ValueError: If no edge carries a 'length' attribute or the fraction selects no edges.
        Warning: If the resulting subgraph has no edges after filtering.
    """
    # Only the subgraph made of the shortest edges is clustered
    filtered = _create_percentile_limited_subgraph(
        network, fraction_shortest_edges=fraction_shortest_edges
    )
    ig_graph = ig.Graph.from_networkx(filtered)
    # Walktrap yields a dendrogram; as_clustering() converts it into a flat clustering
    clustering = ig_graph.community_walktrap().as_clustering()
    # Matrix positions follow the node order of the full network
    position = {node: idx for idx, node in enumerate(network.nodes())}
    size = len(position)
    # iGraph vertex ids are translated back to NetworkX node names via "_nx_name"
    vertex_names = ig_graph.vs["_nx_name"]
    row_parts = []
    col_parts = []
    for members in clustering:
        idx = np.fromiter((position[vertex_names[vertex]] for vertex in members), dtype=int)
        row_parts.append(np.repeat(idx, idx.size))
        col_parts.append(np.tile(idx, idx.size))

    if row_parts:
        row_indices = np.concatenate(row_parts)
        col_indices = np.concatenate(col_parts)
    else:
        row_indices = np.empty(0, dtype=int)
        col_indices = np.empty(0, dtype=int)

    values = np.ones(row_indices.size, dtype=int)
    return csr_matrix((values, (row_indices, col_indices)), shape=(size, size))
399
+
400
+
401
def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: float) -> nx.Graph:
    """Create a subgraph containing the shortest edges based on the specified rank fraction
    of all edge lengths in the input graph.

    Edges without a 'length' attribute are ignored. Nodes left without any edge after
    filtering are removed from the result. A fraction above 1 behaves like 1.0 because the
    cutoff is applied as a slice bound.

    Args:
        G (nx.Graph): The input graph with 'length' attributes on edges.
        fraction_shortest_edges (float): The rank fraction (between 0 and 1) to filter edges.

    Returns:
        nx.Graph: A subgraph with nodes and edges where the edges are within the shortest
            specified rank fraction.

    Raises:
        ValueError: If no edges with 'length' attributes are found in the graph, or if the
            rank fraction is zero, negative, or too small to select at least one edge.
        Warning: If the resulting subgraph has no edges after filtering.
    """
    # Step 1: Extract edges with their lengths
    edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
    if not edges_with_length:
        raise ValueError(
            "No edge lengths found in the graph. Ensure edges have 'length' attributes."
        )

    # Step 2: Sort edges by length in ascending order
    edges_with_length.sort(key=lambda edge: edge[2]["length"])
    # Step 3: Calculate the cutoff index for the given rank fraction
    cutoff_index = int(fraction_shortest_edges * len(edges_with_length))
    # A negative fraction would otherwise produce a negative slice bound and silently keep
    # "all but the last k" edges, so reject it together with the zero case
    if cutoff_index <= 0:
        raise ValueError("The rank fraction is too low, resulting in no edges being included.")

    # Step 4: Create the subgraph by selecting only the shortest edges within the rank fraction
    subgraph = nx.Graph()
    # Nodes are added first so their attributes are carried over from the input graph
    subgraph.add_nodes_from(G.nodes(data=True))
    subgraph.add_edges_from(edges_with_length[:cutoff_index])
    # Step 5: Remove nodes with no edges
    subgraph.remove_nodes_from(list(nx.isolates(subgraph)))
    # Step 6: Defensive check; with cutoff_index >= 1 at least one edge has been added, so
    # this is not expected to trigger
    if subgraph.number_of_edges() == 0:
        raise Warning("The resulting subgraph has no edges. Consider adjusting the rank fraction.")

    return subgraph