risk-network 0.0.9b26__py3-none-any.whl → 0.0.9b27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/annotations.py +39 -38
- risk/neighborhoods/api.py +1 -5
- risk/neighborhoods/community.py +140 -95
- risk/neighborhoods/neighborhoods.py +34 -18
- risk/network/geometry.py +24 -27
- risk/network/io.py +39 -15
- risk/stats/__init__.py +8 -6
- risk/stats/{stats.py → significance.py} +2 -2
- risk/stats/stat_tests.py +267 -0
- {risk_network-0.0.9b26.dist-info → risk_network-0.0.9b27.dist-info}/METADATA +1 -1
- {risk_network-0.0.9b26.dist-info → risk_network-0.0.9b27.dist-info}/RECORD +15 -19
- risk/stats/binom.py +0 -51
- risk/stats/chi2.py +0 -69
- risk/stats/hypergeom.py +0 -64
- risk/stats/poisson.py +0 -50
- risk/stats/zscore.py +0 -68
- {risk_network-0.0.9b26.dist-info → risk_network-0.0.9b27.dist-info}/LICENSE +0 -0
- {risk_network-0.0.9b26.dist-info → risk_network-0.0.9b27.dist-info}/WHEEL +0 -0
- {risk_network-0.0.9b26.dist-info → risk_network-0.0.9b27.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotations/annotations.py
CHANGED
@@ -16,7 +16,7 @@ from nltk.tokenize import word_tokenize
|
|
16
16
|
from nltk.corpus import stopwords
|
17
17
|
|
18
18
|
from risk.log import logger
|
19
|
-
from scipy.sparse import
|
19
|
+
from scipy.sparse import coo_matrix
|
20
20
|
|
21
21
|
|
22
22
|
def _setup_nltk():
|
@@ -41,14 +41,13 @@ stop_words = set(stopwords.words("english"))
|
|
41
41
|
def load_annotations(
|
42
42
|
network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
|
43
43
|
) -> Dict[str, Any]:
|
44
|
-
"""Convert annotations input to a
|
44
|
+
"""Convert annotations input to a sparse matrix and reindex based on the network's node labels.
|
45
45
|
|
46
46
|
Args:
|
47
47
|
network (nx.Graph): The network graph.
|
48
48
|
annotations_input (Dict[str, Any]): A dictionary with annotations.
|
49
49
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
50
50
|
term to be included. Defaults to 2.
|
51
|
-
use_sparse (bool, optional): Whether to return the annotations matrix as a sparse matrix. Defaults to True.
|
52
51
|
|
53
52
|
Returns:
|
54
53
|
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
|
@@ -58,51 +57,53 @@ def load_annotations(
|
|
58
57
|
ValueError: If no annotations are found for the nodes in the network.
|
59
58
|
ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
|
60
59
|
"""
|
61
|
-
#
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
#
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
60
|
+
# Step 1: Map nodes and annotations to indices
|
61
|
+
node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
|
62
|
+
node_to_idx = {node: i for i, node in enumerate(node_label_order)}
|
63
|
+
annotation_to_idx = {annotation: i for i, annotation in enumerate(annotations_input)}
|
64
|
+
# Step 2: Construct a sparse binary matrix directly
|
65
|
+
row = []
|
66
|
+
col = []
|
67
|
+
data = []
|
68
|
+
for annotation, nodes in annotations_input.items():
|
69
|
+
for node in nodes:
|
70
|
+
if node in node_to_idx and annotation in annotation_to_idx:
|
71
|
+
row.append(node_to_idx[node])
|
72
|
+
col.append(annotation_to_idx[annotation])
|
73
|
+
data.append(1)
|
74
|
+
|
75
|
+
# Create a sparse binary matrix
|
76
|
+
num_nodes = len(node_to_idx)
|
77
|
+
num_annotations = len(annotation_to_idx)
|
78
|
+
annotations_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotations)).tocsr()
|
79
|
+
# Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
|
80
|
+
valid_annotations = annotations_pivot.sum(axis=0).A1 >= min_nodes_per_term
|
81
|
+
annotations_pivot = annotations_pivot[:, valid_annotations]
|
82
|
+
# Step 4: Raise errors for empty matrices
|
83
|
+
if annotations_pivot.nnz == 0:
|
77
84
|
raise ValueError("No terms found in the annotation file for the nodes in the network.")
|
78
85
|
|
79
|
-
|
80
|
-
|
81
|
-
annotations_pivot = annotations_pivot.loc[
|
82
|
-
:, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
|
83
|
-
]
|
84
|
-
num_terms_after_filtering = annotations_pivot.shape[1]
|
85
|
-
# Log the number of annotations before and after filtering
|
86
|
-
logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
|
87
|
-
logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
|
88
|
-
logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
|
89
|
-
if num_terms_after_filtering == 0:
|
86
|
+
num_remaining_annotations = annotations_pivot.shape[1]
|
87
|
+
if num_remaining_annotations == 0:
|
90
88
|
raise ValueError(
|
91
89
|
f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
|
92
90
|
)
|
93
91
|
|
94
|
-
# Extract ordered nodes and annotations
|
95
|
-
ordered_nodes = tuple(
|
96
|
-
ordered_annotations = tuple(
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
92
|
+
# Step 5: Extract ordered nodes and annotations
|
93
|
+
ordered_nodes = tuple(node_label_order)
|
94
|
+
ordered_annotations = tuple(
|
95
|
+
annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotations) if is_valid
|
96
|
+
)
|
97
|
+
|
98
|
+
# Log the filtering details
|
99
|
+
logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
|
100
|
+
logger.info(f"Number of input annotation terms: {num_annotations}")
|
101
|
+
logger.info(f"Number of remaining annotation terms: {num_remaining_annotations}")
|
101
102
|
|
102
103
|
return {
|
103
104
|
"ordered_nodes": ordered_nodes,
|
104
105
|
"ordered_annotations": ordered_annotations,
|
105
|
-
"matrix":
|
106
|
+
"matrix": annotations_pivot,
|
106
107
|
}
|
107
108
|
|
108
109
|
|
risk/neighborhoods/api.py
CHANGED
@@ -368,7 +368,7 @@ class NeighborhoodsAPI:
|
|
368
368
|
)
|
369
369
|
|
370
370
|
# Make a copy of the network to avoid modifying the original
|
371
|
-
network = copy.
|
371
|
+
network = copy.copy(network)
|
372
372
|
# Load neighborhoods based on the network and distance metric
|
373
373
|
neighborhoods = self._load_neighborhoods(
|
374
374
|
network,
|
@@ -438,9 +438,5 @@ class NeighborhoodsAPI:
|
|
438
438
|
random_seed=random_seed,
|
439
439
|
)
|
440
440
|
|
441
|
-
# Ensure the neighborhood matrix is in sparse format
|
442
|
-
if not isinstance(neighborhoods, csr_matrix):
|
443
|
-
neighborhoods = csr_matrix(neighborhoods)
|
444
|
-
|
445
441
|
# Return the sparse neighborhood matrix
|
446
442
|
return neighborhoods
|
risk/neighborhoods/community.py
CHANGED
@@ -10,22 +10,23 @@ import networkx as nx
|
|
10
10
|
import numpy as np
|
11
11
|
from leidenalg import find_partition, RBConfigurationVertexPartition
|
12
12
|
from networkx.algorithms.community import greedy_modularity_communities
|
13
|
+
from scipy.sparse import csr_matrix
|
13
14
|
|
14
15
|
from risk.log import logger
|
15
16
|
|
16
17
|
|
17
18
|
def calculate_greedy_modularity_neighborhoods(
|
18
19
|
network: nx.Graph, fraction_shortest_edges: float = 1.0
|
19
|
-
) ->
|
20
|
-
"""Calculate neighborhoods using the Greedy Modularity method.
|
20
|
+
) -> csr_matrix:
|
21
|
+
"""Calculate neighborhoods using the Greedy Modularity method with CSR matrix output.
|
21
22
|
|
22
23
|
Args:
|
23
24
|
network (nx.Graph): The network graph.
|
24
25
|
fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
|
25
|
-
subgraphs before clustering.
|
26
|
+
subgraphs before clustering. Defaults to 1.0.
|
26
27
|
|
27
28
|
Returns:
|
28
|
-
|
29
|
+
csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
|
29
30
|
"""
|
30
31
|
# Create a subgraph with the shortest edges based on the rank fraction
|
31
32
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -36,35 +37,36 @@ def calculate_greedy_modularity_neighborhoods(
|
|
36
37
|
# Get the list of nodes in the original NetworkX graph
|
37
38
|
nodes = list(network.nodes())
|
38
39
|
node_index_map = {node: idx for idx, node in enumerate(nodes)}
|
39
|
-
#
|
40
|
-
|
41
|
-
|
42
|
-
neighborhoods = np.eye(num_nodes, dtype=int)
|
43
|
-
# Fill in the neighborhood matrix for nodes in the same community
|
40
|
+
# Prepare data for CSR matrix
|
41
|
+
row_indices = []
|
42
|
+
col_indices = []
|
44
43
|
for community in communities:
|
45
|
-
|
46
|
-
for
|
47
|
-
for
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
44
|
+
mapped_indices = [node_index_map[node] for node in community]
|
45
|
+
for i in mapped_indices:
|
46
|
+
for j in mapped_indices:
|
47
|
+
row_indices.append(i)
|
48
|
+
col_indices.append(j)
|
49
|
+
|
50
|
+
# Create a CSR matrix
|
51
|
+
num_nodes = len(nodes)
|
52
|
+
data = np.ones(len(row_indices), dtype=int)
|
53
|
+
neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
|
52
54
|
|
53
55
|
return neighborhoods
|
54
56
|
|
55
57
|
|
56
58
|
def calculate_label_propagation_neighborhoods(
|
57
59
|
network: nx.Graph, fraction_shortest_edges: float = 1.0
|
58
|
-
) ->
|
60
|
+
) -> csr_matrix:
|
59
61
|
"""Apply Label Propagation to the network to detect communities.
|
60
62
|
|
61
63
|
Args:
|
62
64
|
network (nx.Graph): The network graph.
|
63
65
|
fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
|
64
|
-
subgraphs before clustering.
|
66
|
+
subgraphs before clustering. Defaults to 1.0.
|
65
67
|
|
66
68
|
Returns:
|
67
|
-
|
69
|
+
csr_matrix: A binary neighborhood matrix (CSR) based on Label Propagation communities.
|
68
70
|
"""
|
69
71
|
# Create a subgraph with the shortest edges based on the rank fraction
|
70
72
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -77,17 +79,21 @@ def calculate_label_propagation_neighborhoods(
|
|
77
79
|
# Get the list of nodes in the network
|
78
80
|
nodes = list(network.nodes())
|
79
81
|
node_index_map = {node: idx for idx, node in enumerate(nodes)}
|
80
|
-
#
|
81
|
-
|
82
|
-
|
83
|
-
neighborhoods = np.eye(num_nodes, dtype=int)
|
82
|
+
# Prepare data for CSR matrix
|
83
|
+
row_indices = []
|
84
|
+
col_indices = []
|
84
85
|
# Assign neighborhoods based on community labels using the mapped indices
|
85
86
|
for community in communities:
|
86
|
-
for
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
87
|
+
mapped_indices = [node_index_map[node] for node in community]
|
88
|
+
for i in mapped_indices:
|
89
|
+
for j in mapped_indices:
|
90
|
+
row_indices.append(i)
|
91
|
+
col_indices.append(j)
|
92
|
+
|
93
|
+
# Create a CSR matrix
|
94
|
+
num_nodes = len(nodes)
|
95
|
+
data = np.ones(len(row_indices), dtype=int)
|
96
|
+
neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
|
91
97
|
|
92
98
|
return neighborhoods
|
93
99
|
|
@@ -97,18 +103,18 @@ def calculate_leiden_neighborhoods(
|
|
97
103
|
resolution: float = 1.0,
|
98
104
|
fraction_shortest_edges: float = 1.0,
|
99
105
|
random_seed: int = 888,
|
100
|
-
) ->
|
101
|
-
"""Calculate neighborhoods using the Leiden method.
|
106
|
+
) -> csr_matrix:
|
107
|
+
"""Calculate neighborhoods using the Leiden method with CSR matrix output.
|
102
108
|
|
103
109
|
Args:
|
104
110
|
network (nx.Graph): The network graph.
|
105
111
|
resolution (float, optional): Resolution parameter for the Leiden method. Defaults to 1.0.
|
106
112
|
fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
|
107
|
-
subgraphs before clustering.
|
113
|
+
subgraphs before clustering. Defaults to 1.0.
|
108
114
|
random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
|
109
115
|
|
110
116
|
Returns:
|
111
|
-
|
117
|
+
csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
|
112
118
|
"""
|
113
119
|
# Create a subgraph with the shortest edges based on the rank fraction
|
114
120
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -126,17 +132,20 @@ def calculate_leiden_neighborhoods(
|
|
126
132
|
# Get the list of nodes in the original NetworkX graph
|
127
133
|
nodes = list(network.nodes())
|
128
134
|
node_index_map = {node: idx for idx, node in enumerate(nodes)}
|
129
|
-
#
|
130
|
-
|
131
|
-
|
132
|
-
neighborhoods = np.eye(num_nodes, dtype=int)
|
133
|
-
# Assign neighborhoods based on community partitions using the mapped indices
|
135
|
+
# Prepare data for CSR matrix
|
136
|
+
row_indices = []
|
137
|
+
col_indices = []
|
134
138
|
for community in partition:
|
135
|
-
for
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
139
|
+
mapped_indices = [node_index_map[igraph_network.vs[node]["_nx_name"]] for node in community]
|
140
|
+
for i in mapped_indices:
|
141
|
+
for j in mapped_indices:
|
142
|
+
row_indices.append(i)
|
143
|
+
col_indices.append(j)
|
144
|
+
|
145
|
+
# Create a CSR matrix
|
146
|
+
num_nodes = len(nodes)
|
147
|
+
data = np.ones(len(row_indices), dtype=int)
|
148
|
+
neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
|
140
149
|
|
141
150
|
return neighborhoods
|
142
151
|
|
@@ -146,18 +155,18 @@ def calculate_louvain_neighborhoods(
|
|
146
155
|
resolution: float = 0.1,
|
147
156
|
fraction_shortest_edges: float = 1.0,
|
148
157
|
random_seed: int = 888,
|
149
|
-
) ->
|
158
|
+
) -> csr_matrix:
|
150
159
|
"""Calculate neighborhoods using the Louvain method.
|
151
160
|
|
152
161
|
Args:
|
153
162
|
network (nx.Graph): The network graph.
|
154
163
|
resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 0.1.
|
155
164
|
fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
|
156
|
-
subgraphs before clustering.
|
165
|
+
subgraphs before clustering. Defaults to 1.0.
|
157
166
|
random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
|
158
167
|
|
159
168
|
Returns:
|
160
|
-
|
169
|
+
csr_matrix: A binary neighborhood matrix in CSR format.
|
161
170
|
"""
|
162
171
|
# Create a subgraph with the shortest edges based on the rank fraction
|
163
172
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -170,83 +179,110 @@ def calculate_louvain_neighborhoods(
|
|
170
179
|
# Get the list of nodes in the network and create a mapping to indices
|
171
180
|
nodes = list(network.nodes())
|
172
181
|
node_index_map = {node: idx for idx, node in enumerate(nodes)}
|
173
|
-
# Create a binary neighborhood matrix
|
174
|
-
num_nodes = len(nodes)
|
175
|
-
# Initialize neighborhoods with zeros and set self-self entries to 1
|
176
|
-
neighborhoods = np.eye(num_nodes, dtype=int)
|
177
182
|
# Group nodes by community
|
178
183
|
community_groups = {}
|
179
184
|
for node, community in partition.items():
|
180
185
|
community_groups.setdefault(community, []).append(node)
|
181
186
|
|
182
|
-
#
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
187
|
+
# Prepare data for CSR matrix
|
188
|
+
row_indices = []
|
189
|
+
col_indices = []
|
190
|
+
for community_nodes in community_groups.values():
|
191
|
+
mapped_indices = [node_index_map[node] for node in community_nodes]
|
192
|
+
for i in mapped_indices:
|
193
|
+
for j in mapped_indices:
|
194
|
+
row_indices.append(i)
|
195
|
+
col_indices.append(j)
|
196
|
+
|
197
|
+
# Create a CSR matrix
|
198
|
+
num_nodes = len(nodes)
|
199
|
+
data = np.ones(len(row_indices), dtype=int)
|
200
|
+
neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
|
189
201
|
|
190
202
|
return neighborhoods
|
191
203
|
|
192
204
|
|
193
205
|
def calculate_markov_clustering_neighborhoods(
|
194
206
|
network: nx.Graph, fraction_shortest_edges: float = 1.0
|
195
|
-
) ->
|
196
|
-
"""Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix.
|
207
|
+
) -> csr_matrix:
|
208
|
+
"""Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix (CSR).
|
197
209
|
|
198
210
|
Args:
|
199
211
|
network (nx.Graph): The network graph.
|
200
212
|
fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
|
201
|
-
subgraphs before clustering.
|
213
|
+
subgraphs before clustering. Defaults to 1.0.
|
202
214
|
|
203
215
|
Returns:
|
204
|
-
|
216
|
+
csr_matrix: A binary neighborhood matrix (CSR) based on Markov Clustering clusters.
|
217
|
+
|
218
|
+
Warning:
|
219
|
+
This function temporarily converts the adjacency matrix to a dense format, which may lead to
|
220
|
+
high memory consumption for large graphs.
|
205
221
|
"""
|
206
222
|
# Create a subgraph with the shortest edges based on the rank fraction
|
207
223
|
subnetwork = _create_percentile_limited_subgraph(
|
208
224
|
network, fraction_shortest_edges=fraction_shortest_edges
|
209
225
|
)
|
210
|
-
#
|
226
|
+
# Check if the subgraph has edges
|
227
|
+
if subnetwork.number_of_edges() == 0:
|
228
|
+
raise ValueError("The subgraph has no edges. Adjust the fraction_shortest_edges parameter.")
|
229
|
+
|
230
|
+
# Step 1: Convert the subnetwork to an adjacency matrix (CSR)
|
211
231
|
subnetwork_nodes = list(subnetwork.nodes())
|
212
|
-
adjacency_matrix = nx.
|
213
|
-
#
|
214
|
-
|
232
|
+
adjacency_matrix = nx.to_scipy_sparse_array(subnetwork, nodelist=subnetwork_nodes)
|
233
|
+
# Ensure the adjacency matrix is valid
|
234
|
+
if adjacency_matrix.shape[0] == 0 or adjacency_matrix.shape[1] == 0:
|
235
|
+
raise ValueError(
|
236
|
+
"The adjacency matrix is empty. Check the input graph or filtering criteria."
|
237
|
+
)
|
238
|
+
|
239
|
+
# Convert the sparse matrix to dense format for MCL
|
240
|
+
dense_matrix = adjacency_matrix.toarray()
|
241
|
+
# Step 2: Run Markov Clustering (MCL) on the dense adjacency matrix
|
242
|
+
try:
|
243
|
+
result = mc.run_mcl(dense_matrix)
|
244
|
+
except Exception as e:
|
245
|
+
raise RuntimeError(f"Markov Clustering failed: {e}")
|
246
|
+
|
215
247
|
clusters = mc.get_clusters(result)
|
216
248
|
# Step 3: Prepare the original network nodes and indices
|
217
249
|
nodes = list(network.nodes())
|
218
250
|
node_index_map = {node: idx for idx, node in enumerate(nodes)}
|
219
251
|
num_nodes = len(nodes)
|
220
|
-
# Step 4:
|
221
|
-
|
222
|
-
|
252
|
+
# Step 4: Prepare data for CSR matrix
|
253
|
+
row_indices = []
|
254
|
+
col_indices = []
|
223
255
|
for cluster in clusters:
|
224
256
|
for node_i in cluster:
|
225
257
|
for node_j in cluster:
|
226
258
|
# Map the indices back to the original network's node indices
|
227
259
|
original_node_i = subnetwork_nodes[node_i]
|
228
260
|
original_node_j = subnetwork_nodes[node_j]
|
229
|
-
|
230
261
|
if original_node_i in node_index_map and original_node_j in node_index_map:
|
231
262
|
idx_i = node_index_map[original_node_i]
|
232
263
|
idx_j = node_index_map[original_node_j]
|
233
|
-
|
264
|
+
row_indices.append(idx_i)
|
265
|
+
col_indices.append(idx_j)
|
266
|
+
|
267
|
+
# Step 5: Create a CSR matrix
|
268
|
+
data = np.ones(len(row_indices), dtype=int)
|
269
|
+
neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
|
234
270
|
|
235
271
|
return neighborhoods
|
236
272
|
|
237
273
|
|
238
274
|
def calculate_spinglass_neighborhoods(
|
239
275
|
network: nx.Graph, fraction_shortest_edges: float = 1.0
|
240
|
-
) ->
|
276
|
+
) -> csr_matrix:
|
241
277
|
"""Apply Spinglass Community Detection to the network, handling disconnected components.
|
242
278
|
|
243
279
|
Args:
|
244
280
|
network (nx.Graph): The network graph.
|
245
281
|
fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
|
246
|
-
subgraphs before clustering.
|
282
|
+
subgraphs before clustering. Defaults to 1.0.
|
247
283
|
|
248
284
|
Returns:
|
249
|
-
|
285
|
+
csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
|
250
286
|
"""
|
251
287
|
# Create a subgraph with the shortest edges based on the rank fraction
|
252
288
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -254,12 +290,11 @@ def calculate_spinglass_neighborhoods(
|
|
254
290
|
)
|
255
291
|
# Step 1: Find connected components in the graph
|
256
292
|
components = list(nx.connected_components(subnetwork))
|
257
|
-
# Prepare
|
293
|
+
# Prepare data for CSR matrix
|
258
294
|
nodes = list(network.nodes())
|
259
295
|
node_index_map = {node: idx for idx, node in enumerate(nodes)}
|
260
|
-
|
261
|
-
|
262
|
-
neighborhoods = np.eye(num_nodes, dtype=int)
|
296
|
+
row_indices = []
|
297
|
+
col_indices = []
|
263
298
|
# Step 2: Run Spinglass on each connected component
|
264
299
|
for component in components:
|
265
300
|
# Extract the subgraph corresponding to the current component
|
@@ -280,27 +315,34 @@ def calculate_spinglass_neighborhoods(
|
|
280
315
|
|
281
316
|
# Step 3: Assign neighborhoods based on community labels
|
282
317
|
for community in communities:
|
283
|
-
|
284
|
-
for
|
285
|
-
|
286
|
-
|
287
|
-
|
318
|
+
mapped_indices = [
|
319
|
+
node_index_map[igraph_subgraph.vs[node]["_nx_name"]] for node in community
|
320
|
+
]
|
321
|
+
for i in mapped_indices:
|
322
|
+
for j in mapped_indices:
|
323
|
+
row_indices.append(i)
|
324
|
+
col_indices.append(j)
|
325
|
+
|
326
|
+
# Step 4: Create a CSR matrix
|
327
|
+
num_nodes = len(nodes)
|
328
|
+
data = np.ones(len(row_indices), dtype=int)
|
329
|
+
neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
|
288
330
|
|
289
331
|
return neighborhoods
|
290
332
|
|
291
333
|
|
292
334
|
def calculate_walktrap_neighborhoods(
|
293
335
|
network: nx.Graph, fraction_shortest_edges: float = 1.0
|
294
|
-
) ->
|
295
|
-
"""Apply Walktrap Community Detection to the network.
|
336
|
+
) -> csr_matrix:
|
337
|
+
"""Apply Walktrap Community Detection to the network with CSR matrix output.
|
296
338
|
|
297
339
|
Args:
|
298
340
|
network (nx.Graph): The network graph.
|
299
341
|
fraction_shortest_edges (float, optional): Shortest edge rank fraction threshold for creating
|
300
|
-
subgraphs before clustering.
|
342
|
+
subgraphs before clustering. Defaults to 1.0.
|
301
343
|
|
302
344
|
Returns:
|
303
|
-
|
345
|
+
csr_matrix: A binary neighborhood matrix (CSR) based on Walktrap communities.
|
304
346
|
"""
|
305
347
|
# Create a subgraph with the shortest edges based on the rank fraction
|
306
348
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -313,17 +355,20 @@ def calculate_walktrap_neighborhoods(
|
|
313
355
|
# Get the list of nodes in the original NetworkX graph
|
314
356
|
nodes = list(network.nodes())
|
315
357
|
node_index_map = {node: idx for idx, node in enumerate(nodes)}
|
316
|
-
#
|
317
|
-
|
318
|
-
|
319
|
-
neighborhoods = np.eye(num_nodes, dtype=int)
|
320
|
-
# Assign neighborhoods based on community labels
|
358
|
+
# Prepare data for CSR matrix
|
359
|
+
row_indices = []
|
360
|
+
col_indices = []
|
321
361
|
for community in communities:
|
322
|
-
for
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
362
|
+
mapped_indices = [node_index_map[igraph_network.vs[node]["_nx_name"]] for node in community]
|
363
|
+
for i in mapped_indices:
|
364
|
+
for j in mapped_indices:
|
365
|
+
row_indices.append(i)
|
366
|
+
col_indices.append(j)
|
367
|
+
|
368
|
+
# Create a CSR matrix
|
369
|
+
num_nodes = len(nodes)
|
370
|
+
data = np.ones(len(row_indices), dtype=int)
|
371
|
+
neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
|
327
372
|
|
328
373
|
return neighborhoods
|
329
374
|
|