risk-network 0.0.16b0__tar.gz → 0.0.16b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/PKG-INFO +2 -2
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/README.md +1 -1
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk/__init__.py +2 -2
- {risk_network-0.0.16b0/src/risk/_annotation → risk_network-0.0.16b2/src/risk/annotation}/__init__.py +2 -2
- {risk_network-0.0.16b0/src/risk/_annotation → risk_network-0.0.16b2/src/risk/annotation}/_nltk_setup.py +3 -3
- risk_network-0.0.16b0/src/risk/_annotation/_annotation.py → risk_network-0.0.16b2/src/risk/annotation/annotation.py +22 -25
- risk_network-0.0.16b0/src/risk/_annotation/_io.py → risk_network-0.0.16b2/src/risk/annotation/io.py +4 -4
- risk_network-0.0.16b2/src/risk/cluster/__init__.py +8 -0
- {risk_network-0.0.16b0/src/risk/_neighborhoods → risk_network-0.0.16b2/src/risk/cluster}/_community.py +37 -37
- risk_network-0.0.16b2/src/risk/cluster/api.py +273 -0
- risk_network-0.0.16b0/src/risk/_neighborhoods/_neighborhoods.py → risk_network-0.0.16b2/src/risk/cluster/cluster.py +127 -98
- risk_network-0.0.16b0/src/risk/_neighborhoods/_domains.py → risk_network-0.0.16b2/src/risk/cluster/label.py +18 -12
- {risk_network-0.0.16b0/src/risk/_log → risk_network-0.0.16b2/src/risk/log}/__init__.py +2 -2
- risk_network-0.0.16b0/src/risk/_log/_console.py → risk_network-0.0.16b2/src/risk/log/console.py +2 -2
- risk_network-0.0.16b0/src/risk/_log/_parameters.py → risk_network-0.0.16b2/src/risk/log/parameters.py +20 -10
- risk_network-0.0.16b2/src/risk/network/__init__.py +8 -0
- risk_network-0.0.16b2/src/risk/network/graph/__init__.py +7 -0
- {risk_network-0.0.16b0/src/risk/_network/_graph → risk_network-0.0.16b2/src/risk/network/graph}/_stats.py +2 -2
- {risk_network-0.0.16b0/src/risk/_network/_graph → risk_network-0.0.16b2/src/risk/network/graph}/_summary.py +13 -13
- risk_network-0.0.16b0/src/risk/_network/_graph/_api.py → risk_network-0.0.16b2/src/risk/network/graph/api.py +37 -39
- risk_network-0.0.16b0/src/risk/_network/_graph/_graph.py → risk_network-0.0.16b2/src/risk/network/graph/graph.py +5 -5
- risk_network-0.0.16b0/src/risk/_network/_io.py → risk_network-0.0.16b2/src/risk/network/io.py +9 -4
- risk_network-0.0.16b2/src/risk/network/plotter/__init__.py +6 -0
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_canvas.py +6 -6
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_contour.py +4 -4
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_labels.py +6 -6
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_network.py +7 -7
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_plotter.py +5 -5
- risk_network-0.0.16b2/src/risk/network/plotter/_utils/__init__.py +7 -0
- risk_network-0.0.16b0/src/risk/_network/_plotter/_utils/_colors.py → risk_network-0.0.16b2/src/risk/network/plotter/_utils/colors.py +3 -3
- risk_network-0.0.16b0/src/risk/_network/_plotter/_utils/_layout.py → risk_network-0.0.16b2/src/risk/network/plotter/_utils/layout.py +2 -2
- risk_network-0.0.16b0/src/risk/_network/_plotter/_api.py → risk_network-0.0.16b2/src/risk/network/plotter/api.py +5 -5
- risk_network-0.0.16b0/src/risk/_risk.py → risk_network-0.0.16b2/src/risk/risk.py +9 -8
- risk_network-0.0.16b2/src/risk/stats/__init__.py +6 -0
- risk_network-0.0.16b2/src/risk/stats/_stats/__init__.py +11 -0
- risk_network-0.0.16b2/src/risk/stats/_stats/permutation/__init__.py +6 -0
- risk_network-0.0.16b2/src/risk/stats/_stats/permutation/_test_functions.py +72 -0
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/_permutation/_permutation.py → risk_network-0.0.16b2/src/risk/stats/_stats/permutation/permutation.py +35 -37
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/_tests.py → risk_network-0.0.16b2/src/risk/stats/_stats/tests.py +32 -34
- risk_network-0.0.16b2/src/risk/stats/api.py +202 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk_network.egg-info/PKG-INFO +2 -2
- risk_network-0.0.16b2/src/risk_network.egg-info/SOURCES.txt +54 -0
- risk_network-0.0.16b2/tests/test_load_clusters.py +308 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_graph.py +282 -79
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_io_combinations.py +18 -19
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_network.py +105 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_plotter.py +101 -0
- risk_network-0.0.16b2/tests/test_load_stats.py +348 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_log.py +1 -1
- risk_network-0.0.16b0/src/risk/_neighborhoods/__init__.py +0 -8
- risk_network-0.0.16b0/src/risk/_neighborhoods/_api.py +0 -354
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/__init__.py +0 -11
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/_permutation/__init__.py +0 -6
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/_permutation/_test_functions.py +0 -72
- risk_network-0.0.16b0/src/risk/_network/__init__.py +0 -8
- risk_network-0.0.16b0/src/risk/_network/_graph/__init__.py +0 -7
- risk_network-0.0.16b0/src/risk/_network/_plotter/__init__.py +0 -6
- risk_network-0.0.16b0/src/risk/_network/_plotter/_utils/__init__.py +0 -7
- risk_network-0.0.16b0/src/risk_network.egg-info/SOURCES.txt +0 -51
- risk_network-0.0.16b0/tests/test_load_neighborhoods.py +0 -415
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/LICENSE +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/pyproject.toml +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/setup.cfg +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk_network.egg-info/dependency_links.txt +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk_network.egg-info/requires.txt +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk_network.egg-info/top_level.txt +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_annotation.py +0 -0
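Taken together, the listing shows a package-wide restructure: the private `_annotation`, `_log`, `_neighborhoods`, and `_network` packages become public `annotation`, `log`, `cluster`, and `network` packages (plus a new `stats` package), with "neighborhood" renamed to "cluster" throughout. Below is a hedged before/after sketch of what the moves mean for import paths, inferred purely from the file renames above; these are internal modules, and the supported surface is whatever the top-level `risk` package re-exports.

```python
# Import paths inferred from the file moves listed above -- shown for
# orientation only; the canonical entry point is the top-level risk API.

# 0.0.16b0: private, underscore-prefixed packages
# from risk._neighborhoods._community import calculate_louvain_neighborhoods
# from risk._log import logger

# 0.0.16b2: public packages, with "neighborhood" renamed to "cluster"
from risk.cluster._community import calculate_louvain_clusters
from risk.log import logger
```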
{risk_network-0.0.16b0 → risk_network-0.0.16b2}/PKG-INFO
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: risk-network
-Version: 0.0.16b0
+Version: 0.0.16b2
 Summary: A Python package for scalable network analysis and high-quality visualization.
 Author-email: Ira Horecka <ira89@icloud.com>
 License: GPL-3.0-or-later
@@ -44,7 +44,7 @@ Dynamic: license-file
 [badge]
 [](https://pypi.python.org/pypi/risk-network)
 [badge]
-[](https://doi.org/10.5281/zenodo.…)
+[](https://doi.org/10.5281/zenodo.17257417)
 [badge]
 
 **RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. It integrates community detection algorithms, rigorous overrepresentation analysis, and a modular framework for diverse network types. RISK identifies biologically coherent relationships within networks and generates publication-ready visualizations, making it a useful tool for biological and interdisciplinary network analysis.

{risk_network-0.0.16b0 → risk_network-0.0.16b2}/README.md
CHANGED
@@ -3,7 +3,7 @@
 [badge]
 [](https://pypi.python.org/pypi/risk-network)
 [badge]
-[](https://doi.org/10.5281/zenodo.…)
+[](https://doi.org/10.5281/zenodo.17257417)
 [badge]
 
 **RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. It integrates community detection algorithms, rigorous overrepresentation analysis, and a modular framework for diverse network types. RISK identifies biologically coherent relationships within networks and generates publication-ready visualizations, making it a useful tool for biological and interdisciplinary network analysis.
{risk_network-0.0.16b0/src/risk/_annotation → risk_network-0.0.16b2/src/risk/annotation}/_nltk_setup.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/_annotation/_nltk_setup
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+risk/annotation/_nltk_setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 import os
@@ -11,7 +11,7 @@ import nltk
 from nltk.data import find
 from nltk.data import path as nltk_data_path
 
-from .._log import logger
+from ..log import logger
 
 
 def setup_nltk_resources(required_resources: Optional[List[Tuple[str, str]]] = None) -> None:
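`setup_nltk_resources` accepts a list of `(lookup_path, package_name)` pairs; its body is outside this diff, but the `find`/`nltk_data_path` imports point at the usual find-or-download pattern. A minimal sketch of that pattern follows, with hypothetical resource names; this is not the package's actual implementation.

```python
# Minimal find-or-download sketch suggested by the signature above;
# the resource names here are illustrative, not RISK's actual defaults.
from typing import List, Optional, Tuple

import nltk
from nltk.data import find


def ensure_nltk_resources(required: Optional[List[Tuple[str, str]]] = None) -> None:
    # Each pair maps a lookup path (e.g. "tokenizers/punkt") to the
    # downloadable package name (e.g. "punkt").
    required = required or [("tokenizers/punkt", "punkt"), ("corpora/wordnet", "wordnet")]
    for lookup_path, package in required:
        try:
            find(lookup_path)  # raises LookupError when the resource is missing
        except LookupError:
            nltk.download(package, quiet=True)
```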
risk_network-0.0.16b0/src/risk/_annotation/_annotation.py → risk_network-0.0.16b2/src/risk/annotation/annotation.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/_annotation/_annotation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+risk/annotation/annotation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 import re
@@ -14,7 +14,7 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from scipy.sparse import coo_matrix
 
-from .._log import logger
+from ..log import logger
 from ._nltk_setup import setup_nltk_resources
 
 
@@ -123,19 +123,19 @@ def load_annotation(
 def define_top_annotation(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-    neighborhood_significance_sums: List[int],
+    cluster_significance_sums: List[int],
     significant_significance_matrix: np.ndarray,
     significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
     """
-    Define top annotations based on neighborhood significance sums and binary significance matrix.
+    Define top annotations based on cluster significance sums and binary significance matrix.
 
     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        cluster_significance_sums (list of int): List of cluster significance sums.
         significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
         significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
@@ -146,12 +146,12 @@ def define_top_annotation(
     """
     # Sum the columns of the significant significance matrix (positive floating point values)
     significant_significance_scores = significant_significance_matrix.sum(axis=0)
-    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    # Create DataFrame to store annotations, their cluster significance sums, and significance scores
     annotation_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
-            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_cluster_significance_sums": cluster_significance_sums,
             "significant_significance_score": significant_significance_scores,
         }
     )
@@ -159,11 +159,11 @@ def define_top_annotation(
     # Apply size constraints to identify potential significant annotations
     annotation_significance_matrix.loc[
         (
-            annotation_significance_matrix["significant_neighborhood_significance_sums"]
+            annotation_significance_matrix["significant_cluster_significance_sums"]
             >= min_cluster_size
         )
         & (
-            annotation_significance_matrix["significant_neighborhood_significance_sums"]
+            annotation_significance_matrix["significant_cluster_significance_sums"]
             <= max_cluster_size
         ),
         "significant_annotation",
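The hunk above only renames the column feeding the size filter; the pattern itself, a combined boolean mask driving a `.loc` assignment, is worth a standalone look. A toy example with the diff's column names follows; the data values, and the `True` being assigned, are illustrative, since the assignment's right-hand side falls outside the hunk.

```python
# Self-contained illustration of the .loc mask-assignment pattern above;
# column names mirror the diff, data values are made up.
import pandas as pd

df = pd.DataFrame({"significant_cluster_significance_sums": [2, 10, 50, 2000]})
df["significant_annotation"] = False
min_cluster_size, max_cluster_size = 5, 1000

# Rows whose sums fall inside [min_cluster_size, max_cluster_size] are flagged.
df.loc[
    (df["significant_cluster_significance_sums"] >= min_cluster_size)
    & (df["significant_cluster_significance_sums"] <= max_cluster_size),
    "significant_annotation",
] = True
print(df)  # only the rows with sums 10 and 50 are flagged
```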
@@ -179,11 +179,11 @@ def define_top_annotation(
     for attribute in annotation_significance_matrix.index.values[
         annotation_significance_matrix["significant_annotation"]
     ]:
-        # Identify significant neighborhoods based on the binary significance matrix
-        significant_neighborhoods = list(
+        # Identify significant clusters based on the binary significance matrix
+        significant_clusters = list(
             compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-        significant_network = nx.subgraph(network, significant_neighborhoods)
+        significant_network = nx.subgraph(network, significant_clusters)
         # Analyze connected components within the significant subnetwork
         connected_components = sorted(
             nx.connected_components(significant_network), key=len, reverse=True
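The selection step above pairs `itertools.compress` with one column of the binary significance matrix to pull out the significant nodes, then analyzes the induced subgraph. A toy run of the same pattern:

```python
# Toy example of the compress + subgraph selection used above.
from itertools import compress

import networkx as nx
import numpy as np

G = nx.path_graph(["a", "b", "c", "d"])
# One column of a binary significance matrix: 1 marks a significant node.
column = np.array([1, 0, 1, 1])

significant_nodes = list(compress(list(G), column))  # ['a', 'c', 'd']
sub = nx.subgraph(G, significant_nodes)
components = sorted(nx.connected_components(sub), key=len, reverse=True)
print(components)  # [{'c', 'd'}, {'a'}]
```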
@@ -257,26 +257,23 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
             continue
         # Lemmatize the token to merge similar forms
         token_norm = LEMMATIZER.lemmatize(token_clean)
-        weighted_counts[token_norm] = weighted_counts.get(token_norm, 0) + weight
+        # Apply weighting boost for biologically structured number-word hybrids
+        if re.match(r"^\d+[\-\w]+", token_norm):
+            actual_weight = int(weight * 1.5)
+        else:
+            actual_weight = weight
+        weighted_counts[token_norm] = weighted_counts.get(token_norm, 0) + actual_weight
 
     # Reconstruct a weighted token list by repeating each token by its aggregated count.
     weighted_words = []
     for token, count in weighted_counts.items():
         weighted_words.extend([token] * count)
 
-    # Combine tokens that match number-word patterns (e.g. "4-alpha")
+    # Combine tokens that match number-word patterns (e.g. "4-alpha"), but do not remove numeric tokens.
+    # All tokens are included in the final list.
     combined_tokens = []
     for token in weighted_words:
-        if re.match(r"^\d+[\-\w]+", token):
-            combined_tokens.append(token)
-        elif token.replace(".", "", 1).isdigit():
-            continue
-        else:
-            combined_tokens.append(token)
-
-    # If the only token is numeric, return a default value.
-    if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
-        return "N/A"
+        combined_tokens.append(token)
 
     # Simplify the token list to remove near-duplicates based on the Jaccard index.
     simplified_words = _simplify_word_list(combined_tokens)
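The rewrite folds number-word handling into the counting loop: hybrid tokens such as "4-alpha" now get a 1.5x weight boost up front, and purely numeric tokens are no longer dropped (the old "N/A" fallback goes away). A standalone demo of the boost logic:

```python
# Standalone demo of the number-word weighting boost introduced above.
import re


def boosted_weight(token: str, weight: int) -> int:
    # Tokens like "4-alpha" (digits followed by hyphen/word chars) get a 1.5x boost.
    if re.match(r"^\d+[\-\w]+", token):
        return int(weight * 1.5)
    return weight


counts = {}
for token, weight in [("kinase", 2), ("4-alpha", 2), ("transport", 3)]:
    counts[token] = counts.get(token, 0) + boosted_weight(token, weight)
print(counts)  # {'kinase': 2, '4-alpha': 3, 'transport': 3}
```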
risk_network-0.0.16b0/src/risk/_annotation/_io.py → risk_network-0.0.16b2/src/risk/annotation/io.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/_annotation/_io
-~~~~~~~~~~~~~~~~~~~~
+risk/annotation/io
+~~~~~~~~~~~~~~~~~~
 """
 
 import json
@@ -9,8 +9,8 @@ from typing import Any, Dict
 import networkx as nx
 import pandas as pd
 
-from .._log import log_header, logger, params
-from ._annotation import load_annotation
+from ..log import log_header, logger, params
+from .annotation import load_annotation
 
 
 class AnnotationHandler:
{risk_network-0.0.16b0/src/risk/_neighborhoods → risk_network-0.0.16b2/src/risk/cluster}/_community.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/_neighborhoods/_community
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+risk/cluster/_community
+~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 import community as community_louvain
@@ -12,14 +12,14 @@ from leidenalg import RBConfigurationVertexPartition, find_partition
 from networkx.algorithms.community import greedy_modularity_communities
 from scipy.sparse import csr_matrix
 
-from .._log import logger
+from ..log import logger
 
 
-def calculate_greedy_modularity_neighborhoods(
+def calculate_greedy_modularity_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
-    Calculate neighborhoods using the Greedy Modularity method with CSR matrix output.
+    Calculate clusters using the Greedy Modularity method with CSR matrix output.
 
     Args:
         network (nx.Graph): The network graph.
@@ -27,7 +27,7 @@ def calculate_greedy_modularity_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+        csr_matrix: A binary cluster matrix (CSR) where nodes in the same community have 1, and others have 0.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
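`greedy_modularity_communities` comes straight from NetworkX, per the import above; a minimal call, leaving out the `fraction_shortest_edges` edge filtering that the function applies first, looks like this:

```python
# Minimal NetworkX greedy-modularity call matching the module import above;
# the edge filtering controlled by fraction_shortest_edges is omitted here.
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities

G = nx.karate_club_graph()
communities = greedy_modularity_communities(G)  # iterable of frozensets of nodes
print([sorted(c) for c in communities])
```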
@@ -55,12 +55,12 @@ def calculate_greedy_modularity_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_label_propagation_neighborhoods(
+def calculate_label_propagation_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
@@ -72,7 +72,7 @@ def calculate_label_propagation_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
+        csr_matrix: A binary cluster matrix (CSR) on Label Propagation.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
@@ -92,7 +92,7 @@ def calculate_label_propagation_neighborhoods(
     # Prepare data for CSR matrix
     row_indices = []
     col_indices = []
-    # Assign neighborhoods based on community labels using the mapped indices
+    # Assign clusters based on community labels using the mapped indices
     for community in communities:
         mapped_indices = [node_index_map[node] for node in community]
         for i in mapped_indices:
@@ -103,19 +103,19 @@ def calculate_label_propagation_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_leiden_neighborhoods(
+def calculate_leiden_clusters(
     network: nx.Graph,
     resolution: float = 1.0,
     fraction_shortest_edges: float = 1.0,
     random_seed: int = 888,
 ) -> csr_matrix:
     """
-    Calculate neighborhoods using the Leiden method with CSR matrix output.
+    Calculate clusters using the Leiden method with CSR matrix output.
 
     Args:
         network (nx.Graph): The network graph.
@@ -125,7 +125,7 @@ def calculate_leiden_neighborhoods(
         random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+        csr_matrix: A binary cluster matrix (CSR) where nodes in the same community have 1, and others have 0.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
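The Leiden body is not visible in this extraction, but the module-level import of `find_partition` and `RBConfigurationVertexPartition` pins the dependency to `leidenalg`, which operates on igraph graphs. A sketch of a typical call using the defaults above; the NetworkX-to-igraph conversion is an assumption, not code from this diff:

```python
# Sketch of a typical leidenalg call matching the imports in this diff;
# the actual body of calculate_leiden_clusters is not shown here.
import igraph as ig
import networkx as nx
from leidenalg import RBConfigurationVertexPartition, find_partition

G = nx.karate_club_graph()
ig_graph = ig.Graph.from_networkx(G)  # leidenalg operates on igraph graphs

partition = find_partition(
    ig_graph,
    RBConfigurationVertexPartition,
    resolution_parameter=1.0,  # mirrors the resolution default above
    seed=888,                  # mirrors the random_seed default above
)
communities = [list(community) for community in partition]
```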
@@ -160,19 +160,19 @@ def calculate_leiden_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_louvain_neighborhoods(
+def calculate_louvain_clusters(
     network: nx.Graph,
     resolution: float = 0.1,
     fraction_shortest_edges: float = 1.0,
     random_seed: int = 888,
 ) -> csr_matrix:
     """
-    Calculate neighborhoods using the Louvain method.
+    Calculate clusters using the Louvain method.
 
     Args:
         network (nx.Graph): The network graph.
@@ -182,7 +182,7 @@ def calculate_louvain_neighborhoods(
         random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix in CSR format.
+        csr_matrix: A binary cluster matrix in CSR format.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
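Similarly for Louvain: `import community as community_louvain` at the top of the module is the python-louvain package, whose `best_partition` takes `resolution` and `random_state` arguments matching the defaults above. A sketch (again, the actual call site is not shown in this extraction):

```python
# Sketch of a typical python-louvain call matching the module import
# "import community as community_louvain" above; the body of
# calculate_louvain_clusters itself is not shown in this diff.
import community as community_louvain
import networkx as nx

G = nx.karate_club_graph()
partition = community_louvain.best_partition(
    G,
    resolution=0.1,     # mirrors the resolution default above
    random_state=888,   # mirrors the random_seed default above
)
# best_partition maps node -> community id; invert it to get member lists.
communities = {}
for node, comm_id in partition.items():
    communities.setdefault(comm_id, []).append(node)
```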
@@ -217,16 +217,16 @@ def calculate_louvain_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_markov_clustering_neighborhoods(
+def calculate_markov_clustering_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
-    Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix (CSR).
+    Apply Markov Clustering (MCL) to the network and return a binary cluster matrix (CSR).
 
     Args:
         network (nx.Graph): The network graph.
@@ -234,7 +234,7 @@ def calculate_markov_clustering_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.
+        csr_matrix: A binary cluster matrix (CSR) on Markov Clustering.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
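The MCL imports are not visible in the extracted hunks, so the following is an assumption-labeled sketch of a standard run with the `markov_clustering` package rather than RISK's actual code:

```python
# Hedged sketch of a Markov Clustering (MCL) run; the markov_clustering
# package and these call names are assumptions -- the relevant imports are
# not visible in the extracted hunks above.
import markov_clustering as mc
import networkx as nx
from scipy.sparse import csr_matrix

G = nx.karate_club_graph()
adjacency = csr_matrix(nx.to_numpy_array(G))  # MCL operates on an adjacency matrix
result = mc.run_mcl(adjacency)                # run MCL with default parameters
clusters = mc.get_clusters(result)            # list of tuples of node indices
```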
@@ -288,12 +288,12 @@ def calculate_markov_clustering_neighborhoods(
 
     # Step 5: Create a CSR matrix
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_spinglass_neighborhoods(
+def calculate_spinglass_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
@@ -305,7 +305,7 @@ def calculate_spinglass_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
+        csr_matrix: A binary cluster matrix (CSR) based on Spinglass communities.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
@@ -340,7 +340,7 @@ def calculate_spinglass_neighborhoods(
             logger.error(f"Error running Spinglass on component: {e}")
             continue
 
-        # Step 3: Assign neighborhoods based on community labels
+        # Step 3: Assign clusters based on community labels
         for community in communities:
             mapped_indices = [
                 node_index_map[igraph_subgraph.vs[node]["_nx_name"]] for node in community
@@ -353,12 +353,12 @@ def calculate_spinglass_neighborhoods(
     # Step 4: Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_walktrap_neighborhoods(
+def calculate_walktrap_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
@@ -370,7 +370,7 @@ def calculate_walktrap_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
+        csr_matrix: A binary cluster matrix (CSR) on Walktrap communities.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
@@ -400,9 +400,9 @@ def calculate_walktrap_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
 def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: float) -> nx.Graph:
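Every function in `_community.py` converges on the same output contract: a binary CSR matrix whose `(i, j)` entry is 1 exactly when nodes `i` and `j` share a community. A compact, self-contained illustration of that construction:

```python
# Compact illustration of the binary cluster-matrix construction repeated in
# each function above: cell (i, j) is 1 when nodes i and j share a community.
import numpy as np
from scipy.sparse import csr_matrix

communities = [[0, 1], [2, 3, 4]]  # toy community assignment over 5 nodes
row_indices, col_indices = [], []
for community in communities:
    for i in community:
        for j in community:
            row_indices.append(i)
            col_indices.append(j)

num_nodes = 5
data = np.ones(len(row_indices), dtype=int)
clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
print(clusters.toarray())
```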
|