risk-network 0.0.7b11__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/__init__.py +1 -1
- risk/annotations/annotations.py +86 -54
- risk/annotations/io.py +14 -14
- risk/log/__init__.py +1 -1
- risk/log/console.py +139 -0
- risk/log/params.py +6 -6
- risk/neighborhoods/community.py +68 -61
- risk/neighborhoods/domains.py +43 -20
- risk/neighborhoods/neighborhoods.py +136 -71
- risk/network/geometry.py +5 -2
- risk/network/graph.py +69 -235
- risk/network/io.py +56 -18
- risk/network/plot/__init__.py +6 -0
- risk/network/plot/canvas.py +290 -0
- risk/network/plot/contour.py +327 -0
- risk/network/plot/labels.py +929 -0
- risk/network/plot/network.py +288 -0
- risk/network/plot/plotter.py +137 -0
- risk/network/plot/utils/color.py +424 -0
- risk/network/plot/utils/layout.py +91 -0
- risk/risk.py +84 -58
- risk/stats/hypergeom.py +1 -1
- risk/stats/permutation/permutation.py +21 -8
- risk/stats/poisson.py +2 -2
- risk/stats/stats.py +12 -10
- {risk_network-0.0.7b11.dist-info → risk_network-0.0.8.dist-info}/METADATA +84 -21
- risk_network-0.0.8.dist-info/RECORD +37 -0
- {risk_network-0.0.7b11.dist-info → risk_network-0.0.8.dist-info}/WHEEL +1 -1
- risk/log/config.py +0 -48
- risk/network/plot.py +0 -1343
- risk_network-0.0.7b11.dist-info/RECORD +0 -30
- {risk_network-0.0.7b11.dist-info → risk_network-0.0.8.dist-info}/LICENSE +0 -0
- {risk_network-0.0.7b11.dist-info → risk_network-0.0.8.dist-info}/top_level.txt +0 -0
risk/neighborhoods/community.py
CHANGED
@@ -21,15 +21,20 @@ def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
     """
     # Detect communities using the Greedy Modularity method
     communities = greedy_modularity_communities(network)
-    # Create a mapping from node to community
-    community_dict = {node: idx for idx, community in enumerate(communities) for node in community}
     # Create a binary neighborhood matrix
-
+    n_nodes = network.number_of_nodes()
+    neighborhoods = np.zeros((n_nodes, n_nodes), dtype=int)
+    # Create a mapping from node to index in the matrix
     node_index = {node: i for i, node in enumerate(network.nodes())}
-    for
-
-
-
+    # Fill in the neighborhood matrix for nodes in the same community
+    for community in communities:
+        # Iterate through all pairs of nodes in the same community
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                # Set them as neighbors (1) in the binary matrix
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
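The new implementation builds a dense node-by-node co-membership matrix directly from the detected communities. A minimal standalone sketch of the same pattern (the karate-club graph and the printed summary are illustrative, not part of the package):

```python
import networkx as nx
import numpy as np
from networkx.algorithms.community import greedy_modularity_communities

network = nx.karate_club_graph()  # example graph standing in for the user's network
communities = greedy_modularity_communities(network)

# Map each node to a row/column index, then mark all pairs within a community as neighbors
node_index = {node: i for i, node in enumerate(network.nodes())}
n_nodes = network.number_of_nodes()
neighborhoods = np.zeros((n_nodes, n_nodes), dtype=int)
for community in communities:
    for node_i in community:
        for node_j in community:
            neighborhoods[node_index[node_i], node_index[node_j]] = 1

print(neighborhoods.sum())  # number of within-community node pairs (self-pairs included)
```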
@@ -43,22 +48,20 @@ def calculate_label_propagation_neighborhoods(network: nx.Graph) -> np.ndarray:
     Returns:
         np.ndarray: Binary neighborhood matrix on Label Propagation.
     """
-    # Apply Label Propagation
+    # Apply Label Propagation for community detection
     communities = nx.algorithms.community.label_propagation.label_propagation_communities(network)
-    # Create a mapping from node to community
-    community_dict = {}
-    for community_id, community in enumerate(communities):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
     # Assign neighborhoods based on community labels
-    for
-    for
-
-
+    for community in communities:
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
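The pair loop above scales quadratically with community size. If that ever becomes a bottleneck, the same block fill can be done with NumPy fancy indexing; this is only an illustrative alternative, not code from the package:

```python
import networkx as nx
import numpy as np
from networkx.algorithms.community import label_propagation_communities

network = nx.les_miserables_graph()  # illustrative example graph
node_index = {node: i for i, node in enumerate(network.nodes())}
neighborhoods = np.zeros((len(node_index), len(node_index)), dtype=int)

for community in label_propagation_communities(network):
    idx = [node_index[node] for node in community]
    # Fill the whole community block at once instead of looping over node pairs
    neighborhoods[np.ix_(idx, idx)] = 1
```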
@@ -81,12 +84,22 @@ def calculate_louvain_neighborhoods(
         network, resolution=resolution, random_state=random_seed
     )
     # Create a binary neighborhood matrix
-
+    num_nodes = network.number_of_nodes()
+    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Group nodes by community
+    community_groups = {}
+    for node, community in partition.items():
+        community_groups.setdefault(community, []).append(node)
+
     # Assign neighborhoods based on community partitions
-    for
-    for
-
-
+    for community, nodes in community_groups.items():
+        for node_i in nodes:
+            idx_i = node_index[node_i]
+            for node_j in nodes:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
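Unlike the detectors that yield node sets, Louvain returns a flat `{node: community_id}` mapping, which is why the new code first regroups nodes per community. A rough sketch of that step, assuming the python-louvain package (imported as `community`) is available; the resolution and seed values here are placeholders:

```python
import community as community_louvain  # python-louvain; assumed installed
import networkx as nx
import numpy as np

network = nx.karate_club_graph()
partition = community_louvain.best_partition(network, resolution=1.0, random_state=888)

# Invert {node: community_id} into {community_id: [nodes]}
community_groups = {}
for node, community_id in partition.items():
    community_groups.setdefault(community_id, []).append(node)

node_index = {node: i for i, node in enumerate(network.nodes())}
neighborhoods = np.zeros((len(node_index), len(node_index)), dtype=int)
for nodes in community_groups.values():
    idx = [node_index[node] for node in nodes]
    neighborhoods[np.ix_(idx, idx)] = 1
```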
@@ -102,24 +115,22 @@ def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
     """
     # Convert the graph to an adjacency matrix
     adjacency_matrix = nx.to_numpy_array(network)
-    # Run Markov Clustering
-    result = mc.run_mcl(adjacency_matrix)  #
-    # Get clusters
+    # Run Markov Clustering (MCL)
+    result = mc.run_mcl(adjacency_matrix)  # MCL with default parameters
+    # Get clusters (communities) from MCL result
     clusters = mc.get_clusters(result)
-    # Create a community label for each node
-    community_dict = {}
-    for community_id, community in enumerate(clusters):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    #
-    for
-
-
-
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Assign neighborhoods based on MCL clusters
+    for cluster in clusters:
+        for node_i in cluster:
+            idx_i = node_index[node_i]
+            for node_j in cluster:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
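For the MCL path, a standalone sketch: it assumes `markov_clustering.get_clusters` returns clusters as tuples of adjacency-matrix indices, so the co-membership matrix can be filled from those indices directly:

```python
import markov_clustering as mc  # assumed installed (pip install markov_clustering)
import networkx as nx
import numpy as np

network = nx.karate_club_graph()
adjacency_matrix = nx.to_numpy_array(network)

result = mc.run_mcl(adjacency_matrix)  # MCL with default expansion/inflation
clusters = mc.get_clusters(result)     # list of tuples of matrix indices

num_nodes = adjacency_matrix.shape[0]
neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
for cluster in clusters:
    idx = list(cluster)
    neighborhoods[np.ix_(idx, idx)] = 1  # mark all pairs within the cluster
```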
@@ -133,22 +144,20 @@ def calculate_spinglass_neighborhoods(network: nx.Graph) -> np.ndarray:
     Returns:
         np.ndarray: Binary neighborhood matrix on Spin Glass communities.
     """
-    #
+    # Apply Asynchronous Label Propagation (LPA)
     communities = asyn_lpa_communities(network)
-    # Create a community label for each node
-    community_dict = {}
-    for community_id, community in enumerate(communities):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    #
-    for
-
-
-
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Assign neighborhoods based on community labels from LPA
+    for community in communities:
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
@@ -162,21 +171,19 @@ def calculate_walktrap_neighborhoods(network: nx.Graph) -> np.ndarray:
     Returns:
         np.ndarray: Binary neighborhood matrix on Walktrap communities.
     """
-    #
+    # Apply Asynchronous Label Propagation (LPA)
     communities = asyn_lpa_communities(network)
-    # Create a community label for each node
-    community_dict = {}
-    for community_id, community in enumerate(communities):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    #
-    for
-
-
-
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Assign neighborhoods based on community labels from LPA
+    for community in communities:
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
risk/neighborhoods/domains.py
CHANGED
@@ -13,7 +13,7 @@ import pandas as pd
 from scipy.cluster.hierarchy import linkage, fcluster
 from sklearn.metrics import silhouette_score
 
-from risk.annotations import
+from risk.annotations import get_weighted_description
 from risk.constants import GROUP_LINKAGE_METHODS, GROUP_DISTANCE_METRICS
 from risk.log import logger
 
|
@@ -40,22 +40,22 @@ def define_domains(
|
|
40
40
|
"""
|
41
41
|
try:
|
42
42
|
# Transpose the matrix to cluster annotations
|
43
|
-
m = significant_neighborhoods_enrichment[:, top_annotations["
|
43
|
+
m = significant_neighborhoods_enrichment[:, top_annotations["significant_annotations"]].T
|
44
44
|
best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
|
45
45
|
m, linkage_criterion, linkage_method, linkage_metric
|
46
46
|
)
|
47
47
|
# Perform hierarchical clustering
|
48
48
|
Z = linkage(m, method=best_linkage, metric=best_metric)
|
49
|
-
logger.
|
49
|
+
logger.warning(
|
50
50
|
f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'"
|
51
51
|
)
|
52
|
-
logger.
|
52
|
+
logger.debug(f"Optimal linkage threshold: {round(best_threshold, 3)}")
|
53
53
|
# Calculate the optimal threshold for clustering
|
54
54
|
max_d_optimal = np.max(Z[:, 2]) * best_threshold
|
55
55
|
# Assign domains to the annotations matrix
|
56
56
|
domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
|
57
57
|
top_annotations["domain"] = 0
|
58
|
-
top_annotations.loc[top_annotations["
|
58
|
+
top_annotations.loc[top_annotations["significant_annotations"], "domain"] = domains
|
59
59
|
except ValueError:
|
60
60
|
# If a ValueError is encountered, handle it by assigning unique domains
|
61
61
|
n_rows = len(top_annotations)
|
@@ -76,8 +76,12 @@ def define_domains(
     t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
     t_idxmax[t_max == 0] = 0
 
+    # Assign all domains where the score is greater than 0
+    node_to_domain["all_domains"] = node_to_domain.loc[:, 1:].apply(
+        lambda row: list(row[row > 0].index), axis=1
+    )
     # Assign primary domain
-    node_to_domain["
+    node_to_domain["primary_domain"] = t_idxmax
 
     return node_to_domain
 
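The new `all_domains` column records every domain with a positive score for a node, while `primary_domain` keeps only the argmax (or 0 when a node has no signal). A small pandas sketch of both derivations on toy scores:

```python
import pandas as pd

# Rows are nodes; integer columns 1..3 hold per-domain scores (toy values)
node_to_domain = pd.DataFrame({1: [0.0, 2.0, 0.0], 2: [1.5, 0.0, 0.0], 3: [0.5, 0.0, 0.0]})

t_max = node_to_domain.loc[:, 1:].max(axis=1)
t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
t_idxmax[t_max == 0] = 0  # nodes with no positive score fall back to domain 0

node_to_domain["all_domains"] = node_to_domain.loc[:, 1:].apply(
    lambda row: list(row[row > 0].index), axis=1
)
node_to_domain["primary_domain"] = t_idxmax
print(node_to_domain)
```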
@@ -97,13 +101,13 @@ def trim_domains_and_top_annotations(
         max_cluster_size (int, optional): Maximum size of a cluster to be retained. Defaults to 1000.
 
     Returns:
-
+        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple containing:
             - Trimmed annotations (pd.DataFrame)
             - Trimmed domains (pd.DataFrame)
             - A DataFrame with domain labels (pd.DataFrame)
     """
     # Identify domains to remove based on size criteria
-    domain_counts = domains["
+    domain_counts = domains["primary_domain"].value_counts()
     to_remove = set(
         domain_counts[(domain_counts < min_cluster_size) | (domain_counts > max_cluster_size)].index
     )
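Size filtering keeps only domains whose node counts fall within [min_cluster_size, max_cluster_size]. A quick illustration with made-up sizes:

```python
import pandas as pd

domains = pd.DataFrame({"primary_domain": [1] * 3 + [2] * 60 + [3] * 1500})
min_cluster_size, max_cluster_size = 5, 1000

domain_counts = domains["primary_domain"].value_counts()
to_remove = set(
    domain_counts[(domain_counts < min_cluster_size) | (domain_counts > max_cluster_size)].index
)
print(to_remove)  # {1, 3}: one domain too small, one too large
```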
@@ -113,32 +117,51 @@ def trim_domains_and_top_annotations(
     invalid_domain_ids = {0, invalid_domain_id}
     # Mark domains to be removed
     top_annotations["domain"].replace(to_remove, invalid_domain_id, inplace=True)
-    domains.loc[domains["
+    domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
 
     # Normalize "num enriched neighborhoods" by percentile for each domain and scale to 0-10
     top_annotations["normalized_value"] = top_annotations.groupby("domain")[
-        "
+        "significant_neighborhood_enrichment_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
-    #
-    top_annotations["
-        lambda row: " ".join([row["
+    # Modify the lambda function to pass both full_terms and significant_enrichment_score
+    top_annotations["combined_terms"] = top_annotations.apply(
+        lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
+    )
+
+    # Perform the groupby operation while retaining the other columns and adding the weighting with enrichment scores
+    domain_labels = (
+        top_annotations.groupby("domain")
+        .agg(
+            full_terms=("full_terms", lambda x: list(x)),
+            enrichment_scores=("significant_enrichment_score", lambda x: list(x)),
+        )
+        .reset_index()
+    )
+    domain_labels["combined_terms"] = domain_labels.apply(
+        lambda row: get_weighted_description(
+            pd.Series(row["full_terms"]), pd.Series(row["enrichment_scores"])
+        ),
+        axis=1,
     )
 
-    #
-    domain_labels = top_annotations.groupby("domain")["words"].apply(get_description).reset_index()
+    # Rename the columns as necessary
     trimmed_domains_matrix = domain_labels.rename(
-        columns={
+        columns={
+            "domain": "id",
+            "combined_terms": "normalized_description",
+            "full_terms": "full_descriptions",
+            "enrichment_scores": "enrichment_scores",
+        }
     ).set_index("id")
 
     # Remove invalid domains
     valid_annotations = top_annotations[~top_annotations["domain"].isin(invalid_domain_ids)].drop(
         columns=["normalized_value"]
     )
-    valid_domains = domains[~domains["
+    valid_domains = domains[~domains["primary_domain"].isin(invalid_domain_ids)]
     valid_trimmed_domains_matrix = trimmed_domains_matrix[
         ~trimmed_domains_matrix.index.isin(invalid_domain_ids)
     ]
-
     return valid_annotations, valid_domains, valid_trimmed_domains_matrix
 
 
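Domain labels are now weighted by per-annotation enrichment scores instead of repeating words by a normalized count. `get_weighted_description` itself lives in `risk.annotations`; the helper below is only a hypothetical stand-in that conveys the general idea (score-weighted word counting), not the package's implementation:

```python
from collections import Counter

import pandas as pd


def weighted_description_sketch(terms: pd.Series, scores: pd.Series, top_k: int = 3) -> str:
    """Hypothetical stand-in: weight each term's words by its enrichment score."""
    counts = Counter()
    for term, score in zip(terms, scores):
        for word in str(term).lower().split():
            counts[word] += float(score)
    return " ".join(word for word, _ in counts.most_common(top_k))


terms = pd.Series(["ribosome biogenesis", "ribosome assembly", "rna processing"])
scores = pd.Series([3.2, 2.8, 1.1])
print(weighted_description_sketch(terms, scores))  # "ribosome biogenesis assembly"
```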
@@ -154,7 +177,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
         linkage_metric (str): Linkage metric for clustering.
 
     Returns:
-
+        Tuple[str, str, float]: A tuple containing:
             - Best linkage method (str)
             - Best linkage metric (str)
             - Best threshold (float)
@@ -208,7 +231,7 @@ def _find_best_silhouette_score(
         resolution (float, optional): Desired resolution for the best threshold. Defaults to 0.001.
 
     Returns:
-
+        Tuple[float, float]: A tuple containing:
             - Best threshold (float): The threshold that yields the best silhouette score.
             - Best silhouette score (float): The highest silhouette score achieved.
     """
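Both helpers search for the cut that maximizes the silhouette score. A compact sketch of that search loop; the fixed linkage settings and the coarse 0.1-step grid are placeholders for the package's sweep over GROUP_LINKAGE_METHODS, GROUP_DISTANCE_METRICS, and its finer resolution:

```python
import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage
from sklearn.metrics import silhouette_score

rng = np.random.default_rng(1)
m = rng.random((40, 6))  # toy annotation-by-neighborhood matrix

Z = linkage(m, method="average", metric="euclidean")
best_threshold, best_score = None, -np.inf
for threshold in np.arange(0.1, 1.0, 0.1):
    labels = fcluster(Z, np.max(Z[:, 2]) * threshold, criterion="distance")
    if 1 < len(set(labels)) < len(m):  # silhouette needs between 2 and n-1 clusters
        score = silhouette_score(m, labels, metric="euclidean")
        if score > best_score:
            best_threshold, best_score = threshold, score

print(best_threshold, round(best_score, 3))
```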