risk-network 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotation/__init__.py +10 -0
- risk/{annotations/annotations.py → annotation/annotation.py} +62 -102
- risk/{annotations → annotation}/io.py +93 -92
- risk/annotation/nltk_setup.py +86 -0
- risk/log/__init__.py +1 -1
- risk/log/parameters.py +26 -27
- risk/neighborhoods/__init__.py +0 -1
- risk/neighborhoods/api.py +38 -38
- risk/neighborhoods/community.py +33 -4
- risk/neighborhoods/domains.py +26 -28
- risk/neighborhoods/neighborhoods.py +8 -2
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
- risk/{stats → neighborhoods/stats}/permutation/test_functions.py +5 -4
- risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
- risk/network/__init__.py +0 -2
- risk/network/graph/__init__.py +0 -2
- risk/network/graph/api.py +19 -19
- risk/network/graph/graph.py +73 -68
- risk/{stats/significance.py → network/graph/stats.py} +2 -2
- risk/network/graph/summary.py +12 -13
- risk/network/io.py +163 -20
- risk/network/plotter/__init__.py +0 -2
- risk/network/plotter/api.py +1 -1
- risk/network/plotter/canvas.py +36 -36
- risk/network/plotter/contour.py +14 -15
- risk/network/plotter/labels.py +303 -294
- risk/network/plotter/network.py +6 -6
- risk/network/plotter/plotter.py +8 -10
- risk/network/plotter/utils/colors.py +15 -8
- risk/network/plotter/utils/layout.py +3 -3
- risk/risk.py +6 -7
- risk_network-0.0.12.dist-info/METADATA +122 -0
- risk_network-0.0.12.dist-info/RECORD +40 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
- risk/annotations/__init__.py +0 -7
- risk/network/geometry.py +0 -150
- risk/stats/__init__.py +0 -15
- risk/stats/permutation/__init__.py +0 -6
- risk_network-0.0.10.dist-info/METADATA +0 -798
- risk_network-0.0.10.dist-info/RECORD +0 -40
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
@@ -1,52 +1,51 @@
|
|
1
1
|
"""
|
2
|
-
risk/stats/
|
3
|
-
|
2
|
+
risk/neighborhoods/stats/tests
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
6
|
from typing import Any, Dict
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
from scipy.sparse import csr_matrix
|
10
|
-
from scipy.stats import binom
|
11
|
-
from scipy.stats import chi2
|
12
|
-
from scipy.stats import hypergeom
|
13
|
-
from scipy.stats import norm
|
14
|
-
from scipy.stats import poisson
|
10
|
+
from scipy.stats import binom, chi2, hypergeom, norm, poisson
|
15
11
|
|
16
12
|
|
17
13
|
def compute_binom_test(
|
18
14
|
neighborhoods: csr_matrix,
|
19
|
-
|
15
|
+
annotation: csr_matrix,
|
20
16
|
null_distribution: str = "network",
|
21
17
|
) -> Dict[str, Any]:
|
22
18
|
"""Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
|
23
19
|
|
24
20
|
Args:
|
25
21
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
26
|
-
|
27
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
22
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
23
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
28
24
|
|
29
25
|
Returns:
|
30
26
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
27
|
+
|
28
|
+
Raises:
|
29
|
+
ValueError: If an invalid null_distribution value is provided.
|
31
30
|
"""
|
32
31
|
# Get the total number of nodes in the network
|
33
32
|
total_nodes = neighborhoods.shape[1]
|
34
33
|
|
35
34
|
# Compute sums (remain sparse here)
|
36
35
|
neighborhood_sizes = neighborhoods.sum(axis=1) # Row sums
|
37
|
-
annotation_totals =
|
36
|
+
annotation_totals = annotation.sum(axis=0) # Column sums
|
38
37
|
# Compute probabilities (convert to dense)
|
39
38
|
if null_distribution == "network":
|
40
39
|
p_values = (annotation_totals / total_nodes).A.flatten() # Dense 1D array
|
41
|
-
elif null_distribution == "
|
42
|
-
p_values = (annotation_totals /
|
40
|
+
elif null_distribution == "annotation":
|
41
|
+
p_values = (annotation_totals / annotation.sum()).A.flatten() # Dense 1D array
|
43
42
|
else:
|
44
43
|
raise ValueError(
|
45
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
44
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
46
45
|
)
|
47
46
|
|
48
47
|
# Observed counts (sparse matrix multiplication)
|
49
|
-
annotated_counts = neighborhoods @
|
48
|
+
annotated_counts = neighborhoods @ annotation # Sparse result
|
50
49
|
annotated_counts_dense = annotated_counts.toarray() # Convert for dense operations
|
51
50
|
|
52
51
|
# Compute enrichment and depletion p-values
|
@@ -58,18 +57,21 @@ def compute_binom_test(
|
|
58
57
|
|
59
58
|
def compute_chi2_test(
|
60
59
|
neighborhoods: csr_matrix,
|
61
|
-
|
60
|
+
annotation: csr_matrix,
|
62
61
|
null_distribution: str = "network",
|
63
62
|
) -> Dict[str, Any]:
|
64
63
|
"""Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
|
65
64
|
|
66
65
|
Args:
|
67
66
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
68
|
-
|
69
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
67
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
68
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
70
69
|
|
71
70
|
Returns:
|
72
71
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
72
|
+
|
73
|
+
Raises:
|
74
|
+
ValueError: If an invalid null_distribution value is provided.
|
73
75
|
"""
|
74
76
|
# Total number of nodes in the network
|
75
77
|
total_node_count = neighborhoods.shape[0]
|
@@ -78,22 +80,22 @@ def compute_chi2_test(
|
|
78
80
|
# Case 1: Use all nodes as the background
|
79
81
|
background_population = total_node_count
|
80
82
|
neighborhood_sums = neighborhoods.sum(axis=0) # Column sums of neighborhoods
|
81
|
-
annotation_sums =
|
82
|
-
elif null_distribution == "
|
83
|
+
annotation_sums = annotation.sum(axis=0) # Column sums of annotations
|
84
|
+
elif null_distribution == "annotation":
|
83
85
|
# Case 2: Only consider nodes with at least one annotation
|
84
86
|
annotated_nodes = (
|
85
|
-
np.ravel(
|
87
|
+
np.ravel(annotation.sum(axis=1)) > 0
|
86
88
|
) # Row-wise sum to filter nodes with annotations
|
87
89
|
background_population = annotated_nodes.sum() # Total number of annotated nodes
|
88
90
|
neighborhood_sums = neighborhoods[annotated_nodes].sum(
|
89
91
|
axis=0
|
90
92
|
) # Neighborhood sums for annotated nodes
|
91
|
-
annotation_sums =
|
93
|
+
annotation_sums = annotation[annotated_nodes].sum(
|
92
94
|
axis=0
|
93
95
|
) # Annotation sums for annotated nodes
|
94
96
|
else:
|
95
97
|
raise ValueError(
|
96
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
98
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
97
99
|
)
|
98
100
|
|
99
101
|
# Convert to dense arrays for downstream computations
|
@@ -101,7 +103,7 @@ def compute_chi2_test(
|
|
101
103
|
annotation_sums = np.asarray(annotation_sums).reshape(1, -1) # Ensure row vector shape
|
102
104
|
|
103
105
|
# Observed values: number of annotated nodes in each neighborhood
|
104
|
-
observed = neighborhoods.T @
|
106
|
+
observed = neighborhoods.T @ annotation # Shape: (neighborhoods, annotation)
|
105
107
|
# Expected values under the null
|
106
108
|
expected = (neighborhood_sums @ annotation_sums) / background_population
|
107
109
|
# Chi-squared statistic: sum((observed - expected)^2 / expected)
|
@@ -117,41 +119,43 @@ def compute_chi2_test(
|
|
117
119
|
|
118
120
|
def compute_hypergeom_test(
|
119
121
|
neighborhoods: csr_matrix,
|
120
|
-
|
122
|
+
annotation: csr_matrix,
|
121
123
|
null_distribution: str = "network",
|
122
124
|
) -> Dict[str, Any]:
|
123
|
-
"""
|
124
|
-
Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
|
125
|
+
"""Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
|
125
126
|
|
126
127
|
Args:
|
127
128
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
128
|
-
|
129
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
129
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
130
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
130
131
|
|
131
132
|
Returns:
|
132
133
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
134
|
+
|
135
|
+
Raises:
|
136
|
+
ValueError: If an invalid null_distribution value is provided.
|
133
137
|
"""
|
134
138
|
# Get the total number of nodes in the network
|
135
139
|
total_nodes = neighborhoods.shape[1]
|
136
140
|
|
137
141
|
# Compute sums
|
138
142
|
neighborhood_sums = neighborhoods.sum(axis=0).A.flatten() # Convert to dense array
|
139
|
-
annotation_sums =
|
143
|
+
annotation_sums = annotation.sum(axis=0).A.flatten() # Convert to dense array
|
140
144
|
|
141
145
|
if null_distribution == "network":
|
142
146
|
background_population = total_nodes
|
143
|
-
elif null_distribution == "
|
144
|
-
annotated_nodes =
|
147
|
+
elif null_distribution == "annotation":
|
148
|
+
annotated_nodes = annotation.sum(axis=1).A.flatten() > 0 # Boolean mask
|
145
149
|
background_population = annotated_nodes.sum()
|
146
150
|
neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
|
147
|
-
annotation_sums =
|
151
|
+
annotation_sums = annotation[annotated_nodes].sum(axis=0).A.flatten()
|
148
152
|
else:
|
149
153
|
raise ValueError(
|
150
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
154
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
151
155
|
)
|
152
156
|
|
153
157
|
# Observed counts
|
154
|
-
annotated_in_neighborhood = neighborhoods.T @
|
158
|
+
annotated_in_neighborhood = neighborhoods.T @ annotation # Sparse result
|
155
159
|
annotated_in_neighborhood = annotated_in_neighborhood.toarray() # Convert to dense
|
156
160
|
# Align shapes for broadcasting
|
157
161
|
neighborhood_sums = neighborhood_sums.reshape(-1, 1)
|
@@ -171,22 +175,24 @@ def compute_hypergeom_test(
|
|
171
175
|
|
172
176
|
def compute_poisson_test(
|
173
177
|
neighborhoods: csr_matrix,
|
174
|
-
|
178
|
+
annotation: csr_matrix,
|
175
179
|
null_distribution: str = "network",
|
176
180
|
) -> Dict[str, Any]:
|
177
|
-
"""
|
178
|
-
Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
|
181
|
+
"""Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
|
179
182
|
|
180
183
|
Args:
|
181
184
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
182
|
-
|
183
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
185
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
186
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
184
187
|
|
185
188
|
Returns:
|
186
189
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
190
|
+
|
191
|
+
Raises:
|
192
|
+
ValueError: If an invalid null_distribution value is provided.
|
187
193
|
"""
|
188
194
|
# Matrix multiplication to get the number of annotated nodes in each neighborhood
|
189
|
-
annotated_in_neighborhood = neighborhoods @
|
195
|
+
annotated_in_neighborhood = neighborhoods @ annotation # Sparse result
|
190
196
|
# Convert annotated counts to dense for downstream calculations
|
191
197
|
annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
|
192
198
|
|
@@ -194,12 +200,12 @@ def compute_poisson_test(
|
|
194
200
|
if null_distribution == "network":
|
195
201
|
# Use the mean across neighborhoods (axis=1)
|
196
202
|
lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
|
197
|
-
elif null_distribution == "
|
203
|
+
elif null_distribution == "annotation":
|
198
204
|
# Use the mean across annotations (axis=0)
|
199
205
|
lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
|
200
206
|
else:
|
201
207
|
raise ValueError(
|
202
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
208
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
203
209
|
)
|
204
210
|
|
205
211
|
# Compute p-values for enrichment and depletion using Poisson distribution
|
@@ -211,19 +217,21 @@ def compute_poisson_test(
|
|
211
217
|
|
212
218
|
def compute_zscore_test(
|
213
219
|
neighborhoods: csr_matrix,
|
214
|
-
|
220
|
+
annotation: csr_matrix,
|
215
221
|
null_distribution: str = "network",
|
216
222
|
) -> Dict[str, Any]:
|
217
|
-
"""
|
218
|
-
Compute z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
|
223
|
+
"""Compute z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
|
219
224
|
|
220
225
|
Args:
|
221
226
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
222
|
-
|
223
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
227
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
228
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
224
229
|
|
225
230
|
Returns:
|
226
231
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
232
|
+
|
233
|
+
Raises:
|
234
|
+
ValueError: If an invalid null_distribution value is provided.
|
227
235
|
"""
|
228
236
|
# Total number of nodes in the network
|
229
237
|
total_node_count = neighborhoods.shape[1]
|
@@ -232,19 +240,19 @@ def compute_zscore_test(
|
|
232
240
|
if null_distribution == "network":
|
233
241
|
background_population = total_node_count
|
234
242
|
neighborhood_sums = neighborhoods.sum(axis=0).A.flatten() # Dense column sums
|
235
|
-
annotation_sums =
|
236
|
-
elif null_distribution == "
|
237
|
-
annotated_nodes =
|
243
|
+
annotation_sums = annotation.sum(axis=0).A.flatten() # Dense row sums
|
244
|
+
elif null_distribution == "annotation":
|
245
|
+
annotated_nodes = annotation.sum(axis=1).A.flatten() > 0 # Dense boolean mask
|
238
246
|
background_population = annotated_nodes.sum()
|
239
247
|
neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
|
240
|
-
annotation_sums =
|
248
|
+
annotation_sums = annotation[annotated_nodes].sum(axis=0).A.flatten()
|
241
249
|
else:
|
242
250
|
raise ValueError(
|
243
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
251
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
244
252
|
)
|
245
253
|
|
246
254
|
# Observed values
|
247
|
-
observed = (neighborhoods.T @
|
255
|
+
observed = (neighborhoods.T @ annotation).toarray() # Convert sparse result to dense
|
248
256
|
# Expected values under the null
|
249
257
|
neighborhood_sums = neighborhood_sums.reshape(-1, 1) # Ensure correct shape
|
250
258
|
annotation_sums = annotation_sums.reshape(1, -1) # Ensure correct shape
|
risk/network/__init__.py
CHANGED
risk/network/graph/__init__.py
CHANGED
risk/network/graph/api.py
CHANGED
@@ -9,15 +9,15 @@ from typing import Any, Dict, Union
|
|
9
9
|
import networkx as nx
|
10
10
|
import pandas as pd
|
11
11
|
|
12
|
-
from risk.
|
13
|
-
from risk.log import
|
12
|
+
from risk.annotation import define_top_annotation
|
13
|
+
from risk.log import log_header, logger, params
|
14
14
|
from risk.neighborhoods import (
|
15
15
|
define_domains,
|
16
16
|
process_neighborhoods,
|
17
17
|
trim_domains,
|
18
18
|
)
|
19
19
|
from risk.network.graph.graph import Graph
|
20
|
-
from risk.stats import calculate_significance_matrices
|
20
|
+
from risk.network.graph.stats import calculate_significance_matrices
|
21
21
|
|
22
22
|
|
23
23
|
class GraphAPI:
|
@@ -26,13 +26,13 @@ class GraphAPI:
|
|
26
26
|
The GraphAPI class provides methods to load and process network graphs, annotations, and neighborhoods.
|
27
27
|
"""
|
28
28
|
|
29
|
-
def __init__() -> None:
|
29
|
+
def __init__(self) -> None:
|
30
30
|
pass
|
31
31
|
|
32
32
|
def load_graph(
|
33
33
|
self,
|
34
34
|
network: nx.Graph,
|
35
|
-
|
35
|
+
annotation: Dict[str, Any],
|
36
36
|
neighborhoods: Dict[str, Any],
|
37
37
|
tail: str = "right",
|
38
38
|
pval_cutoff: float = 0.01,
|
@@ -50,7 +50,7 @@ class GraphAPI:
|
|
50
50
|
|
51
51
|
Args:
|
52
52
|
network (nx.Graph): The network graph.
|
53
|
-
|
53
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
54
54
|
neighborhoods (Dict[str, Any]): Neighborhood significance data.
|
55
55
|
tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
|
56
56
|
pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
|
@@ -115,9 +115,9 @@ class GraphAPI:
|
|
115
115
|
logger.debug(f"Min cluster size: {min_cluster_size}")
|
116
116
|
logger.debug(f"Max cluster size: {max_cluster_size}")
|
117
117
|
# Define top annotations based on processed neighborhoods
|
118
|
-
|
118
|
+
top_annotation = self._define_top_annotation(
|
119
119
|
network=network,
|
120
|
-
|
120
|
+
annotation=annotation,
|
121
121
|
neighborhoods=processed_neighborhoods,
|
122
122
|
min_cluster_size=min_cluster_size,
|
123
123
|
max_cluster_size=max_cluster_size,
|
@@ -130,7 +130,7 @@ class GraphAPI:
|
|
130
130
|
]
|
131
131
|
# Define domains in the network using the specified clustering settings
|
132
132
|
domains = define_domains(
|
133
|
-
|
133
|
+
top_annotation=top_annotation,
|
134
134
|
significant_neighborhoods_significance=significant_neighborhoods_significance,
|
135
135
|
linkage_criterion=linkage_criterion,
|
136
136
|
linkage_method=linkage_method,
|
@@ -140,20 +140,20 @@ class GraphAPI:
|
|
140
140
|
# Trim domains and top annotations based on cluster size constraints
|
141
141
|
domains, trimmed_domains = trim_domains(
|
142
142
|
domains=domains,
|
143
|
-
|
143
|
+
top_annotation=top_annotation,
|
144
144
|
min_cluster_size=min_cluster_size,
|
145
145
|
max_cluster_size=max_cluster_size,
|
146
146
|
)
|
147
147
|
|
148
148
|
# Prepare node mapping and significance sums for the final Graph object
|
149
|
-
ordered_nodes =
|
149
|
+
ordered_nodes = annotation["ordered_nodes"]
|
150
150
|
node_label_to_id = dict(zip(ordered_nodes, range(len(ordered_nodes))))
|
151
151
|
node_significance_sums = processed_neighborhoods["node_significance_sums"]
|
152
152
|
|
153
153
|
# Return the fully initialized Graph object
|
154
154
|
return Graph(
|
155
155
|
network=network,
|
156
|
-
|
156
|
+
annotation=annotation,
|
157
157
|
neighborhoods=neighborhoods,
|
158
158
|
domains=domains,
|
159
159
|
trimmed_domains=trimmed_domains,
|
@@ -161,10 +161,10 @@ class GraphAPI:
|
|
161
161
|
node_significance_sums=node_significance_sums,
|
162
162
|
)
|
163
163
|
|
164
|
-
def
|
164
|
+
def _define_top_annotation(
|
165
165
|
self,
|
166
166
|
network: nx.Graph,
|
167
|
-
|
167
|
+
annotation: Dict[str, Any],
|
168
168
|
neighborhoods: Dict[str, Any],
|
169
169
|
min_cluster_size: int = 5,
|
170
170
|
max_cluster_size: int = 1000,
|
@@ -173,7 +173,7 @@ class GraphAPI:
|
|
173
173
|
|
174
174
|
Args:
|
175
175
|
network (nx.Graph): The network graph.
|
176
|
-
|
176
|
+
annotation (Dict[str, Any]): Annotation data for the network.
|
177
177
|
neighborhoods (Dict[str, Any]): Neighborhood significance data.
|
178
178
|
min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
|
179
179
|
max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
|
@@ -181,17 +181,17 @@ class GraphAPI:
|
|
181
181
|
Returns:
|
182
182
|
Dict[str, Any]: Top annotations identified within the network.
|
183
183
|
"""
|
184
|
-
# Extract necessary data from
|
185
|
-
|
184
|
+
# Extract necessary data from annotation and neighborhoods
|
185
|
+
ordered_annotation = annotation["ordered_annotation"]
|
186
186
|
neighborhood_significance_sums = neighborhoods["neighborhood_significance_counts"]
|
187
187
|
significant_significance_matrix = neighborhoods["significant_significance_matrix"]
|
188
188
|
significant_binary_significance_matrix = neighborhoods[
|
189
189
|
"significant_binary_significance_matrix"
|
190
190
|
]
|
191
191
|
# Call external function to define top annotations
|
192
|
-
return
|
192
|
+
return define_top_annotation(
|
193
193
|
network=network,
|
194
|
-
ordered_annotation_labels=
|
194
|
+
ordered_annotation_labels=ordered_annotation,
|
195
195
|
neighborhood_significance_sums=neighborhood_significance_sums,
|
196
196
|
significant_significance_matrix=significant_significance_matrix,
|
197
197
|
significant_binary_significance_matrix=significant_binary_significance_matrix,
|
risk/network/graph/graph.py
CHANGED
@@ -17,7 +17,7 @@ class Graph:
|
|
17
17
|
"""A class to represent a network graph and process its nodes and edges.
|
18
18
|
|
19
19
|
The Graph class provides functionality to handle and manipulate a network graph,
|
20
|
-
including managing domains,
|
20
|
+
including managing domains, annotation, and node significance data. It also includes methods
|
21
21
|
for transforming and mapping graph coordinates, as well as generating colors based on node
|
22
22
|
significance.
|
23
23
|
"""
|
@@ -25,7 +25,7 @@ class Graph:
|
|
25
25
|
def __init__(
|
26
26
|
self,
|
27
27
|
network: nx.Graph,
|
28
|
-
|
28
|
+
annotation: Dict[str, Any],
|
29
29
|
neighborhoods: Dict[str, Any],
|
30
30
|
domains: pd.DataFrame,
|
31
31
|
trimmed_domains: pd.DataFrame,
|
@@ -36,7 +36,7 @@ class Graph:
|
|
36
36
|
|
37
37
|
Args:
|
38
38
|
network (nx.Graph): The network graph.
|
39
|
-
|
39
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
40
40
|
neighborhoods (Dict[str, Any]): Neighborhood significance data.
|
41
41
|
domains (pd.DataFrame): DataFrame containing domain data for the network nodes.
|
42
42
|
trimmed_domains (pd.DataFrame): DataFrame containing trimmed domain data for the network nodes.
|
@@ -65,20 +65,24 @@ class Graph:
|
|
65
65
|
# NOTE: Below this point, instance attributes (i.e., self) will be used!
|
66
66
|
self.domain_id_to_node_labels_map = self._create_domain_id_to_node_labels_map()
|
67
67
|
# Unfold the network's 3D coordinates to 2D and extract node coordinates
|
68
|
-
self.network = _unfold_sphere_to_plane(network)
|
69
|
-
self.node_coordinates = _extract_node_coordinates(self.network)
|
68
|
+
self.network = self._unfold_sphere_to_plane(network)
|
69
|
+
self.node_coordinates = self._extract_node_coordinates(self.network)
|
70
70
|
|
71
71
|
# NOTE: Only after the above attributes are initialized, we can create the summary
|
72
|
-
self.summary = Summary(
|
72
|
+
self.summary = Summary(annotation, neighborhoods, self)
|
73
73
|
|
74
|
-
def pop(self, domain_id:
|
75
|
-
"""Remove domain ID from
|
76
|
-
domain-specific mappings based on a given criterion, as domain attributes are stored and
|
77
|
-
accessed only in dictionaries modified by this method.
|
74
|
+
def pop(self, domain_id: int) -> List[str]:
|
75
|
+
"""Remove a domain ID from the graph and return the corresponding node labels.
|
78
76
|
|
79
77
|
Args:
|
80
|
-
key (
|
78
|
+
key (int): The domain ID key to be removed from each mapping.
|
79
|
+
|
80
|
+
Returns:
|
81
|
+
List[str]: A list of node labels associated with the domain ID.
|
81
82
|
"""
|
83
|
+
# Get the node labels associated with the domain ID
|
84
|
+
node_labels = self.domain_id_to_node_labels_map.get(domain_id, [])
|
85
|
+
|
82
86
|
# Define the domain mappings to be updated
|
83
87
|
domain_mappings = [
|
84
88
|
self.domain_id_to_node_ids_map,
|
@@ -97,8 +101,9 @@ class Graph:
|
|
97
101
|
domain_info["domains"].remove(domain_id)
|
98
102
|
domain_info["significances"].pop(domain_id)
|
99
103
|
|
100
|
-
|
101
|
-
|
104
|
+
return node_labels
|
105
|
+
|
106
|
+
def _create_domain_id_to_node_ids_map(self, domains: pd.DataFrame) -> Dict[int, Any]:
|
102
107
|
"""Create a mapping from domains to the list of node IDs belonging to each domain.
|
103
108
|
|
104
109
|
Args:
|
@@ -115,8 +120,9 @@ class Graph:
|
|
115
120
|
|
116
121
|
return domain_id_to_node_ids_map
|
117
122
|
|
118
|
-
|
119
|
-
|
123
|
+
def _create_domain_id_to_domain_terms_map(
|
124
|
+
self, trimmed_domains: pd.DataFrame
|
125
|
+
) -> Dict[int, Any]:
|
120
126
|
"""Create a mapping from domain IDs to their corresponding terms.
|
121
127
|
|
122
128
|
Args:
|
@@ -132,8 +138,8 @@ class Graph:
|
|
132
138
|
)
|
133
139
|
)
|
134
140
|
|
135
|
-
@staticmethod
|
136
141
|
def _create_domain_id_to_domain_info_map(
|
142
|
+
self,
|
137
143
|
trimmed_domains: pd.DataFrame,
|
138
144
|
) -> Dict[int, Dict[str, Any]]:
|
139
145
|
"""Create a mapping from domain IDs to their corresponding full description and significance score,
|
@@ -163,14 +169,15 @@ class Graph:
|
|
163
169
|
sorted_descriptions, sorted_scores = zip(*descriptions_and_scores)
|
164
170
|
# Assign to the domain info map
|
165
171
|
domain_info_map[int(domain_id)] = {
|
166
|
-
"full_descriptions":
|
167
|
-
"significance_scores":
|
172
|
+
"full_descriptions": sorted_descriptions,
|
173
|
+
"significance_scores": sorted_scores,
|
168
174
|
}
|
169
175
|
|
170
176
|
return domain_info_map
|
171
177
|
|
172
|
-
|
173
|
-
|
178
|
+
def _create_node_id_to_domain_ids_and_significances(
|
179
|
+
self, domains: pd.DataFrame
|
180
|
+
) -> Dict[int, Dict]:
|
174
181
|
"""Creates a dictionary mapping each node ID to its corresponding domain IDs and significance values.
|
175
182
|
|
176
183
|
Args:
|
@@ -216,54 +223,52 @@ class Graph:
|
|
216
223
|
|
217
224
|
return domain_id_to_label_map
|
218
225
|
|
226
|
+
def _unfold_sphere_to_plane(self, G: nx.Graph) -> nx.Graph:
|
227
|
+
"""Convert 3D coordinates to 2D by unfolding a sphere to a plane.
|
228
|
+
|
229
|
+
Args:
|
230
|
+
G (nx.Graph): A network graph with 3D coordinates. Each node should have 'x', 'y', and 'z' attributes.
|
219
231
|
|
220
|
-
|
221
|
-
|
232
|
+
Returns:
|
233
|
+
nx.Graph: The network graph with updated 2D coordinates (only 'x' and 'y').
|
234
|
+
"""
|
235
|
+
for node in G.nodes():
|
236
|
+
if "z" in G.nodes[node]:
|
237
|
+
# Extract 3D coordinates
|
238
|
+
x, y, z = G.nodes[node]["x"], G.nodes[node]["y"], G.nodes[node]["z"]
|
239
|
+
# Calculate spherical coordinates theta and phi from Cartesian coordinates
|
240
|
+
r = np.sqrt(x**2 + y**2 + z**2)
|
241
|
+
theta = np.arctan2(y, x)
|
242
|
+
phi = np.arccos(z / r)
|
243
|
+
|
244
|
+
# Convert spherical coordinates to 2D plane coordinates
|
245
|
+
unfolded_x = (theta + np.pi) / (2 * np.pi) # Shift and normalize theta to [0, 1]
|
246
|
+
unfolded_x = unfolded_x + 0.5 if unfolded_x < 0.5 else unfolded_x - 0.5
|
247
|
+
unfolded_y = (np.pi - phi) / np.pi # Reflect phi and normalize to [0, 1]
|
248
|
+
# Update network node attributes
|
249
|
+
G.nodes[node]["x"] = unfolded_x
|
250
|
+
G.nodes[node]["y"] = -unfolded_y
|
251
|
+
# Remove the 'z' coordinate as it's no longer needed
|
252
|
+
del G.nodes[node]["z"]
|
253
|
+
|
254
|
+
return G
|
255
|
+
|
256
|
+
def _extract_node_coordinates(self, G: nx.Graph) -> np.ndarray:
|
257
|
+
"""Extract 2D coordinates of nodes from the graph.
|
222
258
|
|
223
|
-
|
224
|
-
|
259
|
+
Args:
|
260
|
+
G (nx.Graph): The network graph with node coordinates.
|
225
261
|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
unfolded_x = (theta + np.pi) / (2 * np.pi) # Shift and normalize theta to [0, 1]
|
240
|
-
unfolded_x = unfolded_x + 0.5 if unfolded_x < 0.5 else unfolded_x - 0.5
|
241
|
-
unfolded_y = (np.pi - phi) / np.pi # Reflect phi and normalize to [0, 1]
|
242
|
-
# Update network node attributes
|
243
|
-
G.nodes[node]["x"] = unfolded_x
|
244
|
-
G.nodes[node]["y"] = -unfolded_y
|
245
|
-
# Remove the 'z' coordinate as it's no longer needed
|
246
|
-
del G.nodes[node]["z"]
|
247
|
-
|
248
|
-
return G
|
249
|
-
|
250
|
-
|
251
|
-
def _extract_node_coordinates(G: nx.Graph) -> np.ndarray:
|
252
|
-
"""Extract 2D coordinates of nodes from the graph.
|
253
|
-
|
254
|
-
Args:
|
255
|
-
G (nx.Graph): The network graph with node coordinates.
|
256
|
-
|
257
|
-
Returns:
|
258
|
-
np.ndarray: Array of node coordinates with shape (num_nodes, 2).
|
259
|
-
"""
|
260
|
-
# Extract x and y coordinates from graph nodes
|
261
|
-
x_coords = dict(G.nodes.data("x"))
|
262
|
-
y_coords = dict(G.nodes.data("y"))
|
263
|
-
coordinates_dicts = [x_coords, y_coords]
|
264
|
-
# Combine x and y coordinates into a single array
|
265
|
-
node_positions = {
|
266
|
-
node: np.array([coords[node] for coords in coordinates_dicts]) for node in x_coords
|
267
|
-
}
|
268
|
-
node_coordinates = np.vstack(list(node_positions.values()))
|
269
|
-
return node_coordinates
|
262
|
+
Returns:
|
263
|
+
np.ndarray: Array of node coordinates with shape (num_nodes, 2).
|
264
|
+
"""
|
265
|
+
# Extract x and y coordinates from graph nodes
|
266
|
+
x_coords = dict(G.nodes.data("x"))
|
267
|
+
y_coords = dict(G.nodes.data("y"))
|
268
|
+
coordinates_dicts = [x_coords, y_coords]
|
269
|
+
# Combine x and y coordinates into a single array
|
270
|
+
node_positions = {
|
271
|
+
node: np.array([coords[node] for coords in coordinates_dicts]) for node in x_coords
|
272
|
+
}
|
273
|
+
node_coordinates = np.vstack(list(node_positions.values()))
|
274
|
+
return node_coordinates
|