risk-network 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotation/__init__.py +10 -0
- risk/{annotations/annotations.py → annotation/annotation.py} +44 -44
- risk/{annotations → annotation}/io.py +93 -92
- risk/{annotations → annotation}/nltk_setup.py +6 -5
- risk/log/__init__.py +1 -1
- risk/log/parameters.py +26 -27
- risk/neighborhoods/__init__.py +0 -1
- risk/neighborhoods/api.py +38 -38
- risk/neighborhoods/community.py +33 -4
- risk/neighborhoods/domains.py +26 -28
- risk/neighborhoods/neighborhoods.py +8 -2
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
- risk/{stats → neighborhoods/stats}/permutation/test_functions.py +4 -4
- risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
- risk/network/__init__.py +0 -2
- risk/network/graph/__init__.py +0 -2
- risk/network/graph/api.py +19 -19
- risk/network/graph/graph.py +73 -68
- risk/{stats/significance.py → network/graph/stats.py} +2 -2
- risk/network/graph/summary.py +12 -13
- risk/network/io.py +163 -20
- risk/network/plotter/__init__.py +0 -2
- risk/network/plotter/api.py +1 -1
- risk/network/plotter/canvas.py +36 -36
- risk/network/plotter/contour.py +14 -15
- risk/network/plotter/labels.py +303 -294
- risk/network/plotter/network.py +6 -6
- risk/network/plotter/plotter.py +8 -10
- risk/network/plotter/utils/colors.py +15 -8
- risk/network/plotter/utils/layout.py +3 -3
- risk/risk.py +6 -6
- risk_network-0.0.12.dist-info/METADATA +122 -0
- risk_network-0.0.12.dist-info/RECORD +40 -0
- {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
- risk/annotations/__init__.py +0 -7
- risk/network/geometry.py +0 -150
- risk/stats/__init__.py +0 -15
- risk/stats/permutation/__init__.py +0 -6
- risk_network-0.0.11.dist-info/METADATA +0 -798
- risk_network-0.0.11.dist-info/RECORD +0 -41
- {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
- {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,9 @@
|
|
1
1
|
"""
|
2
|
-
risk/stats/permutation/permutation
|
3
|
-
|
2
|
+
risk/neighborhoods/stats/permutation/permutation
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from multiprocessing import
|
6
|
+
from multiprocessing import Manager, get_context
|
7
7
|
from multiprocessing.managers import ValueProxy
|
8
8
|
from typing import Any, Callable, Dict, List, Tuple, Union
|
9
9
|
|
@@ -12,12 +12,12 @@ from scipy.sparse import csr_matrix
|
|
12
12
|
from threadpoolctl import threadpool_limits
|
13
13
|
from tqdm import tqdm
|
14
14
|
|
15
|
-
from risk.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
|
15
|
+
from risk.neighborhoods.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
|
16
16
|
|
17
17
|
|
18
18
|
def compute_permutation_test(
|
19
19
|
neighborhoods: csr_matrix,
|
20
|
-
|
20
|
+
annotation: csr_matrix,
|
21
21
|
score_metric: str = "sum",
|
22
22
|
null_distribution: str = "network",
|
23
23
|
num_permutations: int = 1000,
|
@@ -28,9 +28,9 @@ def compute_permutation_test(
|
|
28
28
|
|
29
29
|
Args:
|
30
30
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
31
|
-
|
31
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
32
32
|
score_metric (str, optional): Metric to use for scoring ('sum' or 'stdev'). Defaults to "sum".
|
33
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
33
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
34
34
|
num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
|
35
35
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
36
36
|
max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.
|
@@ -41,14 +41,14 @@ def compute_permutation_test(
|
|
41
41
|
# Ensure that the matrices are in the correct format and free of NaN values
|
42
42
|
# NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
|
43
43
|
neighborhoods = neighborhoods.astype(np.float32)
|
44
|
-
|
44
|
+
annotation = annotation.astype(np.float32)
|
45
45
|
# Retrieve the appropriate neighborhood score function based on the metric
|
46
46
|
neighborhood_score_func = DISPATCH_TEST_FUNCTIONS[score_metric]
|
47
47
|
|
48
48
|
# Run the permutation test to calculate depletion and enrichment counts
|
49
49
|
counts_depletion, counts_enrichment = _run_permutation_test(
|
50
50
|
neighborhoods=neighborhoods,
|
51
|
-
|
51
|
+
annotation=annotation,
|
52
52
|
neighborhood_score_func=neighborhood_score_func,
|
53
53
|
null_distribution=null_distribution,
|
54
54
|
num_permutations=num_permutations,
|
@@ -68,7 +68,7 @@ def compute_permutation_test(
|
|
68
68
|
|
69
69
|
def _run_permutation_test(
|
70
70
|
neighborhoods: csr_matrix,
|
71
|
-
|
71
|
+
annotation: csr_matrix,
|
72
72
|
neighborhood_score_func: Callable,
|
73
73
|
null_distribution: str = "network",
|
74
74
|
num_permutations: int = 1000,
|
@@ -79,31 +79,34 @@ def _run_permutation_test(
|
|
79
79
|
|
80
80
|
Args:
|
81
81
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
82
|
-
|
82
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
83
83
|
neighborhood_score_func (Callable): Function to calculate neighborhood scores.
|
84
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
84
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
85
85
|
num_permutations (int, optional): Number of permutations. Defaults to 1000.
|
86
86
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
87
87
|
max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
|
88
88
|
|
89
89
|
Returns:
|
90
90
|
tuple: Depletion and enrichment counts.
|
91
|
+
|
92
|
+
Raises:
|
93
|
+
ValueError: If an invalid null_distribution value is provided.
|
91
94
|
"""
|
92
95
|
# Initialize the RNG for reproducibility
|
93
96
|
rng = np.random.default_rng(seed=random_seed)
|
94
97
|
# Determine the indices to use based on the null distribution type
|
95
98
|
if null_distribution == "network":
|
96
|
-
idxs = range(
|
97
|
-
elif null_distribution == "
|
98
|
-
idxs = np.nonzero(
|
99
|
+
idxs = range(annotation.shape[0])
|
100
|
+
elif null_distribution == "annotation":
|
101
|
+
idxs = np.nonzero(annotation.getnnz(axis=1) > 0)[0]
|
99
102
|
else:
|
100
103
|
raise ValueError(
|
101
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
104
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
102
105
|
)
|
103
106
|
|
104
|
-
# Replace NaNs with zeros in the sparse
|
105
|
-
|
106
|
-
annotation_matrix_obsv =
|
107
|
+
# Replace NaNs with zeros in the sparse annotation matrix
|
108
|
+
annotation.data[np.isnan(annotation.data)] = 0
|
109
|
+
annotation_matrix_obsv = annotation[idxs]
|
107
110
|
neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
|
108
111
|
# Calculate observed neighborhood scores
|
109
112
|
with np.errstate(invalid="ignore", divide="ignore"):
|
@@ -139,7 +142,7 @@ def _run_permutation_test(
|
|
139
142
|
params_list = [
|
140
143
|
(
|
141
144
|
permutation_batches[i], # Pass the batch of precomputed permutations
|
142
|
-
|
145
|
+
annotation,
|
143
146
|
neighborhoods_matrix_obsv,
|
144
147
|
observed_neighborhood_scores,
|
145
148
|
neighborhood_score_func,
|
@@ -182,7 +185,7 @@ def _permutation_process_batch(
|
|
182
185
|
|
183
186
|
Args:
|
184
187
|
permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
|
185
|
-
annotation_matrix (csr_matrix): Sparse binary matrix representing
|
188
|
+
annotation_matrix (csr_matrix): Sparse binary matrix representing annotation.
|
186
189
|
neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
|
187
190
|
observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
|
188
191
|
neighborhood_score_func (Callable): Function to calculate neighborhood scores.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
"""
|
2
|
-
risk/stats/permutation/test_functions
|
3
|
-
|
2
|
+
risk/neighborhoods/stats/permutation/test_functions
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
6
|
import numpy as np
|
@@ -24,7 +24,7 @@ def compute_neighborhood_score_by_sum(
|
|
24
24
|
Returns:
|
25
25
|
np.ndarray: Dense array of summed attribute values for each neighborhood.
|
26
26
|
"""
|
27
|
-
# Calculate the neighborhood score as the dot product of neighborhoods and
|
27
|
+
# Calculate the neighborhood score as the dot product of neighborhoods and annotation
|
28
28
|
neighborhood_score = neighborhoods_matrix @ annotation_matrix # Sparse matrix multiplication
|
29
29
|
# Convert the result to a dense array for downstream calculations
|
30
30
|
neighborhood_score_dense = neighborhood_score.toarray()
|
@@ -43,7 +43,7 @@ def compute_neighborhood_score_by_stdev(
|
|
43
43
|
Returns:
|
44
44
|
np.ndarray: Standard deviation of the neighborhood scores.
|
45
45
|
"""
|
46
|
-
# Calculate the neighborhood score as the dot product of neighborhoods and
|
46
|
+
# Calculate the neighborhood score as the dot product of neighborhoods and annotation
|
47
47
|
neighborhood_score = neighborhoods_matrix @ annotation_matrix # Sparse matrix multiplication
|
48
48
|
# Calculate the number of elements in each neighborhood (sum of rows)
|
49
49
|
N = neighborhoods_matrix.sum(axis=1).A.flatten() # Convert to 1D array
|
@@ -1,52 +1,51 @@
|
|
1
1
|
"""
|
2
|
-
risk/stats/
|
3
|
-
|
2
|
+
risk/neighborhoods/stats/tests
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
6
|
from typing import Any, Dict
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
from scipy.sparse import csr_matrix
|
10
|
-
from scipy.stats import binom
|
11
|
-
from scipy.stats import chi2
|
12
|
-
from scipy.stats import hypergeom
|
13
|
-
from scipy.stats import norm
|
14
|
-
from scipy.stats import poisson
|
10
|
+
from scipy.stats import binom, chi2, hypergeom, norm, poisson
|
15
11
|
|
16
12
|
|
17
13
|
def compute_binom_test(
|
18
14
|
neighborhoods: csr_matrix,
|
19
|
-
|
15
|
+
annotation: csr_matrix,
|
20
16
|
null_distribution: str = "network",
|
21
17
|
) -> Dict[str, Any]:
|
22
18
|
"""Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
|
23
19
|
|
24
20
|
Args:
|
25
21
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
26
|
-
|
27
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
22
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
23
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
28
24
|
|
29
25
|
Returns:
|
30
26
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
27
|
+
|
28
|
+
Raises:
|
29
|
+
ValueError: If an invalid null_distribution value is provided.
|
31
30
|
"""
|
32
31
|
# Get the total number of nodes in the network
|
33
32
|
total_nodes = neighborhoods.shape[1]
|
34
33
|
|
35
34
|
# Compute sums (remain sparse here)
|
36
35
|
neighborhood_sizes = neighborhoods.sum(axis=1) # Row sums
|
37
|
-
annotation_totals =
|
36
|
+
annotation_totals = annotation.sum(axis=0) # Column sums
|
38
37
|
# Compute probabilities (convert to dense)
|
39
38
|
if null_distribution == "network":
|
40
39
|
p_values = (annotation_totals / total_nodes).A.flatten() # Dense 1D array
|
41
|
-
elif null_distribution == "
|
42
|
-
p_values = (annotation_totals /
|
40
|
+
elif null_distribution == "annotation":
|
41
|
+
p_values = (annotation_totals / annotation.sum()).A.flatten() # Dense 1D array
|
43
42
|
else:
|
44
43
|
raise ValueError(
|
45
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
44
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
46
45
|
)
|
47
46
|
|
48
47
|
# Observed counts (sparse matrix multiplication)
|
49
|
-
annotated_counts = neighborhoods @
|
48
|
+
annotated_counts = neighborhoods @ annotation # Sparse result
|
50
49
|
annotated_counts_dense = annotated_counts.toarray() # Convert for dense operations
|
51
50
|
|
52
51
|
# Compute enrichment and depletion p-values
|
@@ -58,18 +57,21 @@ def compute_binom_test(
|
|
58
57
|
|
59
58
|
def compute_chi2_test(
|
60
59
|
neighborhoods: csr_matrix,
|
61
|
-
|
60
|
+
annotation: csr_matrix,
|
62
61
|
null_distribution: str = "network",
|
63
62
|
) -> Dict[str, Any]:
|
64
63
|
"""Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
|
65
64
|
|
66
65
|
Args:
|
67
66
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
68
|
-
|
69
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
67
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
68
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
70
69
|
|
71
70
|
Returns:
|
72
71
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
72
|
+
|
73
|
+
Raises:
|
74
|
+
ValueError: If an invalid null_distribution value is provided.
|
73
75
|
"""
|
74
76
|
# Total number of nodes in the network
|
75
77
|
total_node_count = neighborhoods.shape[0]
|
@@ -78,22 +80,22 @@ def compute_chi2_test(
|
|
78
80
|
# Case 1: Use all nodes as the background
|
79
81
|
background_population = total_node_count
|
80
82
|
neighborhood_sums = neighborhoods.sum(axis=0) # Column sums of neighborhoods
|
81
|
-
annotation_sums =
|
82
|
-
elif null_distribution == "
|
83
|
+
annotation_sums = annotation.sum(axis=0) # Column sums of annotations
|
84
|
+
elif null_distribution == "annotation":
|
83
85
|
# Case 2: Only consider nodes with at least one annotation
|
84
86
|
annotated_nodes = (
|
85
|
-
np.ravel(
|
87
|
+
np.ravel(annotation.sum(axis=1)) > 0
|
86
88
|
) # Row-wise sum to filter nodes with annotations
|
87
89
|
background_population = annotated_nodes.sum() # Total number of annotated nodes
|
88
90
|
neighborhood_sums = neighborhoods[annotated_nodes].sum(
|
89
91
|
axis=0
|
90
92
|
) # Neighborhood sums for annotated nodes
|
91
|
-
annotation_sums =
|
93
|
+
annotation_sums = annotation[annotated_nodes].sum(
|
92
94
|
axis=0
|
93
95
|
) # Annotation sums for annotated nodes
|
94
96
|
else:
|
95
97
|
raise ValueError(
|
96
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
98
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
97
99
|
)
|
98
100
|
|
99
101
|
# Convert to dense arrays for downstream computations
|
@@ -101,7 +103,7 @@ def compute_chi2_test(
|
|
101
103
|
annotation_sums = np.asarray(annotation_sums).reshape(1, -1) # Ensure row vector shape
|
102
104
|
|
103
105
|
# Observed values: number of annotated nodes in each neighborhood
|
104
|
-
observed = neighborhoods.T @
|
106
|
+
observed = neighborhoods.T @ annotation # Shape: (neighborhoods, annotation)
|
105
107
|
# Expected values under the null
|
106
108
|
expected = (neighborhood_sums @ annotation_sums) / background_population
|
107
109
|
# Chi-squared statistic: sum((observed - expected)^2 / expected)
|
@@ -117,41 +119,43 @@ def compute_chi2_test(
|
|
117
119
|
|
118
120
|
def compute_hypergeom_test(
|
119
121
|
neighborhoods: csr_matrix,
|
120
|
-
|
122
|
+
annotation: csr_matrix,
|
121
123
|
null_distribution: str = "network",
|
122
124
|
) -> Dict[str, Any]:
|
123
|
-
"""
|
124
|
-
Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
|
125
|
+
"""Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
|
125
126
|
|
126
127
|
Args:
|
127
128
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
128
|
-
|
129
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
129
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
130
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
130
131
|
|
131
132
|
Returns:
|
132
133
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
134
|
+
|
135
|
+
Raises:
|
136
|
+
ValueError: If an invalid null_distribution value is provided.
|
133
137
|
"""
|
134
138
|
# Get the total number of nodes in the network
|
135
139
|
total_nodes = neighborhoods.shape[1]
|
136
140
|
|
137
141
|
# Compute sums
|
138
142
|
neighborhood_sums = neighborhoods.sum(axis=0).A.flatten() # Convert to dense array
|
139
|
-
annotation_sums =
|
143
|
+
annotation_sums = annotation.sum(axis=0).A.flatten() # Convert to dense array
|
140
144
|
|
141
145
|
if null_distribution == "network":
|
142
146
|
background_population = total_nodes
|
143
|
-
elif null_distribution == "
|
144
|
-
annotated_nodes =
|
147
|
+
elif null_distribution == "annotation":
|
148
|
+
annotated_nodes = annotation.sum(axis=1).A.flatten() > 0 # Boolean mask
|
145
149
|
background_population = annotated_nodes.sum()
|
146
150
|
neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
|
147
|
-
annotation_sums =
|
151
|
+
annotation_sums = annotation[annotated_nodes].sum(axis=0).A.flatten()
|
148
152
|
else:
|
149
153
|
raise ValueError(
|
150
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
154
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
151
155
|
)
|
152
156
|
|
153
157
|
# Observed counts
|
154
|
-
annotated_in_neighborhood = neighborhoods.T @
|
158
|
+
annotated_in_neighborhood = neighborhoods.T @ annotation # Sparse result
|
155
159
|
annotated_in_neighborhood = annotated_in_neighborhood.toarray() # Convert to dense
|
156
160
|
# Align shapes for broadcasting
|
157
161
|
neighborhood_sums = neighborhood_sums.reshape(-1, 1)
|
@@ -171,22 +175,24 @@ def compute_hypergeom_test(
|
|
171
175
|
|
172
176
|
def compute_poisson_test(
|
173
177
|
neighborhoods: csr_matrix,
|
174
|
-
|
178
|
+
annotation: csr_matrix,
|
175
179
|
null_distribution: str = "network",
|
176
180
|
) -> Dict[str, Any]:
|
177
|
-
"""
|
178
|
-
Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
|
181
|
+
"""Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
|
179
182
|
|
180
183
|
Args:
|
181
184
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
182
|
-
|
183
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
185
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
186
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
184
187
|
|
185
188
|
Returns:
|
186
189
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
190
|
+
|
191
|
+
Raises:
|
192
|
+
ValueError: If an invalid null_distribution value is provided.
|
187
193
|
"""
|
188
194
|
# Matrix multiplication to get the number of annotated nodes in each neighborhood
|
189
|
-
annotated_in_neighborhood = neighborhoods @
|
195
|
+
annotated_in_neighborhood = neighborhoods @ annotation # Sparse result
|
190
196
|
# Convert annotated counts to dense for downstream calculations
|
191
197
|
annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
|
192
198
|
|
@@ -194,12 +200,12 @@ def compute_poisson_test(
|
|
194
200
|
if null_distribution == "network":
|
195
201
|
# Use the mean across neighborhoods (axis=1)
|
196
202
|
lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
|
197
|
-
elif null_distribution == "
|
203
|
+
elif null_distribution == "annotation":
|
198
204
|
# Use the mean across annotations (axis=0)
|
199
205
|
lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
|
200
206
|
else:
|
201
207
|
raise ValueError(
|
202
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
208
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
203
209
|
)
|
204
210
|
|
205
211
|
# Compute p-values for enrichment and depletion using Poisson distribution
|
@@ -211,19 +217,21 @@ def compute_poisson_test(
|
|
211
217
|
|
212
218
|
def compute_zscore_test(
|
213
219
|
neighborhoods: csr_matrix,
|
214
|
-
|
220
|
+
annotation: csr_matrix,
|
215
221
|
null_distribution: str = "network",
|
216
222
|
) -> Dict[str, Any]:
|
217
|
-
"""
|
218
|
-
Compute z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
|
223
|
+
"""Compute z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
|
219
224
|
|
220
225
|
Args:
|
221
226
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
222
|
-
|
223
|
-
null_distribution (str, optional): Type of null distribution ('network' or '
|
227
|
+
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
228
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
224
229
|
|
225
230
|
Returns:
|
226
231
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
232
|
+
|
233
|
+
Raises:
|
234
|
+
ValueError: If an invalid null_distribution value is provided.
|
227
235
|
"""
|
228
236
|
# Total number of nodes in the network
|
229
237
|
total_node_count = neighborhoods.shape[1]
|
@@ -232,19 +240,19 @@ def compute_zscore_test(
|
|
232
240
|
if null_distribution == "network":
|
233
241
|
background_population = total_node_count
|
234
242
|
neighborhood_sums = neighborhoods.sum(axis=0).A.flatten() # Dense column sums
|
235
|
-
annotation_sums =
|
236
|
-
elif null_distribution == "
|
237
|
-
annotated_nodes =
|
243
|
+
annotation_sums = annotation.sum(axis=0).A.flatten() # Dense row sums
|
244
|
+
elif null_distribution == "annotation":
|
245
|
+
annotated_nodes = annotation.sum(axis=1).A.flatten() > 0 # Dense boolean mask
|
238
246
|
background_population = annotated_nodes.sum()
|
239
247
|
neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
|
240
|
-
annotation_sums =
|
248
|
+
annotation_sums = annotation[annotated_nodes].sum(axis=0).A.flatten()
|
241
249
|
else:
|
242
250
|
raise ValueError(
|
243
|
-
"Invalid null_distribution value. Choose either 'network' or '
|
251
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotation'."
|
244
252
|
)
|
245
253
|
|
246
254
|
# Observed values
|
247
|
-
observed = (neighborhoods.T @
|
255
|
+
observed = (neighborhoods.T @ annotation).toarray() # Convert sparse result to dense
|
248
256
|
# Expected values under the null
|
249
257
|
neighborhood_sums = neighborhood_sums.reshape(-1, 1) # Ensure correct shape
|
250
258
|
annotation_sums = annotation_sums.reshape(1, -1) # Ensure correct shape
|
risk/network/__init__.py
CHANGED
risk/network/graph/__init__.py
CHANGED
risk/network/graph/api.py
CHANGED
@@ -9,15 +9,15 @@ from typing import Any, Dict, Union
|
|
9
9
|
import networkx as nx
|
10
10
|
import pandas as pd
|
11
11
|
|
12
|
-
from risk.
|
13
|
-
from risk.log import
|
12
|
+
from risk.annotation import define_top_annotation
|
13
|
+
from risk.log import log_header, logger, params
|
14
14
|
from risk.neighborhoods import (
|
15
15
|
define_domains,
|
16
16
|
process_neighborhoods,
|
17
17
|
trim_domains,
|
18
18
|
)
|
19
19
|
from risk.network.graph.graph import Graph
|
20
|
-
from risk.stats import calculate_significance_matrices
|
20
|
+
from risk.network.graph.stats import calculate_significance_matrices
|
21
21
|
|
22
22
|
|
23
23
|
class GraphAPI:
|
@@ -26,13 +26,13 @@ class GraphAPI:
|
|
26
26
|
The GraphAPI class provides methods to load and process network graphs, annotations, and neighborhoods.
|
27
27
|
"""
|
28
28
|
|
29
|
-
def __init__() -> None:
|
29
|
+
def __init__(self) -> None:
|
30
30
|
pass
|
31
31
|
|
32
32
|
def load_graph(
|
33
33
|
self,
|
34
34
|
network: nx.Graph,
|
35
|
-
|
35
|
+
annotation: Dict[str, Any],
|
36
36
|
neighborhoods: Dict[str, Any],
|
37
37
|
tail: str = "right",
|
38
38
|
pval_cutoff: float = 0.01,
|
@@ -50,7 +50,7 @@ class GraphAPI:
|
|
50
50
|
|
51
51
|
Args:
|
52
52
|
network (nx.Graph): The network graph.
|
53
|
-
|
53
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
54
54
|
neighborhoods (Dict[str, Any]): Neighborhood significance data.
|
55
55
|
tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
|
56
56
|
pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
|
@@ -115,9 +115,9 @@ class GraphAPI:
|
|
115
115
|
logger.debug(f"Min cluster size: {min_cluster_size}")
|
116
116
|
logger.debug(f"Max cluster size: {max_cluster_size}")
|
117
117
|
# Define top annotations based on processed neighborhoods
|
118
|
-
|
118
|
+
top_annotation = self._define_top_annotation(
|
119
119
|
network=network,
|
120
|
-
|
120
|
+
annotation=annotation,
|
121
121
|
neighborhoods=processed_neighborhoods,
|
122
122
|
min_cluster_size=min_cluster_size,
|
123
123
|
max_cluster_size=max_cluster_size,
|
@@ -130,7 +130,7 @@ class GraphAPI:
|
|
130
130
|
]
|
131
131
|
# Define domains in the network using the specified clustering settings
|
132
132
|
domains = define_domains(
|
133
|
-
|
133
|
+
top_annotation=top_annotation,
|
134
134
|
significant_neighborhoods_significance=significant_neighborhoods_significance,
|
135
135
|
linkage_criterion=linkage_criterion,
|
136
136
|
linkage_method=linkage_method,
|
@@ -140,20 +140,20 @@ class GraphAPI:
|
|
140
140
|
# Trim domains and top annotations based on cluster size constraints
|
141
141
|
domains, trimmed_domains = trim_domains(
|
142
142
|
domains=domains,
|
143
|
-
|
143
|
+
top_annotation=top_annotation,
|
144
144
|
min_cluster_size=min_cluster_size,
|
145
145
|
max_cluster_size=max_cluster_size,
|
146
146
|
)
|
147
147
|
|
148
148
|
# Prepare node mapping and significance sums for the final Graph object
|
149
|
-
ordered_nodes =
|
149
|
+
ordered_nodes = annotation["ordered_nodes"]
|
150
150
|
node_label_to_id = dict(zip(ordered_nodes, range(len(ordered_nodes))))
|
151
151
|
node_significance_sums = processed_neighborhoods["node_significance_sums"]
|
152
152
|
|
153
153
|
# Return the fully initialized Graph object
|
154
154
|
return Graph(
|
155
155
|
network=network,
|
156
|
-
|
156
|
+
annotation=annotation,
|
157
157
|
neighborhoods=neighborhoods,
|
158
158
|
domains=domains,
|
159
159
|
trimmed_domains=trimmed_domains,
|
@@ -161,10 +161,10 @@ class GraphAPI:
|
|
161
161
|
node_significance_sums=node_significance_sums,
|
162
162
|
)
|
163
163
|
|
164
|
-
def
|
164
|
+
def _define_top_annotation(
|
165
165
|
self,
|
166
166
|
network: nx.Graph,
|
167
|
-
|
167
|
+
annotation: Dict[str, Any],
|
168
168
|
neighborhoods: Dict[str, Any],
|
169
169
|
min_cluster_size: int = 5,
|
170
170
|
max_cluster_size: int = 1000,
|
@@ -173,7 +173,7 @@ class GraphAPI:
|
|
173
173
|
|
174
174
|
Args:
|
175
175
|
network (nx.Graph): The network graph.
|
176
|
-
|
176
|
+
annotation (Dict[str, Any]): Annotation data for the network.
|
177
177
|
neighborhoods (Dict[str, Any]): Neighborhood significance data.
|
178
178
|
min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
|
179
179
|
max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
|
@@ -181,17 +181,17 @@ class GraphAPI:
|
|
181
181
|
Returns:
|
182
182
|
Dict[str, Any]: Top annotations identified within the network.
|
183
183
|
"""
|
184
|
-
# Extract necessary data from
|
185
|
-
|
184
|
+
# Extract necessary data from annotation and neighborhoods
|
185
|
+
ordered_annotation = annotation["ordered_annotation"]
|
186
186
|
neighborhood_significance_sums = neighborhoods["neighborhood_significance_counts"]
|
187
187
|
significant_significance_matrix = neighborhoods["significant_significance_matrix"]
|
188
188
|
significant_binary_significance_matrix = neighborhoods[
|
189
189
|
"significant_binary_significance_matrix"
|
190
190
|
]
|
191
191
|
# Call external function to define top annotations
|
192
|
-
return
|
192
|
+
return define_top_annotation(
|
193
193
|
network=network,
|
194
|
-
ordered_annotation_labels=
|
194
|
+
ordered_annotation_labels=ordered_annotation,
|
195
195
|
neighborhood_significance_sums=neighborhood_significance_sums,
|
196
196
|
significant_significance_matrix=significant_significance_matrix,
|
197
197
|
significant_binary_significance_matrix=significant_binary_significance_matrix,
|