risk-network 0.0.6b9__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/annotations.py +61 -42
- risk/annotations/io.py +14 -14
- risk/log/__init__.py +1 -1
- risk/log/config.py +139 -0
- risk/log/params.py +6 -4
- risk/neighborhoods/community.py +25 -36
- risk/neighborhoods/domains.py +29 -27
- risk/neighborhoods/neighborhoods.py +171 -72
- risk/network/graph.py +92 -41
- risk/network/io.py +22 -26
- risk/network/plot.py +132 -19
- risk/risk.py +84 -81
- risk/stats/__init__.py +2 -2
- risk/stats/hypergeom.py +30 -107
- risk/stats/permutation/permutation.py +23 -17
- risk/stats/permutation/test_functions.py +2 -2
- risk/stats/poisson.py +44 -0
- {risk_network-0.0.6b9.dist-info → risk_network-0.0.7.dist-info}/METADATA +1 -1
- risk_network-0.0.7.dist-info/RECORD +30 -0
- {risk_network-0.0.6b9.dist-info → risk_network-0.0.7.dist-info}/WHEEL +1 -1
- risk/log/console.py +0 -16
- risk/stats/fisher_exact.py +0 -132
- risk_network-0.0.6b9.dist-info/RECORD +0 -30
- {risk_network-0.0.6b9.dist-info → risk_network-0.0.7.dist-info}/LICENSE +0 -0
- {risk_network-0.0.6b9.dist-info → risk_network-0.0.7.dist-info}/top_level.txt +0 -0
risk/risk.py
CHANGED
@@ -10,7 +10,7 @@ import numpy as np
|
|
10
10
|
import pandas as pd
|
11
11
|
|
12
12
|
from risk.annotations import AnnotationsIO, define_top_annotations
|
13
|
-
from risk.log import params,
|
13
|
+
from risk.log import params, logger, log_header, set_global_verbosity
|
14
14
|
from risk.neighborhoods import (
|
15
15
|
define_domains,
|
16
16
|
get_network_neighborhoods,
|
@@ -20,9 +20,9 @@ from risk.neighborhoods import (
|
|
20
20
|
from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
|
21
21
|
from risk.stats import (
|
22
22
|
calculate_significance_matrices,
|
23
|
-
compute_fisher_exact_test,
|
24
23
|
compute_hypergeom_test,
|
25
24
|
compute_permutation_test,
|
25
|
+
compute_poisson_test,
|
26
26
|
)
|
27
27
|
|
28
28
|
|
@@ -33,60 +33,60 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
33
33
|
and performing network-based statistical analysis, such as neighborhood significance testing.
|
34
34
|
"""
|
35
35
|
|
36
|
-
def __init__(self,
|
37
|
-
"""Initialize the RISK class with configuration settings.
|
36
|
+
def __init__(self, verbose: bool = True):
|
37
|
+
"""Initialize the RISK class with configuration settings.
|
38
|
+
|
39
|
+
Args:
|
40
|
+
verbose (bool): If False, suppresses all log messages to the console. Defaults to True.
|
41
|
+
"""
|
42
|
+
# Set global verbosity for logging
|
43
|
+
set_global_verbosity(verbose)
|
38
44
|
# Initialize and log network parameters
|
39
45
|
params.initialize()
|
40
|
-
|
41
|
-
super().__init__(*args, **kwargs)
|
46
|
+
super().__init__()
|
42
47
|
|
43
48
|
@property
|
44
|
-
def params(self):
|
45
|
-
"""Access the logged parameters.
|
49
|
+
def params(self) -> params:
|
50
|
+
"""Access the logged network parameters.
|
51
|
+
|
52
|
+
Returns:
|
53
|
+
Params: An instance of the Params class with logged parameters and methods to access or update them.
|
54
|
+
"""
|
46
55
|
return params
|
47
56
|
|
48
|
-
def
|
57
|
+
def load_neighborhoods_by_hypergeom(
|
49
58
|
self,
|
50
59
|
network: nx.Graph,
|
51
60
|
annotations: Dict[str, Any],
|
52
|
-
distance_metric: str = "
|
61
|
+
distance_metric: str = "louvain",
|
53
62
|
louvain_resolution: float = 0.1,
|
54
63
|
edge_length_threshold: float = 0.5,
|
55
|
-
score_metric: str = "sum",
|
56
64
|
null_distribution: str = "network",
|
57
|
-
num_permutations: int = 1000,
|
58
65
|
random_seed: int = 888,
|
59
|
-
max_workers: int = 1,
|
60
66
|
) -> Dict[str, Any]:
|
61
|
-
"""Load significant neighborhoods for the network using the
|
67
|
+
"""Load significant neighborhoods for the network using the hypergeometric test.
|
62
68
|
|
63
69
|
Args:
|
64
70
|
network (nx.Graph): The network graph.
|
65
|
-
annotations (
|
66
|
-
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "
|
71
|
+
annotations (dict): The annotations associated with the network.
|
72
|
+
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
|
67
73
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
68
74
|
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
69
|
-
|
70
|
-
null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
|
71
|
-
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
75
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
72
76
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
73
|
-
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
74
77
|
|
75
78
|
Returns:
|
76
79
|
dict: Computed significance of neighborhoods.
|
77
80
|
"""
|
78
|
-
|
81
|
+
log_header("Running hypergeometric test")
|
79
82
|
# Log neighborhood analysis parameters
|
80
83
|
params.log_neighborhoods(
|
81
84
|
distance_metric=distance_metric,
|
82
85
|
louvain_resolution=louvain_resolution,
|
83
86
|
edge_length_threshold=edge_length_threshold,
|
84
|
-
statistical_test_function="
|
85
|
-
score_metric=score_metric,
|
87
|
+
statistical_test_function="hypergeom",
|
86
88
|
null_distribution=null_distribution,
|
87
|
-
num_permutations=num_permutations,
|
88
89
|
random_seed=random_seed,
|
89
|
-
max_workers=max_workers,
|
90
90
|
)
|
91
91
|
|
92
92
|
# Load neighborhoods based on the network and distance metric
|
@@ -97,59 +97,49 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
97
97
|
edge_length_threshold=edge_length_threshold,
|
98
98
|
random_seed=random_seed,
|
99
99
|
)
|
100
|
-
|
101
|
-
|
102
|
-
print(f"Neighborhood scoring metric: '{score_metric}'")
|
103
|
-
print(f"Null distribution: '{null_distribution}'")
|
104
|
-
print(f"Number of permutations: {num_permutations}")
|
105
|
-
print(f"Maximum workers: {max_workers}")
|
106
|
-
# Run permutation test to compute neighborhood significance
|
107
|
-
neighborhood_significance = compute_permutation_test(
|
100
|
+
# Run hypergeometric test to compute neighborhood significance
|
101
|
+
neighborhood_significance = compute_hypergeom_test(
|
108
102
|
neighborhoods=neighborhoods,
|
109
103
|
annotations=annotations["matrix"],
|
110
|
-
score_metric=score_metric,
|
111
104
|
null_distribution=null_distribution,
|
112
|
-
num_permutations=num_permutations,
|
113
|
-
random_seed=random_seed,
|
114
|
-
max_workers=max_workers,
|
115
105
|
)
|
116
106
|
|
117
107
|
# Return the computed neighborhood significance
|
118
108
|
return neighborhood_significance
|
119
109
|
|
120
|
-
def
|
110
|
+
def load_neighborhoods_by_poisson(
|
121
111
|
self,
|
122
112
|
network: nx.Graph,
|
123
113
|
annotations: Dict[str, Any],
|
124
|
-
distance_metric: str = "
|
114
|
+
distance_metric: str = "louvain",
|
125
115
|
louvain_resolution: float = 0.1,
|
126
116
|
edge_length_threshold: float = 0.5,
|
117
|
+
null_distribution: str = "network",
|
127
118
|
random_seed: int = 888,
|
128
|
-
max_workers: int = 1,
|
129
119
|
) -> Dict[str, Any]:
|
130
|
-
"""Load significant neighborhoods for the network using the
|
120
|
+
"""Load significant neighborhoods for the network using the Poisson test.
|
131
121
|
|
132
122
|
Args:
|
133
123
|
network (nx.Graph): The network graph.
|
134
|
-
annotations (
|
135
|
-
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "
|
124
|
+
annotations (dict): The annotations associated with the network.
|
125
|
+
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
|
136
126
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
137
127
|
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
128
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
138
129
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
139
|
-
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
140
130
|
|
141
131
|
Returns:
|
142
132
|
dict: Computed significance of neighborhoods.
|
143
133
|
"""
|
144
|
-
|
134
|
+
log_header("Running Poisson test")
|
145
135
|
# Log neighborhood analysis parameters
|
146
136
|
params.log_neighborhoods(
|
147
137
|
distance_metric=distance_metric,
|
148
138
|
louvain_resolution=louvain_resolution,
|
149
139
|
edge_length_threshold=edge_length_threshold,
|
150
|
-
statistical_test_function="
|
140
|
+
statistical_test_function="poisson",
|
141
|
+
null_distribution=null_distribution,
|
151
142
|
random_seed=random_seed,
|
152
|
-
max_workers=max_workers,
|
153
143
|
)
|
154
144
|
|
155
145
|
# Load neighborhoods based on the network and distance metric
|
@@ -160,50 +150,56 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
160
150
|
edge_length_threshold=edge_length_threshold,
|
161
151
|
random_seed=random_seed,
|
162
152
|
)
|
163
|
-
|
164
|
-
|
165
|
-
print(f"Maximum workers: {max_workers}")
|
166
|
-
# Run Fisher's exact test to compute neighborhood significance
|
167
|
-
neighborhood_significance = compute_fisher_exact_test(
|
153
|
+
# Run Poisson test to compute neighborhood significance
|
154
|
+
neighborhood_significance = compute_poisson_test(
|
168
155
|
neighborhoods=neighborhoods,
|
169
156
|
annotations=annotations["matrix"],
|
170
|
-
|
157
|
+
null_distribution=null_distribution,
|
171
158
|
)
|
172
159
|
|
173
160
|
# Return the computed neighborhood significance
|
174
161
|
return neighborhood_significance
|
175
162
|
|
176
|
-
def
|
163
|
+
def load_neighborhoods_by_permutation(
|
177
164
|
self,
|
178
165
|
network: nx.Graph,
|
179
166
|
annotations: Dict[str, Any],
|
180
|
-
distance_metric: str = "
|
167
|
+
distance_metric: str = "louvain",
|
181
168
|
louvain_resolution: float = 0.1,
|
182
169
|
edge_length_threshold: float = 0.5,
|
170
|
+
score_metric: str = "sum",
|
171
|
+
null_distribution: str = "network",
|
172
|
+
num_permutations: int = 1000,
|
183
173
|
random_seed: int = 888,
|
184
174
|
max_workers: int = 1,
|
185
175
|
) -> Dict[str, Any]:
|
186
|
-
"""Load significant neighborhoods for the network using the
|
176
|
+
"""Load significant neighborhoods for the network using the permutation test.
|
187
177
|
|
188
178
|
Args:
|
189
179
|
network (nx.Graph): The network graph.
|
190
|
-
annotations (
|
191
|
-
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "
|
180
|
+
annotations (dict): The annotations associated with the network.
|
181
|
+
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
|
192
182
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
193
183
|
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
184
|
+
score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
|
185
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
186
|
+
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
194
187
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
195
188
|
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
196
189
|
|
197
190
|
Returns:
|
198
191
|
dict: Computed significance of neighborhoods.
|
199
192
|
"""
|
200
|
-
|
193
|
+
log_header("Running permutation test")
|
201
194
|
# Log neighborhood analysis parameters
|
202
195
|
params.log_neighborhoods(
|
203
196
|
distance_metric=distance_metric,
|
204
197
|
louvain_resolution=louvain_resolution,
|
205
198
|
edge_length_threshold=edge_length_threshold,
|
206
|
-
statistical_test_function="
|
199
|
+
statistical_test_function="permutation",
|
200
|
+
score_metric=score_metric,
|
201
|
+
null_distribution=null_distribution,
|
202
|
+
num_permutations=num_permutations,
|
207
203
|
random_seed=random_seed,
|
208
204
|
max_workers=max_workers,
|
209
205
|
)
|
@@ -217,12 +213,19 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
217
213
|
random_seed=random_seed,
|
218
214
|
)
|
219
215
|
|
220
|
-
# Log and display
|
221
|
-
|
222
|
-
|
223
|
-
|
216
|
+
# Log and display permutation test settings
|
217
|
+
logger.debug(f"Neighborhood scoring metric: '{score_metric}'")
|
218
|
+
logger.debug(f"Null distribution: '{null_distribution}'")
|
219
|
+
logger.debug(f"Number of permutations: {num_permutations}")
|
220
|
+
logger.debug(f"Maximum workers: {max_workers}")
|
221
|
+
# Run permutation test to compute neighborhood significance
|
222
|
+
neighborhood_significance = compute_permutation_test(
|
224
223
|
neighborhoods=neighborhoods,
|
225
224
|
annotations=annotations["matrix"],
|
225
|
+
score_metric=score_metric,
|
226
|
+
null_distribution=null_distribution,
|
227
|
+
num_permutations=num_permutations,
|
228
|
+
random_seed=random_seed,
|
226
229
|
max_workers=max_workers,
|
227
230
|
)
|
228
231
|
|
@@ -237,7 +240,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
237
240
|
tail: str = "right", # OPTIONS: "right" (enrichment), "left" (depletion), "both"
|
238
241
|
pval_cutoff: float = 0.01, # OPTIONS: Any value between 0 to 1
|
239
242
|
fdr_cutoff: float = 0.9999, # OPTIONS: Any value between 0 to 1
|
240
|
-
impute_depth: int =
|
243
|
+
impute_depth: int = 0,
|
241
244
|
prune_threshold: float = 0.0,
|
242
245
|
linkage_criterion: str = "distance",
|
243
246
|
linkage_method: str = "average",
|
@@ -254,7 +257,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
254
257
|
tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
|
255
258
|
pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
|
256
259
|
fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
|
257
|
-
impute_depth (int, optional): Depth for imputing neighbors. Defaults to
|
260
|
+
impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
|
258
261
|
prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
|
259
262
|
linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
|
260
263
|
linkage_method (str, optional): Clustering method to use. Defaults to "average".
|
@@ -266,7 +269,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
266
269
|
NetworkGraph: A fully initialized and processed NetworkGraph object.
|
267
270
|
"""
|
268
271
|
# Log the parameters and display headers
|
269
|
-
|
272
|
+
log_header("Finding significant neighborhoods")
|
270
273
|
params.log_graph(
|
271
274
|
tail=tail,
|
272
275
|
pval_cutoff=pval_cutoff,
|
@@ -280,9 +283,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
280
283
|
max_cluster_size=max_cluster_size,
|
281
284
|
)
|
282
285
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
+
logger.debug(f"p-value cutoff: {pval_cutoff}")
|
287
|
+
logger.debug(f"FDR BH cutoff: {fdr_cutoff}")
|
288
|
+
logger.debug(
|
286
289
|
f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
|
287
290
|
)
|
288
291
|
# Calculate significant neighborhoods based on the provided parameters
|
@@ -294,7 +297,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
294
297
|
fdr_cutoff=fdr_cutoff,
|
295
298
|
)
|
296
299
|
|
297
|
-
|
300
|
+
log_header("Processing neighborhoods")
|
298
301
|
# Process neighborhoods by imputing and pruning based on the given settings
|
299
302
|
processed_neighborhoods = process_neighborhoods(
|
300
303
|
network=network,
|
@@ -303,9 +306,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
303
306
|
prune_threshold=prune_threshold,
|
304
307
|
)
|
305
308
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
+
log_header("Finding top annotations")
|
310
|
+
logger.debug(f"Min cluster size: {min_cluster_size}")
|
311
|
+
logger.debug(f"Max cluster size: {max_cluster_size}")
|
309
312
|
# Define top annotations based on processed neighborhoods
|
310
313
|
top_annotations = self._define_top_annotations(
|
311
314
|
network=network,
|
@@ -315,7 +318,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
315
318
|
max_cluster_size=max_cluster_size,
|
316
319
|
)
|
317
320
|
|
318
|
-
|
321
|
+
log_header("Optimizing distance threshold for domains")
|
319
322
|
# Define domains in the network using the specified clustering settings
|
320
323
|
domains = self._define_domains(
|
321
324
|
neighborhoods=processed_neighborhoods,
|
@@ -363,7 +366,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
363
366
|
Returns:
|
364
367
|
NetworkPlotter: A NetworkPlotter object configured with the given parameters.
|
365
368
|
"""
|
366
|
-
|
369
|
+
log_header("Loading plotter")
|
367
370
|
# Log the plotter settings
|
368
371
|
params.log_plotter(
|
369
372
|
figsize=figsize,
|
@@ -380,7 +383,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
380
383
|
def _load_neighborhoods(
|
381
384
|
self,
|
382
385
|
network: nx.Graph,
|
383
|
-
distance_metric: str = "
|
386
|
+
distance_metric: str = "louvain",
|
384
387
|
louvain_resolution: float = 0.1,
|
385
388
|
edge_length_threshold: float = 0.5,
|
386
389
|
random_seed: int = 888,
|
@@ -390,7 +393,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
390
393
|
Args:
|
391
394
|
network (nx.Graph): The network graph.
|
392
395
|
annotations (pd.DataFrame): The matrix of annotations associated with the network.
|
393
|
-
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "
|
396
|
+
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
|
394
397
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
395
398
|
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
396
399
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
@@ -404,9 +407,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
404
407
|
else:
|
405
408
|
for_print_distance_metric = distance_metric
|
406
409
|
# Log and display neighborhood settings
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
+
logger.debug(f"Distance metric: '{for_print_distance_metric}'")
|
411
|
+
logger.debug(f"Edge length threshold: {edge_length_threshold}")
|
412
|
+
logger.debug(f"Random seed: {random_seed}")
|
410
413
|
|
411
414
|
# Compute neighborhoods based on the network and distance metric
|
412
415
|
neighborhoods = get_network_neighborhoods(
|
risk/stats/__init__.py
CHANGED
@@ -3,7 +3,7 @@ risk/stats
|
|
3
3
|
~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from .stats import calculate_significance_matrices
|
7
|
-
from .fisher_exact import compute_fisher_exact_test
|
8
6
|
from .hypergeom import compute_hypergeom_test
|
9
7
|
from .permutation import compute_permutation_test
|
8
|
+
from .poisson import compute_poisson_test
|
9
|
+
from .stats import calculate_significance_matrices
|
risk/stats/hypergeom.py
CHANGED
@@ -3,8 +3,6 @@ risk/stats/hypergeom
|
|
3
3
|
~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from multiprocessing import get_context, Manager
|
7
|
-
from tqdm import tqdm
|
8
6
|
from typing import Any, Dict
|
9
7
|
|
10
8
|
import numpy as np
|
@@ -12,120 +10,45 @@ from scipy.stats import hypergeom
|
|
12
10
|
|
13
11
|
|
14
12
|
def compute_hypergeom_test(
|
15
|
-
neighborhoods: np.ndarray,
|
16
|
-
annotations: np.ndarray,
|
17
|
-
max_workers: int = 4,
|
13
|
+
neighborhoods: np.ndarray, annotations: np.ndarray, null_distribution: str = "network"
|
18
14
|
) -> Dict[str, Any]:
|
19
|
-
"""Compute hypergeometric test for enrichment and depletion in neighborhoods.
|
15
|
+
"""Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
|
20
16
|
|
21
17
|
Args:
|
22
18
|
neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
|
23
19
|
annotations (np.ndarray): Binary matrix representing annotations.
|
24
|
-
|
20
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
25
21
|
|
26
22
|
Returns:
|
27
23
|
dict: Dictionary containing depletion and enrichment p-values.
|
28
24
|
"""
|
29
|
-
#
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
start_idx = end_idx
|
54
|
-
|
55
|
-
# Start the hypergeometric test process in parallel
|
56
|
-
results = pool.starmap_async(_hypergeom_process_subset, params_list, chunksize=1)
|
57
|
-
|
58
|
-
# Update progress bar based on progress_counter
|
59
|
-
while not results.ready():
|
60
|
-
progress.update(progress_counter.value - progress.n)
|
61
|
-
results.wait(0.05) # Wait for 50ms
|
62
|
-
# Ensure progress bar reaches 100%
|
63
|
-
progress.update(total_tasks - progress.n)
|
64
|
-
|
65
|
-
# Accumulate results from each worker
|
66
|
-
depletion_pvals, enrichment_pvals = [], []
|
67
|
-
for dp, ep in results.get():
|
68
|
-
depletion_pvals.extend(dp)
|
69
|
-
enrichment_pvals.extend(ep)
|
70
|
-
|
71
|
-
# Reshape the results back into arrays with the appropriate dimensions
|
72
|
-
depletion_pvals = np.array(depletion_pvals).reshape(
|
73
|
-
neighborhoods.shape[1], annotations.shape[1]
|
25
|
+
# Get the total number of nodes in the network
|
26
|
+
total_node_count = neighborhoods.shape[0]
|
27
|
+
|
28
|
+
if null_distribution == "network":
|
29
|
+
# Case 1: Use all nodes as the background
|
30
|
+
background_population = total_node_count
|
31
|
+
neighborhood_sums = np.sum(neighborhoods, axis=0, keepdims=True).T
|
32
|
+
annotation_sums = np.sum(annotations, axis=0, keepdims=True)
|
33
|
+
elif null_distribution == "annotations":
|
34
|
+
# Case 2: Only consider nodes with at least one annotation
|
35
|
+
annotated_nodes = np.sum(annotations, axis=1) > 0
|
36
|
+
background_population = np.sum(annotated_nodes)
|
37
|
+
neighborhood_sums = np.sum(neighborhoods[annotated_nodes], axis=0, keepdims=True).T
|
38
|
+
annotation_sums = np.sum(annotations[annotated_nodes], axis=0, keepdims=True)
|
39
|
+
else:
|
40
|
+
raise ValueError(
|
41
|
+
"Invalid null_distribution value. Choose either 'network' or 'annotations'."
|
42
|
+
)
|
43
|
+
|
44
|
+
# Matrix multiplication for annotated nodes in each neighborhood
|
45
|
+
annotated_in_neighborhood = neighborhoods.T @ annotations
|
46
|
+
# Calculate depletion and enrichment p-values using the hypergeometric distribution
|
47
|
+
depletion_pvals = hypergeom.cdf(
|
48
|
+
annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
|
74
49
|
)
|
75
|
-
enrichment_pvals =
|
76
|
-
|
50
|
+
enrichment_pvals = hypergeom.sf(
|
51
|
+
annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
|
77
52
|
)
|
78
53
|
|
79
|
-
return {
|
80
|
-
"depletion_pvals": depletion_pvals,
|
81
|
-
"enrichment_pvals": enrichment_pvals,
|
82
|
-
}
|
83
|
-
|
84
|
-
|
85
|
-
def _hypergeom_process_subset(
|
86
|
-
neighborhoods: np.ndarray,
|
87
|
-
annotations: np.ndarray,
|
88
|
-
start_idx: int,
|
89
|
-
end_idx: int,
|
90
|
-
progress_counter,
|
91
|
-
) -> tuple:
|
92
|
-
"""Process a subset of neighborhoods using the hypergeometric test.
|
93
|
-
|
94
|
-
Args:
|
95
|
-
neighborhoods (np.ndarray): The full neighborhood matrix.
|
96
|
-
annotations (np.ndarray): The annotation matrix.
|
97
|
-
start_idx (int): Starting index of the neighborhood-annotation pairs to process.
|
98
|
-
end_idx (int): Ending index of the neighborhood-annotation pairs to process.
|
99
|
-
progress_counter: Shared counter for tracking progress.
|
100
|
-
|
101
|
-
Returns:
|
102
|
-
tuple: Local p-values for depletion and enrichment.
|
103
|
-
"""
|
104
|
-
# Initialize lists to store p-values for depletion and enrichment
|
105
|
-
depletion_pvals = []
|
106
|
-
enrichment_pvals = []
|
107
|
-
# Process the subset of tasks assigned to this worker
|
108
|
-
for idx in range(start_idx, end_idx):
|
109
|
-
i = idx // annotations.shape[1] # Neighborhood index
|
110
|
-
j = idx % annotations.shape[1] # Annotation index
|
111
|
-
|
112
|
-
neighborhood = neighborhoods[:, i]
|
113
|
-
annotation = annotations[:, j]
|
114
|
-
|
115
|
-
# Calculate the required values for the hypergeometric test
|
116
|
-
M = annotations.shape[0] # Total number of items (population size)
|
117
|
-
n = np.sum(annotation) # Total number of successes in population
|
118
|
-
N = np.sum(neighborhood) # Total number of draws (sample size)
|
119
|
-
k = np.sum(neighborhood & annotation) # Number of successes in sample
|
120
|
-
|
121
|
-
# Perform hypergeometric test for depletion
|
122
|
-
p_value_depletion = hypergeom.cdf(k, M, n, N)
|
123
|
-
depletion_pvals.append(p_value_depletion)
|
124
|
-
# Perform hypergeometric test for enrichment
|
125
|
-
p_value_enrichment = hypergeom.sf(k - 1, M, n, N)
|
126
|
-
enrichment_pvals.append(p_value_enrichment)
|
127
|
-
|
128
|
-
# Update the shared progress counter
|
129
|
-
progress_counter.value += 1
|
130
|
-
|
131
|
-
return depletion_pvals, enrichment_pvals
|
54
|
+
return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|