risk-network 0.0.7b11__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/__init__.py +1 -1
- risk/annotations/annotations.py +86 -54
- risk/annotations/io.py +14 -14
- risk/log/__init__.py +1 -1
- risk/log/console.py +139 -0
- risk/log/params.py +6 -6
- risk/neighborhoods/community.py +68 -61
- risk/neighborhoods/domains.py +43 -20
- risk/neighborhoods/neighborhoods.py +136 -71
- risk/network/geometry.py +5 -2
- risk/network/graph.py +69 -235
- risk/network/io.py +56 -18
- risk/network/plot/__init__.py +6 -0
- risk/network/plot/canvas.py +290 -0
- risk/network/plot/contour.py +327 -0
- risk/network/plot/labels.py +929 -0
- risk/network/plot/network.py +288 -0
- risk/network/plot/plotter.py +137 -0
- risk/network/plot/utils/color.py +424 -0
- risk/network/plot/utils/layout.py +91 -0
- risk/risk.py +84 -58
- risk/stats/hypergeom.py +1 -1
- risk/stats/permutation/permutation.py +21 -8
- risk/stats/poisson.py +2 -2
- risk/stats/stats.py +12 -10
- {risk_network-0.0.7b11.dist-info → risk_network-0.0.8.dist-info}/METADATA +84 -21
- risk_network-0.0.8.dist-info/RECORD +37 -0
- {risk_network-0.0.7b11.dist-info → risk_network-0.0.8.dist-info}/WHEEL +1 -1
- risk/log/config.py +0 -48
- risk/network/plot.py +0 -1343
- risk_network-0.0.7b11.dist-info/RECORD +0 -30
- {risk_network-0.0.7b11.dist-info → risk_network-0.0.8.dist-info}/LICENSE +0 -0
- {risk_network-0.0.7b11.dist-info → risk_network-0.0.8.dist-info}/top_level.txt +0 -0
risk/risk.py
CHANGED
@@ -3,7 +3,8 @@ risk/risk
|
|
3
3
|
~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
|
6
|
+
import copy
|
7
|
+
from typing import Any, Dict, List, Tuple, Union
|
7
8
|
|
8
9
|
import networkx as nx
|
9
10
|
import numpy as np
|
@@ -33,24 +34,17 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
33
34
|
and performing network-based statistical analysis, such as neighborhood significance testing.
|
34
35
|
"""
|
35
36
|
|
36
|
-
def __init__(self,
|
37
|
+
def __init__(self, verbose: bool = True):
|
37
38
|
"""Initialize the RISK class with configuration settings.
|
38
39
|
|
39
40
|
Args:
|
40
41
|
verbose (bool): If False, suppresses all log messages to the console. Defaults to True.
|
41
|
-
*args: Variable length argument list.
|
42
|
-
**kwargs: Arbitrary keyword arguments.
|
43
|
-
|
44
|
-
Note:
|
45
|
-
- All *args and **kwargs are passed to NetworkIO's __init__ method.
|
46
|
-
- AnnotationsIO does not take any arguments and is initialized without them.
|
47
42
|
"""
|
48
43
|
# Set global verbosity for logging
|
49
44
|
set_global_verbosity(verbose)
|
50
45
|
# Initialize and log network parameters
|
51
46
|
params.initialize()
|
52
|
-
|
53
|
-
super().__init__(*args, **kwargs)
|
47
|
+
super().__init__()
|
54
48
|
|
55
49
|
@property
|
56
50
|
def params(self) -> params:
|
@@ -65,9 +59,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
65
59
|
self,
|
66
60
|
network: nx.Graph,
|
67
61
|
annotations: Dict[str, Any],
|
68
|
-
distance_metric: str = "louvain",
|
62
|
+
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
69
63
|
louvain_resolution: float = 0.1,
|
70
|
-
edge_length_threshold: float = 0.5,
|
64
|
+
edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
|
71
65
|
null_distribution: str = "network",
|
72
66
|
random_seed: int = 888,
|
73
67
|
) -> Dict[str, Any]:
|
@@ -75,15 +69,19 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
75
69
|
|
76
70
|
Args:
|
77
71
|
network (nx.Graph): The network graph.
|
78
|
-
annotations (
|
79
|
-
distance_metric (str, optional):
|
72
|
+
annotations (Dict[str, Any]): The annotations associated with the network.
|
73
|
+
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
74
|
+
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
|
75
|
+
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
80
76
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
81
|
-
edge_length_threshold (float, optional): Edge length threshold for
|
77
|
+
edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
|
78
|
+
Can be a single float for one threshold or a list/tuple/ndarray of floats corresponding to multiple thresholds.
|
79
|
+
Defaults to 0.5.
|
82
80
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
83
81
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
84
82
|
|
85
83
|
Returns:
|
86
|
-
|
84
|
+
Dict[str, Any]: Computed significance of neighborhoods.
|
87
85
|
"""
|
88
86
|
log_header("Running hypergeometric test")
|
89
87
|
# Log neighborhood analysis parameters
|
@@ -96,6 +94,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
96
94
|
random_seed=random_seed,
|
97
95
|
)
|
98
96
|
|
97
|
+
# Make a copy of the network to avoid modifying the original
|
98
|
+
network = copy.deepcopy(network)
|
99
|
+
|
99
100
|
# Load neighborhoods based on the network and distance metric
|
100
101
|
neighborhoods = self._load_neighborhoods(
|
101
102
|
network,
|
@@ -118,9 +119,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
118
119
|
self,
|
119
120
|
network: nx.Graph,
|
120
121
|
annotations: Dict[str, Any],
|
121
|
-
distance_metric: str = "louvain",
|
122
|
+
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
122
123
|
louvain_resolution: float = 0.1,
|
123
|
-
edge_length_threshold: float = 0.5,
|
124
|
+
edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
|
124
125
|
null_distribution: str = "network",
|
125
126
|
random_seed: int = 888,
|
126
127
|
) -> Dict[str, Any]:
|
@@ -128,15 +129,19 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
128
129
|
|
129
130
|
Args:
|
130
131
|
network (nx.Graph): The network graph.
|
131
|
-
annotations (
|
132
|
-
distance_metric (str, optional):
|
132
|
+
annotations (Dict[str, Any]): The annotations associated with the network.
|
133
|
+
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
134
|
+
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
|
135
|
+
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
133
136
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
134
|
-
edge_length_threshold (float, optional): Edge length threshold for
|
137
|
+
edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
|
138
|
+
Can be a single float for one threshold or a list/tuple/ndarray of floats corresponding to multiple thresholds.
|
139
|
+
Defaults to 0.5.
|
135
140
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
136
141
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
137
142
|
|
138
143
|
Returns:
|
139
|
-
|
144
|
+
Dict[str, Any]: Computed significance of neighborhoods.
|
140
145
|
"""
|
141
146
|
log_header("Running Poisson test")
|
142
147
|
# Log neighborhood analysis parameters
|
@@ -149,6 +154,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
149
154
|
random_seed=random_seed,
|
150
155
|
)
|
151
156
|
|
157
|
+
# Make a copy of the network to avoid modifying the original
|
158
|
+
network = copy.deepcopy(network)
|
159
|
+
|
152
160
|
# Load neighborhoods based on the network and distance metric
|
153
161
|
neighborhoods = self._load_neighborhoods(
|
154
162
|
network,
|
@@ -171,9 +179,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
171
179
|
self,
|
172
180
|
network: nx.Graph,
|
173
181
|
annotations: Dict[str, Any],
|
174
|
-
distance_metric: str = "louvain",
|
182
|
+
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
175
183
|
louvain_resolution: float = 0.1,
|
176
|
-
edge_length_threshold: float = 0.5,
|
184
|
+
edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
|
177
185
|
score_metric: str = "sum",
|
178
186
|
null_distribution: str = "network",
|
179
187
|
num_permutations: int = 1000,
|
@@ -184,10 +192,14 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
184
192
|
|
185
193
|
Args:
|
186
194
|
network (nx.Graph): The network graph.
|
187
|
-
annotations (
|
188
|
-
distance_metric (str, optional):
|
195
|
+
annotations (Dict[str, Any]): The annotations associated with the network.
|
196
|
+
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
197
|
+
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
|
198
|
+
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
189
199
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
190
|
-
edge_length_threshold (float, optional): Edge length threshold for
|
200
|
+
edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
|
201
|
+
Can be a single float for one threshold or a list/tuple/ndarray of floats corresponding to multiple thresholds.
|
202
|
+
Defaults to 0.5.
|
191
203
|
score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
|
192
204
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
193
205
|
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
@@ -195,7 +207,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
195
207
|
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
196
208
|
|
197
209
|
Returns:
|
198
|
-
|
210
|
+
Dict[str, Any]: Computed significance of neighborhoods.
|
199
211
|
"""
|
200
212
|
log_header("Running permutation test")
|
201
213
|
# Log neighborhood analysis parameters
|
@@ -211,6 +223,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
211
223
|
max_workers=max_workers,
|
212
224
|
)
|
213
225
|
|
226
|
+
# Make a copy of the network to avoid modifying the original
|
227
|
+
network = copy.deepcopy(network)
|
228
|
+
|
214
229
|
# Load neighborhoods based on the network and distance metric
|
215
230
|
neighborhoods = self._load_neighborhoods(
|
216
231
|
network,
|
@@ -221,10 +236,10 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
221
236
|
)
|
222
237
|
|
223
238
|
# Log and display permutation test settings
|
224
|
-
logger.
|
225
|
-
logger.
|
226
|
-
logger.
|
227
|
-
logger.
|
239
|
+
logger.debug(f"Neighborhood scoring metric: '{score_metric}'")
|
240
|
+
logger.debug(f"Null distribution: '{null_distribution}'")
|
241
|
+
logger.debug(f"Number of permutations: {num_permutations}")
|
242
|
+
logger.debug(f"Maximum workers: {max_workers}")
|
228
243
|
# Run permutation test to compute neighborhood significance
|
229
244
|
neighborhood_significance = compute_permutation_test(
|
230
245
|
neighborhoods=neighborhoods,
|
@@ -260,7 +275,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
260
275
|
Args:
|
261
276
|
network (nx.Graph): The network graph.
|
262
277
|
annotations (pd.DataFrame): DataFrame containing annotation data for the network.
|
263
|
-
neighborhoods (
|
278
|
+
neighborhoods (Dict[str, Any]): Neighborhood enrichment data.
|
264
279
|
tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
|
265
280
|
pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
|
266
281
|
fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
|
@@ -290,9 +305,12 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
290
305
|
max_cluster_size=max_cluster_size,
|
291
306
|
)
|
292
307
|
|
293
|
-
|
294
|
-
|
295
|
-
|
308
|
+
# Make a copy of the network to avoid modifying the original
|
309
|
+
network = copy.deepcopy(network)
|
310
|
+
|
311
|
+
logger.debug(f"p-value cutoff: {pval_cutoff}")
|
312
|
+
logger.debug(f"FDR BH cutoff: {fdr_cutoff}")
|
313
|
+
logger.debug(
|
296
314
|
f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
|
297
315
|
)
|
298
316
|
# Calculate significant neighborhoods based on the provided parameters
|
@@ -314,8 +332,8 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
314
332
|
)
|
315
333
|
|
316
334
|
log_header("Finding top annotations")
|
317
|
-
logger.
|
318
|
-
logger.
|
335
|
+
logger.debug(f"Min cluster size: {min_cluster_size}")
|
336
|
+
logger.debug(f"Max cluster size: {max_cluster_size}")
|
319
337
|
# Define top annotations based on processed neighborhoods
|
320
338
|
top_annotations = self._define_top_annotations(
|
321
339
|
network=network,
|
@@ -360,39 +378,41 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
360
378
|
def load_plotter(
|
361
379
|
self,
|
362
380
|
graph: NetworkGraph,
|
363
|
-
figsize: Tuple = (10, 10),
|
381
|
+
figsize: Union[List, Tuple, np.ndarray] = (10, 10),
|
364
382
|
background_color: str = "white",
|
383
|
+
background_alpha: Union[float, None] = 1.0,
|
384
|
+
pad: float = 0.3,
|
365
385
|
) -> NetworkPlotter:
|
366
386
|
"""Get a NetworkPlotter object for plotting.
|
367
387
|
|
368
388
|
Args:
|
369
389
|
graph (NetworkGraph): The graph to plot.
|
370
|
-
figsize (
|
390
|
+
figsize (List, Tuple, or np.ndarray, optional): Size of the figure. Defaults to (10, 10).
|
371
391
|
background_color (str, optional): Background color of the plot. Defaults to "white".
|
392
|
+
background_alpha (float, None, optional): Transparency level of the background color. If provided, it overrides
|
393
|
+
any existing alpha values found in background_color. Defaults to 1.0.
|
394
|
+
pad (float, optional): Padding value to adjust the axis limits. Defaults to 0.3.
|
372
395
|
|
373
396
|
Returns:
|
374
397
|
NetworkPlotter: A NetworkPlotter object configured with the given parameters.
|
375
398
|
"""
|
376
399
|
log_header("Loading plotter")
|
377
|
-
# Log the plotter settings
|
378
|
-
params.log_plotter(
|
379
|
-
figsize=figsize,
|
380
|
-
background_color=background_color,
|
381
|
-
)
|
382
400
|
|
383
401
|
# Initialize and return a NetworkPlotter object
|
384
402
|
return NetworkPlotter(
|
385
403
|
graph,
|
386
404
|
figsize=figsize,
|
387
405
|
background_color=background_color,
|
406
|
+
background_alpha=background_alpha,
|
407
|
+
pad=pad,
|
388
408
|
)
|
389
409
|
|
390
410
|
def _load_neighborhoods(
|
391
411
|
self,
|
392
412
|
network: nx.Graph,
|
393
|
-
distance_metric: str = "louvain",
|
413
|
+
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
394
414
|
louvain_resolution: float = 0.1,
|
395
|
-
edge_length_threshold: float = 0.5,
|
415
|
+
edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
|
396
416
|
random_seed: int = 888,
|
397
417
|
) -> np.ndarray:
|
398
418
|
"""Load significant neighborhoods for the network.
|
@@ -400,9 +420,13 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
400
420
|
Args:
|
401
421
|
network (nx.Graph): The network graph.
|
402
422
|
annotations (pd.DataFrame): The matrix of annotations associated with the network.
|
403
|
-
distance_metric (str, optional):
|
423
|
+
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
424
|
+
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
|
425
|
+
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
404
426
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
405
|
-
edge_length_threshold (float, optional): Edge length threshold for
|
427
|
+
edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
|
428
|
+
Can be a single float for one threshold or a list/tuple/ndarray of floats corresponding to multiple thresholds.
|
429
|
+
Defaults to 0.5.
|
406
430
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
407
431
|
|
408
432
|
Returns:
|
@@ -414,9 +438,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
414
438
|
else:
|
415
439
|
for_print_distance_metric = distance_metric
|
416
440
|
# Log and display neighborhood settings
|
417
|
-
logger.
|
418
|
-
logger.
|
419
|
-
logger.
|
441
|
+
logger.debug(f"Distance metric: '{for_print_distance_metric}'")
|
442
|
+
logger.debug(f"Edge length threshold: {edge_length_threshold}")
|
443
|
+
logger.debug(f"Random seed: {random_seed}")
|
420
444
|
|
421
445
|
# Compute neighborhoods based on the network and distance metric
|
422
446
|
neighborhoods = get_network_neighborhoods(
|
@@ -442,24 +466,26 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
442
466
|
|
443
467
|
Args:
|
444
468
|
network (nx.Graph): The network graph.
|
445
|
-
annotations (
|
446
|
-
neighborhoods (
|
469
|
+
annotations (Dict[str, Any]): Annotations data for the network.
|
470
|
+
neighborhoods (Dict[str, Any]): Neighborhood enrichment data.
|
447
471
|
min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
|
448
472
|
max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
|
449
473
|
|
450
474
|
Returns:
|
451
|
-
|
475
|
+
Dict[str, Any]: Top annotations identified within the network.
|
452
476
|
"""
|
453
477
|
# Extract necessary data from annotations and neighborhoods
|
454
478
|
ordered_annotations = annotations["ordered_annotations"]
|
455
479
|
neighborhood_enrichment_sums = neighborhoods["neighborhood_enrichment_counts"]
|
456
|
-
|
480
|
+
significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
|
481
|
+
significant_binary_enrichment_matrix = neighborhoods["significant_binary_enrichment_matrix"]
|
457
482
|
# Call external function to define top annotations
|
458
483
|
return define_top_annotations(
|
459
484
|
network=network,
|
460
485
|
ordered_annotation_labels=ordered_annotations,
|
461
486
|
neighborhood_enrichment_sums=neighborhood_enrichment_sums,
|
462
|
-
|
487
|
+
significant_enrichment_matrix=significant_enrichment_matrix,
|
488
|
+
significant_binary_enrichment_matrix=significant_binary_enrichment_matrix,
|
463
489
|
min_cluster_size=min_cluster_size,
|
464
490
|
max_cluster_size=max_cluster_size,
|
465
491
|
)
|
@@ -475,7 +501,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
475
501
|
"""Define domains in the network based on enrichment data.
|
476
502
|
|
477
503
|
Args:
|
478
|
-
neighborhoods (
|
504
|
+
neighborhoods (Dict[str, Any]): Enrichment data for neighborhoods.
|
479
505
|
top_annotations (pd.DataFrame): Enrichment matrix for top annotations.
|
480
506
|
linkage_criterion (str): Clustering criterion for defining domains.
|
481
507
|
linkage_method (str): Clustering method to use.
|
risk/stats/hypergeom.py
CHANGED
@@ -20,7 +20,7 @@ def compute_hypergeom_test(
|
|
20
20
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
21
21
|
|
22
22
|
Returns:
|
23
|
-
|
23
|
+
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
24
24
|
"""
|
25
25
|
# Get the total number of nodes in the network
|
26
26
|
total_node_count = neighborhoods.shape[0]
|
@@ -35,7 +35,7 @@ def compute_permutation_test(
|
|
35
35
|
max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.
|
36
36
|
|
37
37
|
Returns:
|
38
|
-
|
38
|
+
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
39
39
|
"""
|
40
40
|
# Ensure that the matrices are in the correct format and free of NaN values
|
41
41
|
neighborhoods = neighborhoods.astype(np.float32)
|
@@ -133,6 +133,7 @@ def _run_permutation_test(
|
|
133
133
|
observed_neighborhood_scores,
|
134
134
|
neighborhood_score_func,
|
135
135
|
subset_size + (1 if i < remainder else 0),
|
136
|
+
num_permutations,
|
136
137
|
progress_counter,
|
137
138
|
max_workers,
|
138
139
|
rng, # Pass the random number generator to each worker
|
@@ -144,11 +145,9 @@ def _run_permutation_test(
|
|
144
145
|
results = pool.starmap_async(_permutation_process_subset, params_list, chunksize=1)
|
145
146
|
|
146
147
|
# Update progress bar based on progress_counter
|
147
|
-
# NOTE: Waiting for results to be ready while updating progress bar gives a big improvement
|
148
|
-
# in performance, especially for large number of permutations and workers
|
149
148
|
while not results.ready():
|
150
149
|
progress.update(progress_counter.value - progress.n)
|
151
|
-
results.wait(0.
|
150
|
+
results.wait(0.1) # Wait for 100ms
|
152
151
|
# Ensure progress bar reaches 100%
|
153
152
|
progress.update(total_progress - progress.n)
|
154
153
|
|
@@ -167,6 +166,7 @@ def _permutation_process_subset(
|
|
167
166
|
observed_neighborhood_scores: np.ndarray,
|
168
167
|
neighborhood_score_func: Callable,
|
169
168
|
subset_size: int,
|
169
|
+
num_permutations: int,
|
170
170
|
progress_counter: ValueProxy,
|
171
171
|
max_workers: int,
|
172
172
|
rng: np.random.Generator,
|
@@ -180,6 +180,7 @@ def _permutation_process_subset(
|
|
180
180
|
observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
|
181
181
|
neighborhood_score_func (Callable): Function to calculate neighborhood scores.
|
182
182
|
subset_size (int): Number of permutations to run in this subset.
|
183
|
+
num_permutations (int): Number of total permutations across all subsets.
|
183
184
|
progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
|
184
185
|
max_workers (int): Number of workers for multiprocessing.
|
185
186
|
rng (np.random.Generator): Random number generator object.
|
@@ -190,11 +191,15 @@ def _permutation_process_subset(
|
|
190
191
|
# Initialize local count matrices for this worker
|
191
192
|
local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
|
192
193
|
local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
|
194
|
+
|
193
195
|
# NOTE: Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used.
|
194
|
-
# This can help prevent oversubscription of CPU resources during multiprocessing, ensuring that each process
|
195
|
-
# doesn't use more than one CPU core.
|
196
196
|
limits = None if max_workers == 1 else 1
|
197
197
|
with threadpool_limits(limits=limits, user_api="blas"):
|
198
|
+
# Initialize a local counter for batched progress updates
|
199
|
+
local_progress = 0
|
200
|
+
# Calculate the modulo value based on total permutations for 1/100th frequency updates
|
201
|
+
modulo_value = max(1, num_permutations // 100)
|
202
|
+
|
198
203
|
for _ in range(subset_size):
|
199
204
|
# Permute the annotation matrix using the RNG
|
200
205
|
annotation_matrix_permut = annotation_matrix[rng.permutation(idxs)]
|
@@ -212,7 +217,15 @@ def _permutation_process_subset(
|
|
212
217
|
local_counts_enrichment,
|
213
218
|
permuted_neighborhood_scores >= observed_neighborhood_scores,
|
214
219
|
)
|
215
|
-
|
216
|
-
|
220
|
+
|
221
|
+
# Update local progress counter
|
222
|
+
local_progress += 1
|
223
|
+
# Update shared progress counter every 1/100th of total permutations
|
224
|
+
if local_progress % modulo_value == 0:
|
225
|
+
progress_counter.value += modulo_value
|
226
|
+
|
227
|
+
# Final progress update for any remaining iterations
|
228
|
+
if local_progress % modulo_value != 0:
|
229
|
+
progress_counter.value += modulo_value
|
217
230
|
|
218
231
|
return local_counts_depletion, local_counts_enrichment
|
risk/stats/poisson.py
CHANGED
@@ -3,7 +3,7 @@ risk/stats/poisson
|
|
3
3
|
~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from typing import
|
6
|
+
from typing import Any, Dict
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
from scipy.stats import poisson
|
@@ -20,7 +20,7 @@ def compute_poisson_test(
|
|
20
20
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
21
21
|
|
22
22
|
Returns:
|
23
|
-
|
23
|
+
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
24
24
|
"""
|
25
25
|
# Matrix multiplication to get the number of annotated nodes in each neighborhood
|
26
26
|
annotated_in_neighborhood = neighborhoods @ annotations
|
risk/stats/stats.py
CHANGED
@@ -3,7 +3,7 @@ risk/stats/stats
|
|
3
3
|
~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from typing import Union
|
6
|
+
from typing import Any, Dict, Union
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
from statsmodels.stats.multitest import fdrcorrection
|
@@ -15,7 +15,7 @@ def calculate_significance_matrices(
|
|
15
15
|
tail: str = "right",
|
16
16
|
pval_cutoff: float = 0.05,
|
17
17
|
fdr_cutoff: float = 0.05,
|
18
|
-
) ->
|
18
|
+
) -> Dict[str, Any]:
|
19
19
|
"""Calculate significance matrices based on p-values and specified tail.
|
20
20
|
|
21
21
|
Args:
|
@@ -26,8 +26,8 @@ def calculate_significance_matrices(
|
|
26
26
|
fdr_cutoff (float, optional): Cutoff for FDR significance if applied. Defaults to 0.05.
|
27
27
|
|
28
28
|
Returns:
|
29
|
-
|
30
|
-
|
29
|
+
Dict[str, Any]: Dictionary containing the enrichment matrix, binary significance matrix,
|
30
|
+
and the matrix of significant enrichment values.
|
31
31
|
"""
|
32
32
|
if fdr_cutoff < 1.0:
|
33
33
|
# Apply FDR correction to depletion p-values
|
@@ -62,7 +62,7 @@ def calculate_significance_matrices(
|
|
62
62
|
log_enrichment_matrix = -np.log10(enrichment_matrix)
|
63
63
|
|
64
64
|
# Select the appropriate significance matrices based on the specified tail
|
65
|
-
enrichment_matrix,
|
65
|
+
enrichment_matrix, significant_binary_enrichment_matrix = _select_significance_matrices(
|
66
66
|
tail,
|
67
67
|
log_depletion_matrix,
|
68
68
|
depletion_alpha_threshold_matrix,
|
@@ -71,11 +71,13 @@ def calculate_significance_matrices(
|
|
71
71
|
)
|
72
72
|
|
73
73
|
# Filter the enrichment matrix using the binary significance matrix
|
74
|
-
significant_enrichment_matrix = np.where(
|
74
|
+
significant_enrichment_matrix = np.where(
|
75
|
+
significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
|
76
|
+
)
|
75
77
|
|
76
78
|
return {
|
77
79
|
"enrichment_matrix": enrichment_matrix,
|
78
|
-
"
|
80
|
+
"significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
|
79
81
|
"significant_enrichment_matrix": significant_enrichment_matrix,
|
80
82
|
}
|
81
83
|
|
@@ -127,10 +129,10 @@ def _select_significance_matrices(
|
|
127
129
|
|
128
130
|
# Create a binary significance matrix where valid indices meet the alpha threshold
|
129
131
|
valid_idxs = ~np.isnan(alpha_threshold_matrix)
|
130
|
-
|
131
|
-
|
132
|
+
significant_binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
|
133
|
+
significant_binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
|
132
134
|
|
133
|
-
return enrichment_matrix,
|
135
|
+
return enrichment_matrix, significant_binary_enrichment_matrix
|
134
136
|
|
135
137
|
|
136
138
|
def _compute_threshold_matrix(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: risk-network
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.8
|
4
4
|
Summary: A Python package for biological network analysis
|
5
5
|
Author: Ira Horecka
|
6
6
|
Author-email: Ira Horecka <ira89@icloud.com>
|
@@ -709,42 +709,105 @@ Requires-Dist: statsmodels
|
|
709
709
|
Requires-Dist: threadpoolctl
|
710
710
|
Requires-Dist: tqdm
|
711
711
|
|
712
|
-
|
713
|
-
<img src="https://i.imgur.com/Fo9EmnK.png" width="400" />
|
714
|
-
</p>
|
712
|
+
# RISK Network
|
715
713
|
|
716
714
|
<p align="center">
|
717
|
-
<
|
718
|
-
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.8+-blue.svg" alt="Python 3.8+"></a>
|
719
|
-
<a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
|
715
|
+
<img src="https://i.imgur.com/8TleEJs.png" width="50%" />
|
720
716
|
</p>
|
721
717
|
|
722
|
-
|
718
|
+
<br>
|
719
|
+
|
720
|
+

|
721
|
+
[](https://pypi.python.org/pypi/risk-network)
|
722
|
+

|
723
|
+
[](https://doi.org/10.5281/zenodo.xxxxxxx)
|
724
|
+

|
725
|
+

|
726
|
+
|
727
|
+
**RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
|
723
728
|
|
724
|
-
|
729
|
+
## Documentation and Tutorial
|
730
|
+
|
731
|
+
- **Documentation**: Comprehensive documentation is available [here](Documentation link).
|
732
|
+
- **Tutorial**: An interactive Jupyter notebook tutorial can be found [here](https://github.com/riskportal/network-tutorial).
|
733
|
+
We highly recommend that new users consult the documentation and tutorial early on to fully leverage RISK's capabilities.
|
734
|
+
|
735
|
+
## Installation
|
725
736
|
|
726
|
-
RISK is
|
737
|
+
RISK is compatible with Python 3.8 and later versions and operates on all major operating systems. Install RISK via pip:
|
738
|
+
|
739
|
+
```bash
|
740
|
+
pip install risk-network
|
741
|
+
```
|
727
742
|
|
728
743
|
## Features
|
729
744
|
|
730
|
-
-
|
731
|
-
-
|
732
|
-
-
|
745
|
+
- **Comprehensive Network Analysis**: Analyze biological networks such as protein–protein interaction (PPI) and gene regulatory networks, as well as non-biological networks.
|
746
|
+
- **Advanced Clustering Algorithms**: Utilize algorithms like Louvain, Markov Clustering, Spinglass, and more to identify key functional modules.
|
747
|
+
- **Flexible Visualization**: Generate clear, publication-quality figures with customizable node and edge attributes, including colors, shapes, sizes, and labels.
|
748
|
+
- **Efficient Data Handling**: Optimized for large datasets, supporting multiple file formats such as JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
|
749
|
+
- **Statistical Analysis**: Integrated statistical tests, including hypergeometric, permutation, and Poisson tests, to assess the significance of enriched regions.
|
750
|
+
- **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
|
733
751
|
|
734
|
-
## Example
|
752
|
+
## Example Usage
|
735
753
|
|
736
|
-
*Saccharomyces cerevisiae*
|
754
|
+
We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network, revealing both established and novel functional relationships. The visualization below highlights key biological processes such as ribosomal assembly and mitochondrial organization.
|
737
755
|
|
738
|
-

|
739
757
|
|
740
|
-
|
758
|
+
RISK successfully detected both known and novel functional clusters within the yeast interactome. Clusters related to Golgi transport and actin nucleation were clearly defined and closely located, showcasing RISK's ability to map well-characterized interactions. Additionally, RISK identified links between mRNA processing pathways and vesicle trafficking proteins, consistent with recent studies demonstrating the role of vesicles in mRNA localization and stability.
|
759
|
+
|
760
|
+
## Citation
|
761
|
+
|
762
|
+
If you use RISK in your research, please cite the following:
|
763
|
+
|
764
|
+
**Horecka**, *et al.*, "RISK: a next-generation tool for biological network annotation and visualization", **[Journal Name]**, 2024. DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
|
765
|
+
|
766
|
+
## Software Architecture and Implementation
|
741
767
|
|
742
|
-
|
768
|
+
RISK features a streamlined, modular architecture designed to meet diverse research needs. Each module focuses on a specific task—such as network input/output, statistical analysis, or visualization—ensuring ease of adaptation and extension. This design enhances flexibility and reduces development overhead for users integrating RISK into their workflows.
|
743
769
|
|
744
|
-
|
770
|
+
### Supported Data Formats
|
745
771
|
|
746
|
-
|
772
|
+
- **Input/Output**: JSON, CSV, TSV, Excel, Cytoscape, GPickle.
|
773
|
+
- **Visualization Outputs**: SVG, PNG, PDF.
|
774
|
+
|
775
|
+
### Clustering Algorithms
|
776
|
+
|
777
|
+
- **Available Algorithms**:
|
778
|
+
- Greedy Modularity
|
779
|
+
- Label Propagation
|
780
|
+
- Louvain
|
781
|
+
- Markov Clustering
|
782
|
+
- Spinglass
|
783
|
+
- Walktrap
|
784
|
+
- **Distance Metrics**: Supports both spherical and Euclidean distance metrics.
|
785
|
+
|
786
|
+
### Statistical Tests
|
787
|
+
|
788
|
+
- **Hypergeometric Test**
|
789
|
+
- **Permutation Test** (single- or multi-process modes)
|
790
|
+
- **Poisson Test**
|
791
|
+
|
792
|
+
## Performance and Efficiency
|
793
|
+
|
794
|
+
In benchmarking tests using the yeast interactome network, RISK demonstrated substantial improvements over previous tools in both computational performance and memory efficiency. RISK processed the dataset approximately **3.25 times faster**, reducing CPU time by **69%**, and required **25% less peak memory usage**, underscoring its efficient utilization of computational resources.
|
795
|
+
|
796
|
+
## Contributing
|
797
|
+
|
798
|
+
We welcome contributions from the community. Please use the following resources:
|
799
|
+
|
800
|
+
- [Issues Tracker](https://github.com/irahorecka/risk/issues)
|
801
|
+
- [Source Code](https://github.com/irahorecka/risk/tree/main/risk)
|
802
|
+
|
803
|
+
## Support
|
804
|
+
|
805
|
+
If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/irahorecka/risk/issues) on GitHub.
|
747
806
|
|
748
807
|
## License
|
749
808
|
|
750
|
-
|
809
|
+
RISK is freely available as open-source software under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
810
|
+
|
811
|
+
---
|
812
|
+
|
813
|
+
**Note**: For detailed documentation and to access the interactive tutorial, please visit the links provided in the [Documentation and Tutorial](#documentation-and-tutorial) section.
|