risk-network 0.0.7b12__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/__init__.py +1 -1
- risk/annotations/annotations.py +85 -53
- risk/annotations/io.py +3 -3
- risk/log/__init__.py +1 -1
- risk/log/{config.py → console.py} +2 -2
- risk/log/params.py +6 -6
- risk/neighborhoods/community.py +68 -61
- risk/neighborhoods/domains.py +41 -18
- risk/neighborhoods/neighborhoods.py +134 -69
- risk/network/geometry.py +5 -2
- risk/network/graph.py +69 -235
- risk/network/io.py +44 -6
- risk/network/plot/__init__.py +6 -0
- risk/network/plot/canvas.py +290 -0
- risk/network/plot/contour.py +327 -0
- risk/network/plot/labels.py +929 -0
- risk/network/plot/network.py +288 -0
- risk/network/plot/plotter.py +137 -0
- risk/network/plot/utils/color.py +424 -0
- risk/network/plot/utils/layout.py +91 -0
- risk/risk.py +70 -37
- risk/stats/hypergeom.py +1 -1
- risk/stats/permutation/permutation.py +21 -8
- risk/stats/poisson.py +2 -2
- risk/stats/stats.py +12 -10
- {risk_network-0.0.7b12.dist-info → risk_network-0.0.8.dist-info}/METADATA +84 -21
- risk_network-0.0.8.dist-info/RECORD +37 -0
- {risk_network-0.0.7b12.dist-info → risk_network-0.0.8.dist-info}/WHEEL +1 -1
- risk/network/plot.py +0 -1450
- risk_network-0.0.7b12.dist-info/RECORD +0 -30
- {risk_network-0.0.7b12.dist-info → risk_network-0.0.8.dist-info}/LICENSE +0 -0
- {risk_network-0.0.7b12.dist-info → risk_network-0.0.8.dist-info}/top_level.txt +0 -0
risk/risk.py
CHANGED
@@ -3,7 +3,8 @@ risk/risk
|
|
3
3
|
~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
|
6
|
+
import copy
|
7
|
+
from typing import Any, Dict, List, Tuple, Union
|
7
8
|
|
8
9
|
import networkx as nx
|
9
10
|
import numpy as np
|
@@ -58,9 +59,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
58
59
|
self,
|
59
60
|
network: nx.Graph,
|
60
61
|
annotations: Dict[str, Any],
|
61
|
-
distance_metric: str = "louvain",
|
62
|
+
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
62
63
|
louvain_resolution: float = 0.1,
|
63
|
-
edge_length_threshold: float = 0.5,
|
64
|
+
edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
|
64
65
|
null_distribution: str = "network",
|
65
66
|
random_seed: int = 888,
|
66
67
|
) -> Dict[str, Any]:
|
@@ -68,15 +69,19 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
68
69
|
|
69
70
|
Args:
|
70
71
|
network (nx.Graph): The network graph.
|
71
|
-
annotations (
|
72
|
-
distance_metric (str, optional):
|
72
|
+
annotations (Dict[str, Any]): The annotations associated with the network.
|
73
|
+
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
74
|
+
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
|
75
|
+
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
73
76
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
74
|
-
edge_length_threshold (float, optional): Edge length threshold for
|
77
|
+
edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
|
78
|
+
Can be a single float for one threshold or a list/tuple/ndarray of floats corresponding to multiple thresholds.
|
79
|
+
Defaults to 0.5.
|
75
80
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
76
81
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
77
82
|
|
78
83
|
Returns:
|
79
|
-
|
84
|
+
Dict[str, Any]: Computed significance of neighborhoods.
|
80
85
|
"""
|
81
86
|
log_header("Running hypergeometric test")
|
82
87
|
# Log neighborhood analysis parameters
|
@@ -89,6 +94,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
89
94
|
random_seed=random_seed,
|
90
95
|
)
|
91
96
|
|
97
|
+
# Make a copy of the network to avoid modifying the original
|
98
|
+
network = copy.deepcopy(network)
|
99
|
+
|
92
100
|
# Load neighborhoods based on the network and distance metric
|
93
101
|
neighborhoods = self._load_neighborhoods(
|
94
102
|
network,
|
@@ -111,9 +119,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
111
119
|
self,
|
112
120
|
network: nx.Graph,
|
113
121
|
annotations: Dict[str, Any],
|
114
|
-
distance_metric: str = "louvain",
|
122
|
+
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
115
123
|
louvain_resolution: float = 0.1,
|
116
|
-
edge_length_threshold: float = 0.5,
|
124
|
+
edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
|
117
125
|
null_distribution: str = "network",
|
118
126
|
random_seed: int = 888,
|
119
127
|
) -> Dict[str, Any]:
|
@@ -121,15 +129,19 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
121
129
|
|
122
130
|
Args:
|
123
131
|
network (nx.Graph): The network graph.
|
124
|
-
annotations (
|
125
|
-
distance_metric (str, optional):
|
132
|
+
annotations (Dict[str, Any]): The annotations associated with the network.
|
133
|
+
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
134
|
+
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
|
135
|
+
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
126
136
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
127
|
-
edge_length_threshold (float, optional): Edge length threshold for
|
137
|
+
edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
|
138
|
+
Can be a single float for one threshold or a list/tuple/ndarray of floats corresponding to multiple thresholds.
|
139
|
+
Defaults to 0.5.
|
128
140
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
129
141
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
130
142
|
|
131
143
|
Returns:
|
132
|
-
|
144
|
+
Dict[str, Any]: Computed significance of neighborhoods.
|
133
145
|
"""
|
134
146
|
log_header("Running Poisson test")
|
135
147
|
# Log neighborhood analysis parameters
|
@@ -142,6 +154,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
142
154
|
random_seed=random_seed,
|
143
155
|
)
|
144
156
|
|
157
|
+
# Make a copy of the network to avoid modifying the original
|
158
|
+
network = copy.deepcopy(network)
|
159
|
+
|
145
160
|
# Load neighborhoods based on the network and distance metric
|
146
161
|
neighborhoods = self._load_neighborhoods(
|
147
162
|
network,
|
@@ -164,9 +179,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
164
179
|
self,
|
165
180
|
network: nx.Graph,
|
166
181
|
annotations: Dict[str, Any],
|
167
|
-
distance_metric: str = "louvain",
|
182
|
+
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
168
183
|
louvain_resolution: float = 0.1,
|
169
|
-
edge_length_threshold: float = 0.5,
|
184
|
+
edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
|
170
185
|
score_metric: str = "sum",
|
171
186
|
null_distribution: str = "network",
|
172
187
|
num_permutations: int = 1000,
|
@@ -177,10 +192,14 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
177
192
|
|
178
193
|
Args:
|
179
194
|
network (nx.Graph): The network graph.
|
180
|
-
annotations (
|
181
|
-
distance_metric (str, optional):
|
195
|
+
annotations (Dict[str, Any]): The annotations associated with the network.
|
196
|
+
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
197
|
+
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
|
198
|
+
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
182
199
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
183
|
-
edge_length_threshold (float, optional): Edge length threshold for
|
200
|
+
edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
|
201
|
+
Can be a single float for one threshold or a list/tuple/ndarray of floats corresponding to multiple thresholds.
|
202
|
+
Defaults to 0.5.
|
184
203
|
score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
|
185
204
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
186
205
|
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
@@ -188,7 +207,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
188
207
|
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
189
208
|
|
190
209
|
Returns:
|
191
|
-
|
210
|
+
Dict[str, Any]: Computed significance of neighborhoods.
|
192
211
|
"""
|
193
212
|
log_header("Running permutation test")
|
194
213
|
# Log neighborhood analysis parameters
|
@@ -204,6 +223,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
204
223
|
max_workers=max_workers,
|
205
224
|
)
|
206
225
|
|
226
|
+
# Make a copy of the network to avoid modifying the original
|
227
|
+
network = copy.deepcopy(network)
|
228
|
+
|
207
229
|
# Load neighborhoods based on the network and distance metric
|
208
230
|
neighborhoods = self._load_neighborhoods(
|
209
231
|
network,
|
@@ -253,7 +275,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
253
275
|
Args:
|
254
276
|
network (nx.Graph): The network graph.
|
255
277
|
annotations (pd.DataFrame): DataFrame containing annotation data for the network.
|
256
|
-
neighborhoods (
|
278
|
+
neighborhoods (Dict[str, Any]): Neighborhood enrichment data.
|
257
279
|
tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
|
258
280
|
pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
|
259
281
|
fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
|
@@ -283,6 +305,9 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
283
305
|
max_cluster_size=max_cluster_size,
|
284
306
|
)
|
285
307
|
|
308
|
+
# Make a copy of the network to avoid modifying the original
|
309
|
+
network = copy.deepcopy(network)
|
310
|
+
|
286
311
|
logger.debug(f"p-value cutoff: {pval_cutoff}")
|
287
312
|
logger.debug(f"FDR BH cutoff: {fdr_cutoff}")
|
288
313
|
logger.debug(
|
@@ -353,39 +378,41 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
353
378
|
def load_plotter(
|
354
379
|
self,
|
355
380
|
graph: NetworkGraph,
|
356
|
-
figsize: Tuple = (10, 10),
|
381
|
+
figsize: Union[List, Tuple, np.ndarray] = (10, 10),
|
357
382
|
background_color: str = "white",
|
383
|
+
background_alpha: Union[float, None] = 1.0,
|
384
|
+
pad: float = 0.3,
|
358
385
|
) -> NetworkPlotter:
|
359
386
|
"""Get a NetworkPlotter object for plotting.
|
360
387
|
|
361
388
|
Args:
|
362
389
|
graph (NetworkGraph): The graph to plot.
|
363
|
-
figsize (
|
390
|
+
figsize (List, Tuple, or np.ndarray, optional): Size of the figure. Defaults to (10, 10).
|
364
391
|
background_color (str, optional): Background color of the plot. Defaults to "white".
|
392
|
+
background_alpha (float, None, optional): Transparency level of the background color. If provided, it overrides
|
393
|
+
any existing alpha values found in background_color. Defaults to 1.0.
|
394
|
+
pad (float, optional): Padding value to adjust the axis limits. Defaults to 0.3.
|
365
395
|
|
366
396
|
Returns:
|
367
397
|
NetworkPlotter: A NetworkPlotter object configured with the given parameters.
|
368
398
|
"""
|
369
399
|
log_header("Loading plotter")
|
370
|
-
# Log the plotter settings
|
371
|
-
params.log_plotter(
|
372
|
-
figsize=figsize,
|
373
|
-
background_color=background_color,
|
374
|
-
)
|
375
400
|
|
376
401
|
# Initialize and return a NetworkPlotter object
|
377
402
|
return NetworkPlotter(
|
378
403
|
graph,
|
379
404
|
figsize=figsize,
|
380
405
|
background_color=background_color,
|
406
|
+
background_alpha=background_alpha,
|
407
|
+
pad=pad,
|
381
408
|
)
|
382
409
|
|
383
410
|
def _load_neighborhoods(
|
384
411
|
self,
|
385
412
|
network: nx.Graph,
|
386
|
-
distance_metric: str = "louvain",
|
413
|
+
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
387
414
|
louvain_resolution: float = 0.1,
|
388
|
-
edge_length_threshold: float = 0.5,
|
415
|
+
edge_length_threshold: Union[float, List, Tuple, np.ndarray] = 0.5,
|
389
416
|
random_seed: int = 888,
|
390
417
|
) -> np.ndarray:
|
391
418
|
"""Load significant neighborhoods for the network.
|
@@ -393,9 +420,13 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
393
420
|
Args:
|
394
421
|
network (nx.Graph): The network graph.
|
395
422
|
annotations (pd.DataFrame): The matrix of annotations associated with the network.
|
396
|
-
distance_metric (str, optional):
|
423
|
+
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
424
|
+
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'label_propagation',
|
425
|
+
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
397
426
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
398
|
-
edge_length_threshold (float, optional): Edge length threshold for
|
427
|
+
edge_length_threshold (float, List, Tuple, or np.ndarray, optional): Edge length threshold(s) for creating subgraphs.
|
428
|
+
Can be a single float for one threshold or a list/tuple/ndarray of floats corresponding to multiple thresholds.
|
429
|
+
Defaults to 0.5.
|
399
430
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
400
431
|
|
401
432
|
Returns:
|
@@ -435,24 +466,26 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
435
466
|
|
436
467
|
Args:
|
437
468
|
network (nx.Graph): The network graph.
|
438
|
-
annotations (
|
439
|
-
neighborhoods (
|
469
|
+
annotations (Dict[str, Any]): Annotations data for the network.
|
470
|
+
neighborhoods (Dict[str, Any]): Neighborhood enrichment data.
|
440
471
|
min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
|
441
472
|
max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
|
442
473
|
|
443
474
|
Returns:
|
444
|
-
|
475
|
+
Dict[str, Any]: Top annotations identified within the network.
|
445
476
|
"""
|
446
477
|
# Extract necessary data from annotations and neighborhoods
|
447
478
|
ordered_annotations = annotations["ordered_annotations"]
|
448
479
|
neighborhood_enrichment_sums = neighborhoods["neighborhood_enrichment_counts"]
|
449
|
-
|
480
|
+
significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
|
481
|
+
significant_binary_enrichment_matrix = neighborhoods["significant_binary_enrichment_matrix"]
|
450
482
|
# Call external function to define top annotations
|
451
483
|
return define_top_annotations(
|
452
484
|
network=network,
|
453
485
|
ordered_annotation_labels=ordered_annotations,
|
454
486
|
neighborhood_enrichment_sums=neighborhood_enrichment_sums,
|
455
|
-
|
487
|
+
significant_enrichment_matrix=significant_enrichment_matrix,
|
488
|
+
significant_binary_enrichment_matrix=significant_binary_enrichment_matrix,
|
456
489
|
min_cluster_size=min_cluster_size,
|
457
490
|
max_cluster_size=max_cluster_size,
|
458
491
|
)
|
@@ -468,7 +501,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
468
501
|
"""Define domains in the network based on enrichment data.
|
469
502
|
|
470
503
|
Args:
|
471
|
-
neighborhoods (
|
504
|
+
neighborhoods (Dict[str, Any]): Enrichment data for neighborhoods.
|
472
505
|
top_annotations (pd.DataFrame): Enrichment matrix for top annotations.
|
473
506
|
linkage_criterion (str): Clustering criterion for defining domains.
|
474
507
|
linkage_method (str): Clustering method to use.
|
risk/stats/hypergeom.py
CHANGED
@@ -20,7 +20,7 @@ def compute_hypergeom_test(
|
|
20
20
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
21
21
|
|
22
22
|
Returns:
|
23
|
-
|
23
|
+
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
24
24
|
"""
|
25
25
|
# Get the total number of nodes in the network
|
26
26
|
total_node_count = neighborhoods.shape[0]
|
@@ -35,7 +35,7 @@ def compute_permutation_test(
|
|
35
35
|
max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.
|
36
36
|
|
37
37
|
Returns:
|
38
|
-
|
38
|
+
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
39
39
|
"""
|
40
40
|
# Ensure that the matrices are in the correct format and free of NaN values
|
41
41
|
neighborhoods = neighborhoods.astype(np.float32)
|
@@ -133,6 +133,7 @@ def _run_permutation_test(
|
|
133
133
|
observed_neighborhood_scores,
|
134
134
|
neighborhood_score_func,
|
135
135
|
subset_size + (1 if i < remainder else 0),
|
136
|
+
num_permutations,
|
136
137
|
progress_counter,
|
137
138
|
max_workers,
|
138
139
|
rng, # Pass the random number generator to each worker
|
@@ -144,11 +145,9 @@ def _run_permutation_test(
|
|
144
145
|
results = pool.starmap_async(_permutation_process_subset, params_list, chunksize=1)
|
145
146
|
|
146
147
|
# Update progress bar based on progress_counter
|
147
|
-
# NOTE: Waiting for results to be ready while updating progress bar gives a big improvement
|
148
|
-
# in performance, especially for large number of permutations and workers
|
149
148
|
while not results.ready():
|
150
149
|
progress.update(progress_counter.value - progress.n)
|
151
|
-
results.wait(0.
|
150
|
+
results.wait(0.1) # Wait for 100ms
|
152
151
|
# Ensure progress bar reaches 100%
|
153
152
|
progress.update(total_progress - progress.n)
|
154
153
|
|
@@ -167,6 +166,7 @@ def _permutation_process_subset(
|
|
167
166
|
observed_neighborhood_scores: np.ndarray,
|
168
167
|
neighborhood_score_func: Callable,
|
169
168
|
subset_size: int,
|
169
|
+
num_permutations: int,
|
170
170
|
progress_counter: ValueProxy,
|
171
171
|
max_workers: int,
|
172
172
|
rng: np.random.Generator,
|
@@ -180,6 +180,7 @@ def _permutation_process_subset(
|
|
180
180
|
observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
|
181
181
|
neighborhood_score_func (Callable): Function to calculate neighborhood scores.
|
182
182
|
subset_size (int): Number of permutations to run in this subset.
|
183
|
+
num_permutations (int): Number of total permutations across all subsets.
|
183
184
|
progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
|
184
185
|
max_workers (int): Number of workers for multiprocessing.
|
185
186
|
rng (np.random.Generator): Random number generator object.
|
@@ -190,11 +191,15 @@ def _permutation_process_subset(
|
|
190
191
|
# Initialize local count matrices for this worker
|
191
192
|
local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
|
192
193
|
local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
|
194
|
+
|
193
195
|
# NOTE: Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used.
|
194
|
-
# This can help prevent oversubscription of CPU resources during multiprocessing, ensuring that each process
|
195
|
-
# doesn't use more than one CPU core.
|
196
196
|
limits = None if max_workers == 1 else 1
|
197
197
|
with threadpool_limits(limits=limits, user_api="blas"):
|
198
|
+
# Initialize a local counter for batched progress updates
|
199
|
+
local_progress = 0
|
200
|
+
# Calculate the modulo value based on total permutations for 1/100th frequency updates
|
201
|
+
modulo_value = max(1, num_permutations // 100)
|
202
|
+
|
198
203
|
for _ in range(subset_size):
|
199
204
|
# Permute the annotation matrix using the RNG
|
200
205
|
annotation_matrix_permut = annotation_matrix[rng.permutation(idxs)]
|
@@ -212,7 +217,15 @@ def _permutation_process_subset(
|
|
212
217
|
local_counts_enrichment,
|
213
218
|
permuted_neighborhood_scores >= observed_neighborhood_scores,
|
214
219
|
)
|
215
|
-
|
216
|
-
|
220
|
+
|
221
|
+
# Update local progress counter
|
222
|
+
local_progress += 1
|
223
|
+
# Update shared progress counter every 1/100th of total permutations
|
224
|
+
if local_progress % modulo_value == 0:
|
225
|
+
progress_counter.value += modulo_value
|
226
|
+
|
227
|
+
# Final progress update for any remaining iterations
|
228
|
+
if local_progress % modulo_value != 0:
|
229
|
+
progress_counter.value += modulo_value
|
217
230
|
|
218
231
|
return local_counts_depletion, local_counts_enrichment
|
risk/stats/poisson.py
CHANGED
@@ -3,7 +3,7 @@ risk/stats/poisson
|
|
3
3
|
~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from typing import
|
6
|
+
from typing import Any, Dict
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
from scipy.stats import poisson
|
@@ -20,7 +20,7 @@ def compute_poisson_test(
|
|
20
20
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
21
21
|
|
22
22
|
Returns:
|
23
|
-
|
23
|
+
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
24
24
|
"""
|
25
25
|
# Matrix multiplication to get the number of annotated nodes in each neighborhood
|
26
26
|
annotated_in_neighborhood = neighborhoods @ annotations
|
risk/stats/stats.py
CHANGED
@@ -3,7 +3,7 @@ risk/stats/stats
|
|
3
3
|
~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from typing import Union
|
6
|
+
from typing import Any, Dict, Union
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
from statsmodels.stats.multitest import fdrcorrection
|
@@ -15,7 +15,7 @@ def calculate_significance_matrices(
|
|
15
15
|
tail: str = "right",
|
16
16
|
pval_cutoff: float = 0.05,
|
17
17
|
fdr_cutoff: float = 0.05,
|
18
|
-
) ->
|
18
|
+
) -> Dict[str, Any]:
|
19
19
|
"""Calculate significance matrices based on p-values and specified tail.
|
20
20
|
|
21
21
|
Args:
|
@@ -26,8 +26,8 @@ def calculate_significance_matrices(
|
|
26
26
|
fdr_cutoff (float, optional): Cutoff for FDR significance if applied. Defaults to 0.05.
|
27
27
|
|
28
28
|
Returns:
|
29
|
-
|
30
|
-
|
29
|
+
Dict[str, Any]: Dictionary containing the enrichment matrix, binary significance matrix,
|
30
|
+
and the matrix of significant enrichment values.
|
31
31
|
"""
|
32
32
|
if fdr_cutoff < 1.0:
|
33
33
|
# Apply FDR correction to depletion p-values
|
@@ -62,7 +62,7 @@ def calculate_significance_matrices(
|
|
62
62
|
log_enrichment_matrix = -np.log10(enrichment_matrix)
|
63
63
|
|
64
64
|
# Select the appropriate significance matrices based on the specified tail
|
65
|
-
enrichment_matrix,
|
65
|
+
enrichment_matrix, significant_binary_enrichment_matrix = _select_significance_matrices(
|
66
66
|
tail,
|
67
67
|
log_depletion_matrix,
|
68
68
|
depletion_alpha_threshold_matrix,
|
@@ -71,11 +71,13 @@ def calculate_significance_matrices(
|
|
71
71
|
)
|
72
72
|
|
73
73
|
# Filter the enrichment matrix using the binary significance matrix
|
74
|
-
significant_enrichment_matrix = np.where(
|
74
|
+
significant_enrichment_matrix = np.where(
|
75
|
+
significant_binary_enrichment_matrix == 1, enrichment_matrix, 0
|
76
|
+
)
|
75
77
|
|
76
78
|
return {
|
77
79
|
"enrichment_matrix": enrichment_matrix,
|
78
|
-
"
|
80
|
+
"significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
|
79
81
|
"significant_enrichment_matrix": significant_enrichment_matrix,
|
80
82
|
}
|
81
83
|
|
@@ -127,10 +129,10 @@ def _select_significance_matrices(
|
|
127
129
|
|
128
130
|
# Create a binary significance matrix where valid indices meet the alpha threshold
|
129
131
|
valid_idxs = ~np.isnan(alpha_threshold_matrix)
|
130
|
-
|
131
|
-
|
132
|
+
significant_binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
|
133
|
+
significant_binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
|
132
134
|
|
133
|
-
return enrichment_matrix,
|
135
|
+
return enrichment_matrix, significant_binary_enrichment_matrix
|
134
136
|
|
135
137
|
|
136
138
|
def _compute_threshold_matrix(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: risk-network
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.8
|
4
4
|
Summary: A Python package for biological network analysis
|
5
5
|
Author: Ira Horecka
|
6
6
|
Author-email: Ira Horecka <ira89@icloud.com>
|
@@ -709,42 +709,105 @@ Requires-Dist: statsmodels
|
|
709
709
|
Requires-Dist: threadpoolctl
|
710
710
|
Requires-Dist: tqdm
|
711
711
|
|
712
|
-
|
713
|
-
<img src="https://i.imgur.com/Fo9EmnK.png" width="400" />
|
714
|
-
</p>
|
712
|
+
# RISK Network
|
715
713
|
|
716
714
|
<p align="center">
|
717
|
-
<
|
718
|
-
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.8+-blue.svg" alt="Python 3.8+"></a>
|
719
|
-
<a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
|
715
|
+
<img src="https://i.imgur.com/8TleEJs.png" width="50%" />
|
720
716
|
</p>
|
721
717
|
|
722
|
-
|
718
|
+
<br>
|
719
|
+
|
720
|
+

|
721
|
+
[](https://pypi.python.org/pypi/risk-network)
|
722
|
+

|
723
|
+
[](https://doi.org/10.5281/zenodo.xxxxxxx)
|
724
|
+

|
725
|
+

|
726
|
+
|
727
|
+
**RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
|
723
728
|
|
724
|
-
|
729
|
+
## Documentation and Tutorial
|
730
|
+
|
731
|
+
- **Documentation**: Comprehensive documentation is available [here](Documentation link).
|
732
|
+
- **Tutorial**: An interactive Jupyter notebook tutorial can be found [here](https://github.com/riskportal/network-tutorial).
|
733
|
+
We highly recommend that new users consult the documentation and tutorial early on to fully leverage RISK's capabilities.
|
734
|
+
|
735
|
+
## Installation
|
725
736
|
|
726
|
-
RISK is
|
737
|
+
RISK is compatible with Python 3.8 and later versions and operates on all major operating systems. Install RISK via pip:
|
738
|
+
|
739
|
+
```bash
|
740
|
+
pip install risk-network
|
741
|
+
```
|
727
742
|
|
728
743
|
## Features
|
729
744
|
|
730
|
-
-
|
731
|
-
-
|
732
|
-
-
|
745
|
+
- **Comprehensive Network Analysis**: Analyze biological networks such as protein–protein interaction (PPI) and gene regulatory networks, as well as non-biological networks.
|
746
|
+
- **Advanced Clustering Algorithms**: Utilize algorithms like Louvain, Markov Clustering, Spinglass, and more to identify key functional modules.
|
747
|
+
- **Flexible Visualization**: Generate clear, publication-quality figures with customizable node and edge attributes, including colors, shapes, sizes, and labels.
|
748
|
+
- **Efficient Data Handling**: Optimized for large datasets, supporting multiple file formats such as JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
|
749
|
+
- **Statistical Analysis**: Integrated statistical tests, including hypergeometric, permutation, and Poisson tests, to assess the significance of enriched regions.
|
750
|
+
- **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
|
733
751
|
|
734
|
-
## Example
|
752
|
+
## Example Usage
|
735
753
|
|
736
|
-
*Saccharomyces cerevisiae*
|
754
|
+
We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network, revealing both established and novel functional relationships. The visualization below highlights key biological processes such as ribosomal assembly and mitochondrial organization.
|
737
755
|
|
738
|
-

|
739
757
|
|
740
|
-
|
758
|
+
RISK successfully detected both known and novel functional clusters within the yeast interactome. Clusters related to Golgi transport and actin nucleation were clearly defined and closely located, showcasing RISK's ability to map well-characterized interactions. Additionally, RISK identified links between mRNA processing pathways and vesicle trafficking proteins, consistent with recent studies demonstrating the role of vesicles in mRNA localization and stability.
|
759
|
+
|
760
|
+
## Citation
|
761
|
+
|
762
|
+
If you use RISK in your research, please cite the following:
|
763
|
+
|
764
|
+
**Horecka**, *et al.*, "RISK: a next-generation tool for biological network annotation and visualization", **[Journal Name]**, 2024. DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
|
765
|
+
|
766
|
+
## Software Architecture and Implementation
|
741
767
|
|
742
|
-
|
768
|
+
RISK features a streamlined, modular architecture designed to meet diverse research needs. Each module focuses on a specific task—such as network input/output, statistical analysis, or visualization—ensuring ease of adaptation and extension. This design enhances flexibility and reduces development overhead for users integrating RISK into their workflows.
|
743
769
|
|
744
|
-
|
770
|
+
### Supported Data Formats
|
745
771
|
|
746
|
-
|
772
|
+
- **Input/Output**: JSON, CSV, TSV, Excel, Cytoscape, GPickle.
|
773
|
+
- **Visualization Outputs**: SVG, PNG, PDF.
|
774
|
+
|
775
|
+
### Clustering Algorithms
|
776
|
+
|
777
|
+
- **Available Algorithms**:
|
778
|
+
- Greedy Modularity
|
779
|
+
- Label Propagation
|
780
|
+
- Louvain
|
781
|
+
- Markov Clustering
|
782
|
+
- Spinglass
|
783
|
+
- Walktrap
|
784
|
+
- **Distance Metrics**: Supports both spherical and Euclidean distance metrics.
|
785
|
+
|
786
|
+
### Statistical Tests
|
787
|
+
|
788
|
+
- **Hypergeometric Test**
|
789
|
+
- **Permutation Test** (single- or multi-process modes)
|
790
|
+
- **Poisson Test**
|
791
|
+
|
792
|
+
## Performance and Efficiency
|
793
|
+
|
794
|
+
In benchmarking tests using the yeast interactome network, RISK demonstrated substantial improvements over previous tools in both computational performance and memory efficiency. RISK processed the dataset approximately **3.25 times faster**, reducing CPU time by **69%**, and required **25% less peak memory usage**, underscoring its efficient utilization of computational resources.
|
795
|
+
|
796
|
+
## Contributing
|
797
|
+
|
798
|
+
We welcome contributions from the community. Please use the following resources:
|
799
|
+
|
800
|
+
- [Issues Tracker](https://github.com/irahorecka/risk/issues)
|
801
|
+
- [Source Code](https://github.com/irahorecka/risk/tree/main/risk)
|
802
|
+
|
803
|
+
## Support
|
804
|
+
|
805
|
+
If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/irahorecka/risk/issues) on GitHub.
|
747
806
|
|
748
807
|
## License
|
749
808
|
|
750
|
-
|
809
|
+
RISK is freely available as open-source software under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
810
|
+
|
811
|
+
---
|
812
|
+
|
813
|
+
**Note**: For detailed documentation and to access the interactive tutorial, please visit the links provided in the [Documentation and Tutorial](#documentation-and-tutorial) section.
|
@@ -0,0 +1,37 @@
|
|
1
|
+
risk/__init__.py,sha256=1uHw76jOGBsjhDxEmv0vYmPZhEY2JBiXt_n6-TI3x5w,105
|
2
|
+
risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
|
3
|
+
risk/risk.py,sha256=8GTUpj3TC7XLwEUD22-fDTKXtW7PndzW9TixWaZ23bI,23853
|
4
|
+
risk/annotations/__init__.py,sha256=kXgadEXaCh0z8OyhOhTj7c3qXGmWgOhaSZ4gSzSb59U,147
|
5
|
+
risk/annotations/annotations.py,sha256=dHO6kQOQjMA57nYA-yTAU1uB-ieiZ5sknAKvX6vF0Os,13024
|
6
|
+
risk/annotations/io.py,sha256=powWzeimVdE0WCwlBCXyu5otMyZZHQujC0DS3m5DC0c,9505
|
7
|
+
risk/log/__init__.py,sha256=xKeU9uK1AnVk7Yt9GTp-E-dn7Ealow2igEXZZnQRa2c,135
|
8
|
+
risk/log/console.py,sha256=C52s3FgQ2e9kQWcXL8m7rs_pnKXt5Yy8PBHmQkOTiNo,4537
|
9
|
+
risk/log/params.py,sha256=qSTktJ3OazldTzgtDGZkh0s30vu5kiXPkiNGLdSFDvg,6416
|
10
|
+
risk/neighborhoods/__init__.py,sha256=tKKEg4lsbqFukpgYlUGxU_v_9FOqK7V0uvM9T2QzoL0,206
|
11
|
+
risk/neighborhoods/community.py,sha256=MAgIblbuisEPwVU6mFZd4Yd9NUKlaHK99suw51r1Is0,7065
|
12
|
+
risk/neighborhoods/domains.py,sha256=3iV0-nRLF2sL9_7epHY5b9AtTU-QQ84hOWO76VwFcrs,11685
|
13
|
+
risk/neighborhoods/neighborhoods.py,sha256=cT9CCi1uQLn9Kv9Lxt8AN_4s63SKIlOZspvUZnx27nE,21832
|
14
|
+
risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
|
15
|
+
risk/network/geometry.py,sha256=gFtYUj9j9aul4paKq_qSGJn39Nazxu_MXv8m-tYYtrk,6840
|
16
|
+
risk/network/graph.py,sha256=-tslu8nSbuBaqNGf6TQ8ON7C27v-BLH_37J2aC6Ke14,9602
|
17
|
+
risk/network/io.py,sha256=-NJ9Tg1s-DxhlDbwQGO4o87rbMqO4-BzShgnIgFoRRE,22962
|
18
|
+
risk/network/plot/__init__.py,sha256=MfmaXJgAZJgXZ2wrhK8pXwzETlcMaLChhWXKAozniAo,98
|
19
|
+
risk/network/plot/canvas.py,sha256=TlCpNtvoceizAumNr9I02JcBrBO6FiAFAa2ZC0bx3SU,13356
|
20
|
+
risk/network/plot/contour.py,sha256=CwX4i3uE5HL0W4kfx34U7YyoTTqMxyb7xaXKRVoNLzY,15265
|
21
|
+
risk/network/plot/labels.py,sha256=fNccRds6seShMFPN6WX_7M1_qnscBkcWEH3QOJAKalk,45502
|
22
|
+
risk/network/plot/network.py,sha256=6RURL1OdBFyQ34qNcwM_uH3LSQGYZZ8tZT51dggH1a0,13685
|
23
|
+
risk/network/plot/plotter.py,sha256=iTPMiTnTTatM_-q1Ox_bjt5Pvv-Lo8gceiYB6TVzDcw,5770
|
24
|
+
risk/network/plot/utils/color.py,sha256=WSs1ge2oZ8yXwyVk2QqBF-avRd0aYT-sYZr9cxxAn7M,19626
|
25
|
+
risk/network/plot/utils/layout.py,sha256=RnJq0yODpoheZnDl7KKFPQeXrnrsS3FLIdxupoYVZq4,3553
|
26
|
+
risk/stats/__init__.py,sha256=WcgoETQ-hS0LQqKRsAMIPtP15xZ-4eul6VUBuUx4Wzc,220
|
27
|
+
risk/stats/hypergeom.py,sha256=oc39f02ViB1vQ-uaDrxG_tzAT6dxQBRjc88EK2EGn78,2282
|
28
|
+
risk/stats/poisson.py,sha256=polLgwS08MTCNzupYdmMUoEUYrJOjAbcYtYwjlfeE5Y,1803
|
29
|
+
risk/stats/stats.py,sha256=6iGi0-oN05mTmupg6X_VEBxEQvi2rujNhfPk4aLjwNI,7186
|
30
|
+
risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
|
31
|
+
risk/stats/permutation/permutation.py,sha256=meBNSrbRa9P8WJ54n485l0H7VQJlMSfHqdN4aCKYCtQ,10105
|
32
|
+
risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
|
33
|
+
risk_network-0.0.8.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
34
|
+
risk_network-0.0.8.dist-info/METADATA,sha256=bFaieAp2hbwf-6YMsIryAIoZ9kUKGqewW1iQUNxfXlI,47495
|
35
|
+
risk_network-0.0.8.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
36
|
+
risk_network-0.0.8.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
37
|
+
risk_network-0.0.8.dist-info/RECORD,,
|