risk-network 0.0.9b23__py3-none-any.whl → 0.0.9b25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/__init__.py +2 -2
- risk/annotations/annotations.py +9 -9
- risk/annotations/io.py +0 -2
- risk/log/__init__.py +2 -2
- risk/neighborhoods/__init__.py +3 -5
- risk/neighborhoods/api.py +446 -0
- risk/neighborhoods/community.py +4 -2
- risk/neighborhoods/domains.py +28 -1
- risk/network/__init__.py +1 -3
- risk/network/graph/__init__.py +1 -1
- risk/network/graph/api.py +194 -0
- risk/network/graph/summary.py +6 -2
- risk/network/io.py +0 -2
- risk/network/plotter/__init__.py +6 -0
- risk/network/plotter/api.py +54 -0
- risk/network/{plot → plotter}/canvas.py +3 -3
- risk/network/{plot → plotter}/contour.py +2 -2
- risk/network/{plot → plotter}/labels.py +3 -3
- risk/network/{plot → plotter}/network.py +136 -3
- risk/network/{plot → plotter}/utils/colors.py +15 -6
- risk/risk.py +10 -483
- risk/stats/__init__.py +8 -4
- risk/stats/binom.py +51 -0
- risk/stats/chi2.py +69 -0
- risk/stats/hypergeom.py +27 -17
- risk/stats/permutation/__init__.py +1 -1
- risk/stats/permutation/permutation.py +44 -55
- risk/stats/permutation/test_functions.py +25 -17
- risk/stats/poisson.py +15 -9
- risk/stats/zscore.py +68 -0
- {risk_network-0.0.9b23.dist-info → risk_network-0.0.9b25.dist-info}/METADATA +1 -1
- risk_network-0.0.9b25.dist-info/RECORD +44 -0
- risk/network/plot/__init__.py +0 -6
- risk/network/plot/plotter.py +0 -143
- risk_network-0.0.9b23.dist-info/RECORD +0 -39
- /risk/network/{plot → plotter}/utils/layout.py +0 -0
- {risk_network-0.0.9b23.dist-info → risk_network-0.0.9b25.dist-info}/LICENSE +0 -0
- {risk_network-0.0.9b23.dist-info → risk_network-0.0.9b25.dist-info}/WHEEL +0 -0
- {risk_network-0.0.9b23.dist-info → risk_network-0.0.9b25.dist-info}/top_level.txt +0 -0
risk/risk.py
CHANGED
@@ -3,35 +3,21 @@ risk/risk
|
|
3
3
|
~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
import
|
7
|
-
from
|
6
|
+
from risk.network import NetworkIO
|
7
|
+
from risk.annotations import AnnotationsIO
|
8
|
+
from risk.neighborhoods import NeighborhoodsAPI
|
9
|
+
from risk.network.graph import GraphAPI
|
10
|
+
from risk.network.plotter import PlotterAPI
|
8
11
|
|
9
|
-
import
|
10
|
-
import numpy as np
|
11
|
-
import pandas as pd
|
12
|
+
from risk.log import params, set_global_verbosity
|
12
13
|
|
13
|
-
from risk.annotations import AnnotationsIO, define_top_annotations
|
14
|
-
from risk.log import params, logger, log_header, set_global_verbosity
|
15
|
-
from risk.neighborhoods import (
|
16
|
-
define_domains,
|
17
|
-
get_network_neighborhoods,
|
18
|
-
process_neighborhoods,
|
19
|
-
trim_domains,
|
20
|
-
)
|
21
|
-
from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
|
22
|
-
from risk.stats import (
|
23
|
-
calculate_significance_matrices,
|
24
|
-
compute_hypergeom_test,
|
25
|
-
compute_permutation_test,
|
26
|
-
compute_poisson_test,
|
27
|
-
)
|
28
14
|
|
29
|
-
|
30
|
-
class RISK(NetworkIO, AnnotationsIO):
|
15
|
+
class RISK(NetworkIO, AnnotationsIO, NeighborhoodsAPI, GraphAPI, PlotterAPI):
|
31
16
|
"""RISK: A class for network analysis and visualization.
|
32
17
|
|
33
18
|
The RISK class integrates functionalities for loading networks, processing annotations,
|
34
|
-
|
19
|
+
performing network-based statistical analysis to quantify neighborhood relationships,
|
20
|
+
and visualizing networks and their properties.
|
35
21
|
"""
|
36
22
|
|
37
23
|
def __init__(self, verbose: bool = True):
|
@@ -42,465 +28,6 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
42
28
|
"""
|
43
29
|
# Set global verbosity for logging
|
44
30
|
set_global_verbosity(verbose)
|
45
|
-
# Provide public access to
|
31
|
+
# Provide public access to network parameters
|
46
32
|
self.params = params
|
47
33
|
super().__init__()
|
48
|
-
|
49
|
-
def load_neighborhoods_by_hypergeom(
|
50
|
-
self,
|
51
|
-
network: nx.Graph,
|
52
|
-
annotations: Dict[str, Any],
|
53
|
-
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
54
|
-
louvain_resolution: float = 0.1,
|
55
|
-
leiden_resolution: float = 1.0,
|
56
|
-
fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
|
57
|
-
null_distribution: str = "network",
|
58
|
-
random_seed: int = 888,
|
59
|
-
) -> Dict[str, Any]:
|
60
|
-
"""Load significant neighborhoods for the network using the hypergeometric test.
|
61
|
-
|
62
|
-
Args:
|
63
|
-
network (nx.Graph): The network graph.
|
64
|
-
annotations (Dict[str, Any]): The annotations associated with the network.
|
65
|
-
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
66
|
-
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
67
|
-
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
68
|
-
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
69
|
-
leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
|
70
|
-
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
71
|
-
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
72
|
-
Defaults to 0.5.
|
73
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
74
|
-
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
75
|
-
|
76
|
-
Returns:
|
77
|
-
Dict[str, Any]: Computed significance of neighborhoods.
|
78
|
-
"""
|
79
|
-
log_header("Running hypergeometric test")
|
80
|
-
# Log neighborhood analysis parameters
|
81
|
-
params.log_neighborhoods(
|
82
|
-
distance_metric=distance_metric,
|
83
|
-
louvain_resolution=louvain_resolution,
|
84
|
-
leiden_resolution=leiden_resolution,
|
85
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
86
|
-
statistical_test_function="hypergeom",
|
87
|
-
null_distribution=null_distribution,
|
88
|
-
random_seed=random_seed,
|
89
|
-
)
|
90
|
-
|
91
|
-
# Make a copy of the network to avoid modifying the original
|
92
|
-
network = copy.deepcopy(network)
|
93
|
-
|
94
|
-
# Load neighborhoods based on the network and distance metric
|
95
|
-
neighborhoods = self._load_neighborhoods(
|
96
|
-
network,
|
97
|
-
distance_metric,
|
98
|
-
louvain_resolution=louvain_resolution,
|
99
|
-
leiden_resolution=leiden_resolution,
|
100
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
101
|
-
random_seed=random_seed,
|
102
|
-
)
|
103
|
-
# Run hypergeometric test to compute neighborhood significance
|
104
|
-
neighborhood_significance = compute_hypergeom_test(
|
105
|
-
neighborhoods=neighborhoods,
|
106
|
-
annotations=annotations["matrix"],
|
107
|
-
null_distribution=null_distribution,
|
108
|
-
)
|
109
|
-
|
110
|
-
# Return the computed neighborhood significance
|
111
|
-
return neighborhood_significance
|
112
|
-
|
113
|
-
def load_neighborhoods_by_poisson(
|
114
|
-
self,
|
115
|
-
network: nx.Graph,
|
116
|
-
annotations: Dict[str, Any],
|
117
|
-
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
118
|
-
louvain_resolution: float = 0.1,
|
119
|
-
leiden_resolution: float = 1.0,
|
120
|
-
fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
|
121
|
-
null_distribution: str = "network",
|
122
|
-
random_seed: int = 888,
|
123
|
-
) -> Dict[str, Any]:
|
124
|
-
"""Load significant neighborhoods for the network using the Poisson test.
|
125
|
-
|
126
|
-
Args:
|
127
|
-
network (nx.Graph): The network graph.
|
128
|
-
annotations (Dict[str, Any]): The annotations associated with the network.
|
129
|
-
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
130
|
-
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
131
|
-
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
132
|
-
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
133
|
-
leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
|
134
|
-
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
135
|
-
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
136
|
-
Defaults to 0.5.
|
137
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
138
|
-
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
139
|
-
|
140
|
-
Returns:
|
141
|
-
Dict[str, Any]: Computed significance of neighborhoods.
|
142
|
-
"""
|
143
|
-
log_header("Running Poisson test")
|
144
|
-
# Log neighborhood analysis parameters
|
145
|
-
params.log_neighborhoods(
|
146
|
-
distance_metric=distance_metric,
|
147
|
-
louvain_resolution=louvain_resolution,
|
148
|
-
leiden_resolution=leiden_resolution,
|
149
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
150
|
-
statistical_test_function="poisson",
|
151
|
-
null_distribution=null_distribution,
|
152
|
-
random_seed=random_seed,
|
153
|
-
)
|
154
|
-
|
155
|
-
# Make a copy of the network to avoid modifying the original
|
156
|
-
network = copy.deepcopy(network)
|
157
|
-
|
158
|
-
# Load neighborhoods based on the network and distance metric
|
159
|
-
neighborhoods = self._load_neighborhoods(
|
160
|
-
network,
|
161
|
-
distance_metric,
|
162
|
-
louvain_resolution=louvain_resolution,
|
163
|
-
leiden_resolution=leiden_resolution,
|
164
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
165
|
-
random_seed=random_seed,
|
166
|
-
)
|
167
|
-
# Run Poisson test to compute neighborhood significance
|
168
|
-
neighborhood_significance = compute_poisson_test(
|
169
|
-
neighborhoods=neighborhoods,
|
170
|
-
annotations=annotations["matrix"],
|
171
|
-
null_distribution=null_distribution,
|
172
|
-
)
|
173
|
-
|
174
|
-
# Return the computed neighborhood significance
|
175
|
-
return neighborhood_significance
|
176
|
-
|
177
|
-
def load_neighborhoods_by_permutation(
|
178
|
-
self,
|
179
|
-
network: nx.Graph,
|
180
|
-
annotations: Dict[str, Any],
|
181
|
-
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
182
|
-
louvain_resolution: float = 0.1,
|
183
|
-
leiden_resolution: float = 1.0,
|
184
|
-
fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
|
185
|
-
score_metric: str = "sum",
|
186
|
-
null_distribution: str = "network",
|
187
|
-
num_permutations: int = 1000,
|
188
|
-
random_seed: int = 888,
|
189
|
-
max_workers: int = 1,
|
190
|
-
) -> Dict[str, Any]:
|
191
|
-
"""Load significant neighborhoods for the network using the permutation test.
|
192
|
-
|
193
|
-
Args:
|
194
|
-
network (nx.Graph): The network graph.
|
195
|
-
annotations (Dict[str, Any]): The annotations associated with the network.
|
196
|
-
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
197
|
-
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
198
|
-
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
199
|
-
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
200
|
-
leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
|
201
|
-
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
202
|
-
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
203
|
-
Defaults to 0.5.
|
204
|
-
score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
|
205
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
206
|
-
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
207
|
-
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
208
|
-
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
209
|
-
|
210
|
-
Returns:
|
211
|
-
Dict[str, Any]: Computed significance of neighborhoods.
|
212
|
-
"""
|
213
|
-
log_header("Running permutation test")
|
214
|
-
# Log neighborhood analysis parameters
|
215
|
-
params.log_neighborhoods(
|
216
|
-
distance_metric=distance_metric,
|
217
|
-
louvain_resolution=louvain_resolution,
|
218
|
-
leiden_resolution=leiden_resolution,
|
219
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
220
|
-
statistical_test_function="permutation",
|
221
|
-
score_metric=score_metric,
|
222
|
-
null_distribution=null_distribution,
|
223
|
-
num_permutations=num_permutations,
|
224
|
-
random_seed=random_seed,
|
225
|
-
max_workers=max_workers,
|
226
|
-
)
|
227
|
-
|
228
|
-
# Make a copy of the network to avoid modifying the original
|
229
|
-
network = copy.deepcopy(network)
|
230
|
-
|
231
|
-
# Load neighborhoods based on the network and distance metric
|
232
|
-
neighborhoods = self._load_neighborhoods(
|
233
|
-
network,
|
234
|
-
distance_metric,
|
235
|
-
louvain_resolution=louvain_resolution,
|
236
|
-
leiden_resolution=leiden_resolution,
|
237
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
238
|
-
random_seed=random_seed,
|
239
|
-
)
|
240
|
-
|
241
|
-
# Log and display permutation test settings
|
242
|
-
logger.debug(f"Neighborhood scoring metric: '{score_metric}'")
|
243
|
-
logger.debug(f"Null distribution: '{null_distribution}'")
|
244
|
-
logger.debug(f"Number of permutations: {num_permutations}")
|
245
|
-
logger.debug(f"Maximum workers: {max_workers}")
|
246
|
-
# Run permutation test to compute neighborhood significance
|
247
|
-
neighborhood_significance = compute_permutation_test(
|
248
|
-
neighborhoods=neighborhoods,
|
249
|
-
annotations=annotations["matrix"],
|
250
|
-
score_metric=score_metric,
|
251
|
-
null_distribution=null_distribution,
|
252
|
-
num_permutations=num_permutations,
|
253
|
-
random_seed=random_seed,
|
254
|
-
max_workers=max_workers,
|
255
|
-
)
|
256
|
-
|
257
|
-
# Return the computed neighborhood significance
|
258
|
-
return neighborhood_significance
|
259
|
-
|
260
|
-
def load_graph(
|
261
|
-
self,
|
262
|
-
network: nx.Graph,
|
263
|
-
annotations: Dict[str, Any],
|
264
|
-
neighborhoods: Dict[str, Any],
|
265
|
-
tail: str = "right",
|
266
|
-
pval_cutoff: float = 0.01,
|
267
|
-
fdr_cutoff: float = 0.9999,
|
268
|
-
impute_depth: int = 0,
|
269
|
-
prune_threshold: float = 0.0,
|
270
|
-
linkage_criterion: str = "distance",
|
271
|
-
linkage_method: str = "average",
|
272
|
-
linkage_metric: str = "yule",
|
273
|
-
min_cluster_size: int = 5,
|
274
|
-
max_cluster_size: int = 1000,
|
275
|
-
) -> NetworkGraph:
|
276
|
-
"""Load and process the network graph, defining top annotations and domains.
|
277
|
-
|
278
|
-
Args:
|
279
|
-
network (nx.Graph): The network graph.
|
280
|
-
annotations (Dict[str, Any]): The annotations associated with the network.
|
281
|
-
neighborhoods (Dict[str, Any]): Neighborhood significance data.
|
282
|
-
tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
|
283
|
-
pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
|
284
|
-
fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
|
285
|
-
impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
|
286
|
-
prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
|
287
|
-
linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
|
288
|
-
linkage_method (str, optional): Clustering method to use. Defaults to "average".
|
289
|
-
linkage_metric (str, optional): Metric to use for calculating distances. Defaults to "yule".
|
290
|
-
min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
|
291
|
-
max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
|
292
|
-
|
293
|
-
Returns:
|
294
|
-
NetworkGraph: A fully initialized and processed NetworkGraph object.
|
295
|
-
"""
|
296
|
-
# Log the parameters and display headers
|
297
|
-
log_header("Finding significant neighborhoods")
|
298
|
-
params.log_graph(
|
299
|
-
tail=tail,
|
300
|
-
pval_cutoff=pval_cutoff,
|
301
|
-
fdr_cutoff=fdr_cutoff,
|
302
|
-
impute_depth=impute_depth,
|
303
|
-
prune_threshold=prune_threshold,
|
304
|
-
linkage_criterion=linkage_criterion,
|
305
|
-
linkage_method=linkage_method,
|
306
|
-
linkage_metric=linkage_metric,
|
307
|
-
min_cluster_size=min_cluster_size,
|
308
|
-
max_cluster_size=max_cluster_size,
|
309
|
-
)
|
310
|
-
|
311
|
-
# Make a copy of the network to avoid modifying the original
|
312
|
-
network = copy.deepcopy(network)
|
313
|
-
|
314
|
-
logger.debug(f"p-value cutoff: {pval_cutoff}")
|
315
|
-
logger.debug(f"FDR BH cutoff: {fdr_cutoff}")
|
316
|
-
logger.debug(
|
317
|
-
f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
|
318
|
-
)
|
319
|
-
# Calculate significant neighborhoods based on the provided parameters
|
320
|
-
significant_neighborhoods = calculate_significance_matrices(
|
321
|
-
neighborhoods["depletion_pvals"],
|
322
|
-
neighborhoods["enrichment_pvals"],
|
323
|
-
tail=tail,
|
324
|
-
pval_cutoff=pval_cutoff,
|
325
|
-
fdr_cutoff=fdr_cutoff,
|
326
|
-
)
|
327
|
-
|
328
|
-
log_header("Processing neighborhoods")
|
329
|
-
# Process neighborhoods by imputing and pruning based on the given settings
|
330
|
-
processed_neighborhoods = process_neighborhoods(
|
331
|
-
network=network,
|
332
|
-
neighborhoods=significant_neighborhoods,
|
333
|
-
impute_depth=impute_depth,
|
334
|
-
prune_threshold=prune_threshold,
|
335
|
-
)
|
336
|
-
|
337
|
-
log_header("Finding top annotations")
|
338
|
-
logger.debug(f"Min cluster size: {min_cluster_size}")
|
339
|
-
logger.debug(f"Max cluster size: {max_cluster_size}")
|
340
|
-
# Define top annotations based on processed neighborhoods
|
341
|
-
top_annotations = self._define_top_annotations(
|
342
|
-
network=network,
|
343
|
-
annotations=annotations,
|
344
|
-
neighborhoods=processed_neighborhoods,
|
345
|
-
min_cluster_size=min_cluster_size,
|
346
|
-
max_cluster_size=max_cluster_size,
|
347
|
-
)
|
348
|
-
|
349
|
-
log_header("Optimizing distance threshold for domains")
|
350
|
-
# Extract the significant significance matrix from the neighborhoods data
|
351
|
-
significant_neighborhoods_significance = processed_neighborhoods[
|
352
|
-
"significant_significance_matrix"
|
353
|
-
]
|
354
|
-
# Define domains in the network using the specified clustering settings
|
355
|
-
domains = define_domains(
|
356
|
-
top_annotations=top_annotations,
|
357
|
-
significant_neighborhoods_significance=significant_neighborhoods_significance,
|
358
|
-
linkage_criterion=linkage_criterion,
|
359
|
-
linkage_method=linkage_method,
|
360
|
-
linkage_metric=linkage_metric,
|
361
|
-
)
|
362
|
-
# Trim domains and top annotations based on cluster size constraints
|
363
|
-
domains, trimmed_domains = trim_domains(
|
364
|
-
domains=domains,
|
365
|
-
top_annotations=top_annotations,
|
366
|
-
min_cluster_size=min_cluster_size,
|
367
|
-
max_cluster_size=max_cluster_size,
|
368
|
-
)
|
369
|
-
|
370
|
-
# Prepare node mapping and significance sums for the final NetworkGraph object
|
371
|
-
ordered_nodes = annotations["ordered_nodes"]
|
372
|
-
node_label_to_id = dict(zip(ordered_nodes, range(len(ordered_nodes))))
|
373
|
-
node_significance_sums = processed_neighborhoods["node_significance_sums"]
|
374
|
-
|
375
|
-
# Return the fully initialized NetworkGraph object
|
376
|
-
return NetworkGraph(
|
377
|
-
network=network,
|
378
|
-
annotations=annotations,
|
379
|
-
neighborhoods=neighborhoods,
|
380
|
-
domains=domains,
|
381
|
-
trimmed_domains=trimmed_domains,
|
382
|
-
node_label_to_node_id_map=node_label_to_id,
|
383
|
-
node_significance_sums=node_significance_sums,
|
384
|
-
)
|
385
|
-
|
386
|
-
def load_plotter(
|
387
|
-
self,
|
388
|
-
graph: NetworkGraph,
|
389
|
-
figsize: Union[List, Tuple, np.ndarray] = (10, 10),
|
390
|
-
background_color: str = "white",
|
391
|
-
background_alpha: Union[float, None] = 1.0,
|
392
|
-
pad: float = 0.3,
|
393
|
-
) -> NetworkPlotter:
|
394
|
-
"""Get a NetworkPlotter object for plotting.
|
395
|
-
|
396
|
-
Args:
|
397
|
-
graph (NetworkGraph): The graph to plot.
|
398
|
-
figsize (List, Tuple, or np.ndarray, optional): Size of the plot. Defaults to (10, 10)., optional): Size of the figure. Defaults to (10, 10).
|
399
|
-
background_color (str, optional): Background color of the plot. Defaults to "white".
|
400
|
-
background_alpha (float, None, optional): Transparency level of the background color. If provided, it overrides
|
401
|
-
any existing alpha values found in background_color. Defaults to 1.0.
|
402
|
-
pad (float, optional): Padding value to adjust the axis limits. Defaults to 0.3.
|
403
|
-
|
404
|
-
Returns:
|
405
|
-
NetworkPlotter: A NetworkPlotter object configured with the given parameters.
|
406
|
-
"""
|
407
|
-
log_header("Loading plotter")
|
408
|
-
|
409
|
-
# Initialize and return a NetworkPlotter object
|
410
|
-
return NetworkPlotter(
|
411
|
-
graph,
|
412
|
-
figsize=figsize,
|
413
|
-
background_color=background_color,
|
414
|
-
background_alpha=background_alpha,
|
415
|
-
pad=pad,
|
416
|
-
)
|
417
|
-
|
418
|
-
def _load_neighborhoods(
|
419
|
-
self,
|
420
|
-
network: nx.Graph,
|
421
|
-
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
422
|
-
louvain_resolution: float = 0.1,
|
423
|
-
leiden_resolution: float = 1.0,
|
424
|
-
fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
|
425
|
-
random_seed: int = 888,
|
426
|
-
) -> np.ndarray:
|
427
|
-
"""Load significant neighborhoods for the network.
|
428
|
-
|
429
|
-
Args:
|
430
|
-
network (nx.Graph): The network graph.
|
431
|
-
annotations (pd.DataFrame): The matrix of annotations associated with the network.
|
432
|
-
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
433
|
-
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
434
|
-
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
435
|
-
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
436
|
-
leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
|
437
|
-
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
438
|
-
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
439
|
-
Defaults to 0.5.
|
440
|
-
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
441
|
-
|
442
|
-
Returns:
|
443
|
-
np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
|
444
|
-
"""
|
445
|
-
# Display the chosen distance metric
|
446
|
-
if distance_metric == "louvain":
|
447
|
-
for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
|
448
|
-
elif distance_metric == "leiden":
|
449
|
-
for_print_distance_metric = f"leiden (resolution={leiden_resolution})"
|
450
|
-
else:
|
451
|
-
for_print_distance_metric = distance_metric
|
452
|
-
# Log and display neighborhood settings
|
453
|
-
logger.debug(f"Distance metric: '{for_print_distance_metric}'")
|
454
|
-
logger.debug(f"Edge length threshold: {fraction_shortest_edges}")
|
455
|
-
logger.debug(f"Random seed: {random_seed}")
|
456
|
-
|
457
|
-
# Compute neighborhoods based on the network and distance metric
|
458
|
-
neighborhoods = get_network_neighborhoods(
|
459
|
-
network,
|
460
|
-
distance_metric,
|
461
|
-
fraction_shortest_edges,
|
462
|
-
louvain_resolution=louvain_resolution,
|
463
|
-
leiden_resolution=leiden_resolution,
|
464
|
-
random_seed=random_seed,
|
465
|
-
)
|
466
|
-
|
467
|
-
# Return the computed neighborhoods
|
468
|
-
return neighborhoods
|
469
|
-
|
470
|
-
def _define_top_annotations(
|
471
|
-
self,
|
472
|
-
network: nx.Graph,
|
473
|
-
annotations: Dict[str, Any],
|
474
|
-
neighborhoods: Dict[str, Any],
|
475
|
-
min_cluster_size: int = 5,
|
476
|
-
max_cluster_size: int = 1000,
|
477
|
-
) -> pd.DataFrame:
|
478
|
-
"""Define top annotations for the network.
|
479
|
-
|
480
|
-
Args:
|
481
|
-
network (nx.Graph): The network graph.
|
482
|
-
annotations (Dict[str, Any]): Annotations data for the network.
|
483
|
-
neighborhoods (Dict[str, Any]): Neighborhood significance data.
|
484
|
-
min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
|
485
|
-
max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
|
486
|
-
|
487
|
-
Returns:
|
488
|
-
Dict[str, Any]: Top annotations identified within the network.
|
489
|
-
"""
|
490
|
-
# Extract necessary data from annotations and neighborhoods
|
491
|
-
ordered_annotations = annotations["ordered_annotations"]
|
492
|
-
neighborhood_significance_sums = neighborhoods["neighborhood_significance_counts"]
|
493
|
-
significant_significance_matrix = neighborhoods["significant_significance_matrix"]
|
494
|
-
significant_binary_significance_matrix = neighborhoods[
|
495
|
-
"significant_binary_significance_matrix"
|
496
|
-
]
|
497
|
-
# Call external function to define top annotations
|
498
|
-
return define_top_annotations(
|
499
|
-
network=network,
|
500
|
-
ordered_annotation_labels=ordered_annotations,
|
501
|
-
neighborhood_significance_sums=neighborhood_significance_sums,
|
502
|
-
significant_significance_matrix=significant_significance_matrix,
|
503
|
-
significant_binary_significance_matrix=significant_binary_significance_matrix,
|
504
|
-
min_cluster_size=min_cluster_size,
|
505
|
-
max_cluster_size=max_cluster_size,
|
506
|
-
)
|
risk/stats/__init__.py
CHANGED
@@ -3,7 +3,11 @@ risk/stats
|
|
3
3
|
~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from .
|
7
|
-
from .
|
8
|
-
from .
|
9
|
-
from .stats import
|
6
|
+
from risk.stats.binom import compute_binom_test
|
7
|
+
from risk.stats.chi2 import compute_chi2_test
|
8
|
+
from risk.stats.hypergeom import compute_hypergeom_test
|
9
|
+
from risk.stats.permutation import compute_permutation_test
|
10
|
+
from risk.stats.poisson import compute_poisson_test
|
11
|
+
from risk.stats.zscore import compute_zscore_test
|
12
|
+
|
13
|
+
from risk.stats.stats import calculate_significance_matrices
|
risk/stats/binom.py
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
"""
|
2
|
+
risk/stats/binom
|
3
|
+
~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from typing import Any, Dict
|
7
|
+
|
8
|
+
from scipy.sparse import csr_matrix
|
9
|
+
from scipy.stats import binom
|
10
|
+
|
11
|
+
|
12
|
+
def compute_binom_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.

    For every (neighborhood, annotation) pair, the number of annotated nodes found in the
    neighborhood is compared against a Binomial(n, p) null, where n is the neighborhood size
    (row sum of `neighborhoods`) and p is the per-annotation success probability derived from
    the chosen null distribution.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values, each a dense
            array of shape (number of neighborhoods, number of annotations).

    Raises:
        ValueError: If null_distribution is neither 'network' nor 'annotations'.
    """
    # Imported locally because this module does not import numpy at file level
    import numpy as np

    # Get the total number of nodes in the network
    total_nodes = neighborhoods.shape[1]

    # Compute sums and convert to plain ndarrays; np.asarray works whether the sparse
    # container returns np.matrix or ndarray, unlike the matrix-only `.A` attribute
    neighborhood_sizes = np.asarray(neighborhoods.sum(axis=1)).reshape(-1, 1)  # Row sums
    annotation_totals = np.asarray(annotations.sum(axis=0)).ravel()  # Column sums

    # Compute the per-annotation success probability under the null
    if null_distribution == "network":
        success_probabilities = annotation_totals / total_nodes
    elif null_distribution == "annotations":
        success_probabilities = annotation_totals / annotations.sum()
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Observed counts (sparse matrix multiplication), converted for dense operations
    annotated_counts_dense = (neighborhoods @ annotations).toarray()

    # Enrichment: P(X >= k). Use the survival function directly instead of 1 - cdf,
    # which cancels to exactly 0.0 once the tail drops below machine epsilon
    enrichment_pvals = binom.sf(annotated_counts_dense - 1, neighborhood_sizes, success_probabilities)
    # Depletion: P(X <= k)
    depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes, success_probabilities)

    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
|
risk/stats/chi2.py
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
"""
|
2
|
+
risk/stats/chi2
|
3
|
+
~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from typing import Any, Dict
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
from scipy.sparse import csr_matrix
|
10
|
+
from scipy.stats import chi2
|
11
|
+
|
12
|
+
|
13
|
+
def compute_chi2_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.

    For every (neighborhood, annotation) pair the observed count of annotated nodes is compared
    with the count expected under independence, using the single-cell statistic
    (observed - expected)^2 / expected with one degree of freedom. Because this statistic is
    unsigned, the direction of the deviation decides which tail receives the p-value: a pair is
    only reported as enriched when observed > expected and only as depleted when
    observed < expected; the opposite direction (and pairs with no deviation or no expectation)
    get a p-value of 1.0.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values, each a dense
            array of shape (number of neighborhoods, number of annotations).

    Raises:
        ValueError: If null_distribution is neither 'network' nor 'annotations'.
    """
    # Total number of nodes in the network
    total_node_count = neighborhoods.shape[0]

    if null_distribution == "network":
        # Case 1: Use all nodes as the background
        background_population = total_node_count
        neighborhood_sums = neighborhoods.sum(axis=0)  # Column sums of neighborhoods
        annotation_sums = annotations.sum(axis=0)  # Column sums of annotations
    elif null_distribution == "annotations":
        # Case 2: Only consider nodes with at least one annotation
        annotated_nodes = np.ravel(annotations.sum(axis=1)) > 0
        background_population = annotated_nodes.sum()  # Total number of annotated nodes
        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0)
        annotation_sums = annotations[annotated_nodes].sum(axis=0)
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Convert to dense arrays for downstream computations
    neighborhood_sums = np.asarray(neighborhood_sums).reshape(-1, 1)  # Column vector
    annotation_sums = np.asarray(annotation_sums).reshape(1, -1)  # Row vector

    # Observed values: number of annotated nodes in each neighborhood.
    # Densify explicitly so the arithmetic below never depends on sparse/dense
    # mixed-operand semantics. Shape: (neighborhoods, annotations)
    observed = neighborhoods.T @ annotations
    if hasattr(observed, "toarray"):
        observed = observed.toarray()
    observed = np.asarray(observed, dtype=float)

    with np.errstate(divide="ignore", invalid="ignore"):  # Handle divide-by-zero
        # Expected values under the null
        expected = (neighborhood_sums @ annotation_sums) / background_population
        # Chi-squared statistic per cell: (observed - expected)^2 / expected
        chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)

    # Upper-tail probability of the (unsigned) statistic
    tail_pvals = chi2.sf(chi2_stat, df=1)
    # Assign the tail probability according to the direction of the deviation; comparisons
    # against a NaN expectation are False, so such cells fall back to 1.0 in both outputs
    enrichment_pvals = np.where(observed > expected, tail_pvals, 1.0)
    depletion_pvals = np.where(observed < expected, tail_pvals, 1.0)

    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|