risk-network 0.0.16b0__py3-none-any.whl → 0.0.16b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +2 -2
- risk/{_annotation → annotation}/__init__.py +2 -2
- risk/{_annotation → annotation}/_nltk_setup.py +3 -3
- risk/{_annotation/_annotation.py → annotation/annotation.py} +22 -25
- risk/{_annotation/_io.py → annotation/io.py} +4 -4
- risk/cluster/__init__.py +8 -0
- risk/{_neighborhoods → cluster}/_community.py +37 -37
- risk/cluster/api.py +273 -0
- risk/{_neighborhoods/_neighborhoods.py → cluster/cluster.py} +127 -98
- risk/{_neighborhoods/_domains.py → cluster/label.py} +18 -12
- risk/{_log → log}/__init__.py +2 -2
- risk/{_log/_console.py → log/console.py} +2 -2
- risk/{_log/_parameters.py → log/parameters.py} +20 -10
- risk/network/__init__.py +8 -0
- risk/network/graph/__init__.py +7 -0
- risk/{_network/_graph → network/graph}/_stats.py +2 -2
- risk/{_network/_graph → network/graph}/_summary.py +13 -13
- risk/{_network/_graph/_api.py → network/graph/api.py} +37 -39
- risk/{_network/_graph/_graph.py → network/graph/graph.py} +5 -5
- risk/{_network/_io.py → network/io.py} +9 -4
- risk/network/plotter/__init__.py +6 -0
- risk/{_network/_plotter → network/plotter}/_canvas.py +6 -6
- risk/{_network/_plotter → network/plotter}/_contour.py +4 -4
- risk/{_network/_plotter → network/plotter}/_labels.py +6 -6
- risk/{_network/_plotter → network/plotter}/_network.py +7 -7
- risk/{_network/_plotter → network/plotter}/_plotter.py +5 -5
- risk/network/plotter/_utils/__init__.py +7 -0
- risk/{_network/_plotter/_utils/_colors.py → network/plotter/_utils/colors.py} +3 -3
- risk/{_network/_plotter/_utils/_layout.py → network/plotter/_utils/layout.py} +2 -2
- risk/{_network/_plotter/_api.py → network/plotter/api.py} +5 -5
- risk/{_risk.py → risk.py} +9 -8
- risk/stats/__init__.py +6 -0
- risk/stats/_stats/__init__.py +11 -0
- risk/stats/_stats/permutation/__init__.py +6 -0
- risk/stats/_stats/permutation/_test_functions.py +72 -0
- risk/{_neighborhoods/_stats/_permutation/_permutation.py → stats/_stats/permutation/permutation.py} +35 -37
- risk/{_neighborhoods/_stats/_tests.py → stats/_stats/tests.py} +32 -34
- risk/stats/api.py +202 -0
- {risk_network-0.0.16b0.dist-info → risk_network-0.0.16b2.dist-info}/METADATA +2 -2
- risk_network-0.0.16b2.dist-info/RECORD +43 -0
- risk/_neighborhoods/__init__.py +0 -8
- risk/_neighborhoods/_api.py +0 -354
- risk/_neighborhoods/_stats/__init__.py +0 -11
- risk/_neighborhoods/_stats/_permutation/__init__.py +0 -6
- risk/_neighborhoods/_stats/_permutation/_test_functions.py +0 -72
- risk/_network/__init__.py +0 -8
- risk/_network/_graph/__init__.py +0 -7
- risk/_network/_plotter/__init__.py +0 -6
- risk/_network/_plotter/_utils/__init__.py +0 -7
- risk_network-0.0.16b0.dist-info/RECORD +0 -41
- {risk_network-0.0.16b0.dist-info → risk_network-0.0.16b2.dist-info}/WHEEL +0 -0
- {risk_network-0.0.16b0.dist-info → risk_network-0.0.16b2.dist-info}/licenses/LICENSE +0 -0
- {risk_network-0.0.16b0.dist-info → risk_network-0.0.16b2.dist-info}/top_level.txt +0 -0
risk/stats/api.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""
|
|
2
|
+
risk/stats/api
|
|
3
|
+
~~~~~~~~~~~~~~
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Dict
|
|
7
|
+
|
|
8
|
+
from scipy.sparse import csr_matrix
|
|
9
|
+
|
|
10
|
+
from ..log import log_header, logger, params
|
|
11
|
+
from ._stats import (
|
|
12
|
+
compute_binom_test,
|
|
13
|
+
compute_chi2_test,
|
|
14
|
+
compute_hypergeom_test,
|
|
15
|
+
compute_permutation_test,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class StatsAPI:
|
|
20
|
+
"""
|
|
21
|
+
Handles the loading of statistical results and annotation significance for clusters.
|
|
22
|
+
|
|
23
|
+
The StatsAPI class provides methods to load cluster results from statistical tests.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def run_binom(
|
|
27
|
+
self,
|
|
28
|
+
annotation: Dict[str, Any],
|
|
29
|
+
clusters: csr_matrix,
|
|
30
|
+
null_distribution: str = "network",
|
|
31
|
+
**kwargs,
|
|
32
|
+
) -> Dict[str, Any]:
|
|
33
|
+
"""
|
|
34
|
+
Compute cluster significance using the binomial test.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
38
|
+
clusters (csr_matrix): The cluster assignments for the network.
|
|
39
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
Dict[str, Any]: The computed significance of clusters based on the specified statistical test.
|
|
43
|
+
"""
|
|
44
|
+
log_header("Running binomial test")
|
|
45
|
+
# Compute cluster significance using the binomial test
|
|
46
|
+
return self._run_statistical_test(
|
|
47
|
+
annotation=annotation,
|
|
48
|
+
clusters=clusters,
|
|
49
|
+
null_distribution=null_distribution,
|
|
50
|
+
statistical_test_key="binom",
|
|
51
|
+
statistical_test_function=compute_binom_test,
|
|
52
|
+
**kwargs,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
def run_chi2(
|
|
56
|
+
self,
|
|
57
|
+
annotation: Dict[str, Any],
|
|
58
|
+
clusters: csr_matrix,
|
|
59
|
+
null_distribution: str = "network",
|
|
60
|
+
**kwargs,
|
|
61
|
+
) -> Dict[str, Any]:
|
|
62
|
+
"""
|
|
63
|
+
Compute cluster significance using the chi-squared test.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
67
|
+
clusters (csr_matrix): The cluster assignments for the network.
|
|
68
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
Dict[str, Any]: The computed significance of clusters based on the specified statistical test.
|
|
72
|
+
"""
|
|
73
|
+
log_header("Running chi-squared test")
|
|
74
|
+
# Compute cluster significance using the chi-squared test
|
|
75
|
+
return self._run_statistical_test(
|
|
76
|
+
annotation=annotation,
|
|
77
|
+
clusters=clusters,
|
|
78
|
+
null_distribution=null_distribution,
|
|
79
|
+
statistical_test_key="chi2",
|
|
80
|
+
statistical_test_function=compute_chi2_test,
|
|
81
|
+
**kwargs,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
def run_hypergeom(
|
|
85
|
+
self,
|
|
86
|
+
annotation: Dict[str, Any],
|
|
87
|
+
clusters: csr_matrix,
|
|
88
|
+
null_distribution: str = "network",
|
|
89
|
+
**kwargs,
|
|
90
|
+
) -> Dict[str, Any]:
|
|
91
|
+
"""
|
|
92
|
+
Compute cluster significance using the hypergeometric test.
|
|
93
|
+
|
|
94
|
+
Args:
|
|
95
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
96
|
+
clusters (csr_matrix): The cluster matrix to use.
|
|
97
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
98
|
+
|
|
99
|
+
Returns:
|
|
100
|
+
Dict[str, Any]: The computed significance of clusters based on the specified statistical test.
|
|
101
|
+
"""
|
|
102
|
+
log_header("Running hypergeometric test")
|
|
103
|
+
# Compute cluster significance using the hypergeometric test
|
|
104
|
+
return self._run_statistical_test(
|
|
105
|
+
annotation=annotation,
|
|
106
|
+
clusters=clusters,
|
|
107
|
+
null_distribution=null_distribution,
|
|
108
|
+
statistical_test_key="hypergeom",
|
|
109
|
+
statistical_test_function=compute_hypergeom_test,
|
|
110
|
+
**kwargs,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
def run_permutation(
|
|
114
|
+
self,
|
|
115
|
+
annotation: Dict[str, Any],
|
|
116
|
+
clusters: csr_matrix,
|
|
117
|
+
score_metric: str = "sum",
|
|
118
|
+
null_distribution: str = "network",
|
|
119
|
+
num_permutations: int = 1000,
|
|
120
|
+
random_seed: int = 888,
|
|
121
|
+
max_workers: int = 1,
|
|
122
|
+
**kwargs,
|
|
123
|
+
) -> Dict[str, Any]:
|
|
124
|
+
"""
|
|
125
|
+
Compute cluster significance using the permutation test.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
129
|
+
clusters (csr_matrix): The cluster matrix to use.
|
|
130
|
+
score_metric (str, optional): Scoring metric for cluster significance. Defaults to "sum".
|
|
131
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
132
|
+
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
|
133
|
+
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
|
134
|
+
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
|
135
|
+
|
|
136
|
+
Returns:
|
|
137
|
+
Dict[str, Any]: The computed significance of clusters based on the specified statistical test.
|
|
138
|
+
"""
|
|
139
|
+
log_header("Running permutation test")
|
|
140
|
+
# Log and display permutation test settings, which is unique to this test
|
|
141
|
+
logger.debug(f"Cluster scoring metric: '{score_metric}'")
|
|
142
|
+
logger.debug(f"Number of permutations: {num_permutations}")
|
|
143
|
+
logger.debug(f"Maximum workers: {max_workers}")
|
|
144
|
+
# Compute cluster significance using the permutation test
|
|
145
|
+
return self._run_statistical_test(
|
|
146
|
+
annotation=annotation,
|
|
147
|
+
clusters=clusters,
|
|
148
|
+
null_distribution=null_distribution,
|
|
149
|
+
random_seed=random_seed,
|
|
150
|
+
statistical_test_key="permutation",
|
|
151
|
+
statistical_test_function=compute_permutation_test,
|
|
152
|
+
score_metric=score_metric,
|
|
153
|
+
num_permutations=num_permutations,
|
|
154
|
+
max_workers=max_workers,
|
|
155
|
+
**kwargs,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
def _run_statistical_test(
|
|
159
|
+
self,
|
|
160
|
+
annotation: Dict[str, Any],
|
|
161
|
+
clusters: csr_matrix,
|
|
162
|
+
null_distribution: str = "network",
|
|
163
|
+
statistical_test_key: str = "hypergeom",
|
|
164
|
+
statistical_test_function: Any = compute_hypergeom_test,
|
|
165
|
+
**kwargs,
|
|
166
|
+
) -> Dict[str, Any]:
|
|
167
|
+
"""
|
|
168
|
+
Run the specified statistical test to compute cluster significance.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
annotation (Dict[str, Any]): Annotation data associated with the network.
|
|
172
|
+
clusters (csr_matrix): The cluster matrix to analyze.
|
|
173
|
+
null_distribution (str, optional): The type of null distribution to use ('network' or 'annotation').
|
|
174
|
+
Defaults to "network".
|
|
175
|
+
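random_seed (int, optional): Seed for random number generation to ensure reproducibility. Not a named parameter of this method; when supplied it is received through **kwargs and forwarded to the statistical test function.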
|
|
176
|
+
statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "binom").
|
|
177
|
+
Used for logging and debugging. Defaults to "hypergeom".
|
|
178
|
+
statistical_test_function (Any, optional): The function implementing the statistical test.
|
|
179
|
+
It should accept clusters, annotation, null distribution, and additional kwargs.
|
|
180
|
+
Defaults to `compute_hypergeom_test`.
|
|
181
|
+
**kwargs: Additional parameters to be passed to the statistical test function.
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
Dict[str, Any]: A dictionary containing the computed significance values for clusters.
|
|
185
|
+
"""
|
|
186
|
+
# Log null distribution type
|
|
187
|
+
logger.debug(f"Null distribution: '{null_distribution}'")
|
|
188
|
+
# Log cluster analysis parameters
|
|
189
|
+
params.log_stats(
|
|
190
|
+
statistical_test_function=statistical_test_key,
|
|
191
|
+
null_distribution=null_distribution,
|
|
192
|
+
**kwargs,
|
|
193
|
+
)
|
|
194
|
+
# Apply statistical test function to compute cluster significance
|
|
195
|
+
cluster_significance = statistical_test_function(
|
|
196
|
+
clusters=clusters,
|
|
197
|
+
annotation=annotation["matrix"],
|
|
198
|
+
null_distribution=null_distribution,
|
|
199
|
+
**kwargs,
|
|
200
|
+
)
|
|
201
|
+
# Return the computed cluster significance
|
|
202
|
+
return cluster_significance
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: risk-network
|
|
3
|
-
Version: 0.0.16b0
|
|
3
|
+
Version: 0.0.16b2
|
|
4
4
|
Summary: A Python package for scalable network analysis and high-quality visualization.
|
|
5
5
|
Author-email: Ira Horecka <ira89@icloud.com>
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -44,7 +44,7 @@ Dynamic: license-file
|
|
|
44
44
|

|
|
45
45
|
[PyPI](https://pypi.python.org/pypi/risk-network)
|
|
46
46
|

|
|
47
|
-
[DOI](https://doi.org/10.5281/zenodo.17257417)
|
|
48
48
|

|
|
49
49
|
|
|
50
50
|
**RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. It integrates community detection algorithms, rigorous overrepresentation analysis, and a modular framework for diverse network types. RISK identifies biologically coherent relationships within networks and generates publication-ready visualizations, making it a useful tool for biological and interdisciplinary network analysis.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
risk/__init__.py,sha256=8kw8nQa4l3_e5whBhKyspNfK6gv9ACvnrFRffg7RO40,142
|
|
2
|
+
risk/risk.py,sha256=EHq4jfsx1TssuzkfYd8joaogW0heS0SuM38BWj4CX0E,1063
|
|
3
|
+
risk/annotation/__init__.py,sha256=1F_P_JVQD7ai4bWNjL5dIHTR6SuaFIfnD-MC7wZW0vY,162
|
|
4
|
+
risk/annotation/_nltk_setup.py,sha256=YUlB7bqiHELn8rb5tE5o7_FiXyirnxWL3Vzm47HWtiM,3581
|
|
5
|
+
risk/annotation/annotation.py,sha256=FVYFJoIFUzjhQCba44ocXz65u5Kwr_b0h431ewb_QZc,14980
|
|
6
|
+
risk/annotation/io.py,sha256=_bA-sRDh4ynvgNVCz60AChzh_f22Oxv1AkVnQK-LTPw,12451
|
|
7
|
+
risk/cluster/__init__.py,sha256=vP7ZTKRogKtfMZt7HquJes6TZqQvufIvIeIYbM_bwAE,161
|
|
8
|
+
risk/cluster/_community.py,sha256=wgcoUiFL8tJZTmfnUA2_0LkK_pOFtNCC_d1vBX6ENL4,17665
|
|
9
|
+
risk/cluster/api.py,sha256=d5IT-th4uVotGrt-sctqs3EsWjgvSi4ncQrdB1ghp-c,9337
|
|
10
|
+
risk/cluster/cluster.py,sha256=sWPWBlfy3bZoPBh6LSHc54sZrXMSf6AA9vBVu_z0TmA,21469
|
|
11
|
+
risk/cluster/label.py,sha256=TtN1gbfoDITTc2KWBz8rC8HLr2CZzuhnZU2EClh3dNs,16811
|
|
12
|
+
risk/log/__init__.py,sha256=wEJ0hXt8yyIAS-IGGL7Kd8xoPD_beVWQwhF-buxW6J4,203
|
|
13
|
+
risk/log/console.py,sha256=x1sFpOqKzPVoBHr9sDJJYAbFqQmRUV2HPKBvEn9_AX4,4649
|
|
14
|
+
risk/log/parameters.py,sha256=bKQKYs4cRXnIlEboys8pVjs23NBgMtQWRSg1lGgFUR0,6103
|
|
15
|
+
risk/network/__init__.py,sha256=ZdJRKDXc9DOCIFzDYOsnqmnHvFbODNjaJ8XmeNby93M,124
|
|
16
|
+
risk/network/io.py,sha256=909YYj1h9TKW-VpcdYSya3i1V-hOLXvjU795Y5v6roA,28337
|
|
17
|
+
risk/network/graph/__init__.py,sha256=sU46_opsN9ep9jh6fVoBYRzbohjm6_Nx1IVYqv0sMj4,98
|
|
18
|
+
risk/network/graph/_stats.py,sha256=Q2LPu6vzW-lpDCOLtdBna9tCfuCJ4jxjRjvtb3SD9os,7355
|
|
19
|
+
risk/network/graph/_summary.py,sha256=wCSPXHtA6wfjJwRRuD9RzherNOB8RGQzq1weDa-sHBY,10188
|
|
20
|
+
risk/network/graph/api.py,sha256=E8wjC_RvaHTmdQyD5aGmlSOFtjCrVHpkbqcca570ajQ,8456
|
|
21
|
+
risk/network/graph/graph.py,sha256=Ztw6-rcr4cgQgW1uujbU4y_RHYJRzT_oNOtsgmYCf9s,12475
|
|
22
|
+
risk/network/plotter/__init__.py,sha256=gw2fV1atXlxC5ckST2TnBV3yhVYfQSFadPYBBTiDatk,79
|
|
23
|
+
risk/network/plotter/_canvas.py,sha256=fQEcBCQEI4vwj6sAHgULYMdMuWyqeRrVYp7NxHkyXng,13591
|
|
24
|
+
risk/network/plotter/_contour.py,sha256=8QshMFxZXZmZRzfru5a3Bpf5pGL-ElukegVMvculgFM,15551
|
|
25
|
+
risk/network/plotter/_labels.py,sha256=JZ6KOy_kTz-m_qkedKJ181kppjhk09Z63zlyRv18lnw,46909
|
|
26
|
+
risk/network/plotter/_network.py,sha256=IiK1mnByoTvjc7xmuRHfewjC1t8TR8ZIvDxgITSU5Fk,14306
|
|
27
|
+
risk/network/plotter/_plotter.py,sha256=6OTGz1md5v_1iKGru__2QeV3-gUULLJscyLPWPOBCGo,6004
|
|
28
|
+
risk/network/plotter/api.py,sha256=LIrXZJBdTCnh6ntFHEl5vbQs35f4cytqixizxzUQqvU,1686
|
|
29
|
+
risk/network/plotter/_utils/__init__.py,sha256=F2W_R_lhtTDap3xJ25qN-C3Ba1fb0NYkDBiR8vB-4Eo,205
|
|
30
|
+
risk/network/plotter/_utils/colors.py,sha256=CKmZ-Ki1oh40nNGBHE4PTirJ8dF0Y4yReTF6VzzQlrA,19170
|
|
31
|
+
risk/network/plotter/_utils/layout.py,sha256=UQQHT-A-iHQxKEzFla_8fvnjBJTc_zsm2-ZJpvLLbOk,3627
|
|
32
|
+
risk/stats/__init__.py,sha256=Mb8h-1Z3upFS0zrlFVbyfO5afLkVaZXfqZpOs4lj_lg,57
|
|
33
|
+
risk/stats/api.py,sha256=1U-cABIZzVQjmTF7M09QaRZfCLI82C8OzSXOSuM37XI,8075
|
|
34
|
+
risk/stats/_stats/__init__.py,sha256=yMGqINN7Z4A2V4ln5f6lhdV-5GRfV5ABm-m-mni4YNQ,197
|
|
35
|
+
risk/stats/_stats/tests.py,sha256=mTCwu6CnZjU_qURmKKc8Dd37Gar8xukttIm2qXhrCY0,7279
|
|
36
|
+
risk/stats/_stats/permutation/__init__.py,sha256=BDELJoCtaz0Byc4V6f7chtfYqkG4l_trM30kgzNxluM,134
|
|
37
|
+
risk/stats/_stats/permutation/_test_functions.py,sha256=qeHsljh_gVG1EqFPzYPvLG_G3HnoXtKwsmowVXmEKnI,2970
|
|
38
|
+
risk/stats/_stats/permutation/permutation.py,sha256=aglSO8NiP0hzTdcZKIQy1ZwaTJaXPBPBULTB8JG_02E,10377
|
|
39
|
+
risk_network-0.0.16b2.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
|
40
|
+
risk_network-0.0.16b2.dist-info/METADATA,sha256=7usyb0e7b7qPUqaxIYp-UT2zrasH5Ur4N6E96jqA7aY,5390
|
|
41
|
+
risk_network-0.0.16b2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
42
|
+
risk_network-0.0.16b2.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
|
43
|
+
risk_network-0.0.16b2.dist-info/RECORD,,
|
risk/_neighborhoods/__init__.py
DELETED
risk/_neighborhoods/_api.py
DELETED
|
@@ -1,354 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
risk/_neighborhoods/_api
|
|
3
|
-
~~~~~~~~~~~~~~~~~~~~~~~~
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import copy
|
|
7
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
8
|
-
|
|
9
|
-
import networkx as nx
|
|
10
|
-
import numpy as np
|
|
11
|
-
from scipy.sparse import csr_matrix
|
|
12
|
-
|
|
13
|
-
from .._log import log_header, logger, params
|
|
14
|
-
from ._neighborhoods import get_network_neighborhoods
|
|
15
|
-
from ._stats import (
|
|
16
|
-
compute_binom_test,
|
|
17
|
-
compute_chi2_test,
|
|
18
|
-
compute_hypergeom_test,
|
|
19
|
-
compute_permutation_test,
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class NeighborhoodsAPI:
|
|
24
|
-
"""
|
|
25
|
-
Handles the loading of statistical results and annotation significance for neighborhoods.
|
|
26
|
-
|
|
27
|
-
The NeighborhoodsAPI class provides methods to load neighborhood results from statistical tests.
|
|
28
|
-
"""
|
|
29
|
-
|
|
30
|
-
def load_neighborhoods_binom(
|
|
31
|
-
self,
|
|
32
|
-
network: nx.Graph,
|
|
33
|
-
annotation: Dict[str, Any],
|
|
34
|
-
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
|
35
|
-
louvain_resolution: float = 0.1,
|
|
36
|
-
leiden_resolution: float = 1.0,
|
|
37
|
-
fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
|
|
38
|
-
null_distribution: str = "network",
|
|
39
|
-
random_seed: int = 888,
|
|
40
|
-
) -> Dict[str, Any]:
|
|
41
|
-
"""
|
|
42
|
-
Load significant neighborhoods for the network using the binomial test.
|
|
43
|
-
|
|
44
|
-
Args:
|
|
45
|
-
network (nx.Graph): The network graph.
|
|
46
|
-
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
47
|
-
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
|
48
|
-
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
|
49
|
-
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
|
50
|
-
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
|
51
|
-
leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
|
|
52
|
-
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
|
53
|
-
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
|
54
|
-
Defaults to 0.5.
|
|
55
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
56
|
-
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
|
57
|
-
|
|
58
|
-
Returns:
|
|
59
|
-
Dict[str, Any]: Computed significance of neighborhoods.
|
|
60
|
-
"""
|
|
61
|
-
log_header("Running binomial test")
|
|
62
|
-
# Compute neighborhood significance using the binomial test
|
|
63
|
-
return self._load_neighborhoods_by_statistical_test(
|
|
64
|
-
network=network,
|
|
65
|
-
annotation=annotation,
|
|
66
|
-
distance_metric=distance_metric,
|
|
67
|
-
louvain_resolution=louvain_resolution,
|
|
68
|
-
leiden_resolution=leiden_resolution,
|
|
69
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
|
70
|
-
null_distribution=null_distribution,
|
|
71
|
-
random_seed=random_seed,
|
|
72
|
-
statistical_test_key="binom",
|
|
73
|
-
statistical_test_function=compute_binom_test,
|
|
74
|
-
)
|
|
75
|
-
|
|
76
|
-
def load_neighborhoods_chi2(
|
|
77
|
-
self,
|
|
78
|
-
network: nx.Graph,
|
|
79
|
-
annotation: Dict[str, Any],
|
|
80
|
-
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
|
81
|
-
louvain_resolution: float = 0.1,
|
|
82
|
-
leiden_resolution: float = 1.0,
|
|
83
|
-
fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
|
|
84
|
-
null_distribution: str = "network",
|
|
85
|
-
random_seed: int = 888,
|
|
86
|
-
) -> Dict[str, Any]:
|
|
87
|
-
"""
|
|
88
|
-
Load significant neighborhoods for the network using the chi-squared test.
|
|
89
|
-
|
|
90
|
-
Args:
|
|
91
|
-
network (nx.Graph): The network graph.
|
|
92
|
-
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
93
|
-
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
|
94
|
-
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
|
95
|
-
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
|
96
|
-
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
|
97
|
-
leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
|
|
98
|
-
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
|
99
|
-
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
|
100
|
-
Defaults to 0.5.
|
|
101
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
102
|
-
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
|
103
|
-
|
|
104
|
-
Returns:
|
|
105
|
-
Dict[str, Any]: Computed significance of neighborhoods.
|
|
106
|
-
"""
|
|
107
|
-
log_header("Running chi-squared test")
|
|
108
|
-
# Compute neighborhood significance using the chi-squared test
|
|
109
|
-
return self._load_neighborhoods_by_statistical_test(
|
|
110
|
-
network=network,
|
|
111
|
-
annotation=annotation,
|
|
112
|
-
distance_metric=distance_metric,
|
|
113
|
-
louvain_resolution=louvain_resolution,
|
|
114
|
-
leiden_resolution=leiden_resolution,
|
|
115
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
|
116
|
-
null_distribution=null_distribution,
|
|
117
|
-
random_seed=random_seed,
|
|
118
|
-
statistical_test_key="chi2",
|
|
119
|
-
statistical_test_function=compute_chi2_test,
|
|
120
|
-
)
|
|
121
|
-
|
|
122
|
-
def load_neighborhoods_hypergeom(
|
|
123
|
-
self,
|
|
124
|
-
network: nx.Graph,
|
|
125
|
-
annotation: Dict[str, Any],
|
|
126
|
-
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
|
127
|
-
louvain_resolution: float = 0.1,
|
|
128
|
-
leiden_resolution: float = 1.0,
|
|
129
|
-
fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
|
|
130
|
-
null_distribution: str = "network",
|
|
131
|
-
random_seed: int = 888,
|
|
132
|
-
) -> Dict[str, Any]:
|
|
133
|
-
"""
|
|
134
|
-
Load significant neighborhoods for the network using the hypergeometric test.
|
|
135
|
-
|
|
136
|
-
Args:
|
|
137
|
-
network (nx.Graph): The network graph.
|
|
138
|
-
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
139
|
-
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
|
140
|
-
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
|
141
|
-
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
|
142
|
-
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
|
143
|
-
leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
|
|
144
|
-
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
|
145
|
-
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
|
146
|
-
Defaults to 0.5.
|
|
147
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
148
|
-
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
|
149
|
-
|
|
150
|
-
Returns:
|
|
151
|
-
Dict[str, Any]: Computed significance of neighborhoods.
|
|
152
|
-
"""
|
|
153
|
-
log_header("Running hypergeometric test")
|
|
154
|
-
# Compute neighborhood significance using the hypergeometric test
|
|
155
|
-
return self._load_neighborhoods_by_statistical_test(
|
|
156
|
-
network=network,
|
|
157
|
-
annotation=annotation,
|
|
158
|
-
distance_metric=distance_metric,
|
|
159
|
-
louvain_resolution=louvain_resolution,
|
|
160
|
-
leiden_resolution=leiden_resolution,
|
|
161
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
|
162
|
-
null_distribution=null_distribution,
|
|
163
|
-
random_seed=random_seed,
|
|
164
|
-
statistical_test_key="hypergeom",
|
|
165
|
-
statistical_test_function=compute_hypergeom_test,
|
|
166
|
-
)
|
|
167
|
-
|
|
168
|
-
def load_neighborhoods_permutation(
|
|
169
|
-
self,
|
|
170
|
-
network: nx.Graph,
|
|
171
|
-
annotation: Dict[str, Any],
|
|
172
|
-
distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
|
|
173
|
-
louvain_resolution: float = 0.1,
|
|
174
|
-
leiden_resolution: float = 1.0,
|
|
175
|
-
fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
|
|
176
|
-
score_metric: str = "sum",
|
|
177
|
-
null_distribution: str = "network",
|
|
178
|
-
num_permutations: int = 1000,
|
|
179
|
-
random_seed: int = 888,
|
|
180
|
-
max_workers: int = 1,
|
|
181
|
-
) -> Dict[str, Any]:
|
|
182
|
-
"""
|
|
183
|
-
Load significant neighborhoods for the network using the permutation test.
|
|
184
|
-
|
|
185
|
-
Args:
|
|
186
|
-
network (nx.Graph): The network graph.
|
|
187
|
-
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
188
|
-
distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
|
|
189
|
-
metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
|
|
190
|
-
'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
|
|
191
|
-
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
|
192
|
-
leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
|
|
193
|
-
fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
|
|
194
|
-
Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
|
|
195
|
-
Defaults to 0.5.
|
|
196
|
-
score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
|
|
197
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
198
|
-
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
|
199
|
-
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
|
200
|
-
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
|
201
|
-
|
|
202
|
-
Returns:
|
|
203
|
-
Dict[str, Any]: Computed significance of neighborhoods.
|
|
204
|
-
"""
|
|
205
|
-
log_header("Running permutation test")
|
|
206
|
-
# Log and display permutation test settings, which is unique to this test
|
|
207
|
-
logger.debug(f"Neighborhood scoring metric: '{score_metric}'")
|
|
208
|
-
logger.debug(f"Number of permutations: {num_permutations}")
|
|
209
|
-
logger.debug(f"Maximum workers: {max_workers}")
|
|
210
|
-
# Compute neighborhood significance using the permutation test
|
|
211
|
-
return self._load_neighborhoods_by_statistical_test(
|
|
212
|
-
network=network,
|
|
213
|
-
annotation=annotation,
|
|
214
|
-
distance_metric=distance_metric,
|
|
215
|
-
louvain_resolution=louvain_resolution,
|
|
216
|
-
leiden_resolution=leiden_resolution,
|
|
217
|
-
fraction_shortest_edges=fraction_shortest_edges,
|
|
218
|
-
null_distribution=null_distribution,
|
|
219
|
-
random_seed=random_seed,
|
|
220
|
-
statistical_test_key="permutation",
|
|
221
|
-
statistical_test_function=compute_permutation_test,
|
|
222
|
-
score_metric=score_metric,
|
|
223
|
-
num_permutations=num_permutations,
|
|
224
|
-
max_workers=max_workers,
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
def _load_neighborhoods_by_statistical_test(
    self,
    network: nx.Graph,
    annotation: Dict[str, Any],
    distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
    louvain_resolution: float = 0.1,
    leiden_resolution: float = 1.0,
    fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
    null_distribution: str = "network",
    random_seed: int = 888,
    statistical_test_key: str = "hypergeom",
    statistical_test_function: Any = compute_hypergeom_test,
    **kwargs,
) -> Dict[str, Any]:
    """
    Compute neighborhood significance for the network with the given statistical test.

    The method logs the run parameters, builds the neighborhood matrix from a private
    copy of the network, and then hands that matrix together with the annotation matrix
    to the supplied statistical test function.

    Args:
        network (nx.Graph): The input network graph. It is copied before use.
        annotation (Dict[str, Any]): Annotation data associated with the network. Only the
            "matrix" entry is read here and forwarded to the statistical test.
        distance_metric (Union[str, List, Tuple, np.ndarray], optional): The distance metric or
            clustering method used to define neighborhoods. Can be a single method name
            (e.g., 'louvain', 'leiden') or a collection of methods. Defaults to "louvain".
        louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
            Defaults to 0.1.
        leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
            Defaults to 1.0.
        fraction_shortest_edges (Union[float, List, Tuple, np.ndarray], optional): Fraction of
            shortest edges to consider for creating subgraphs. Can be a single value or a
            collection of thresholds. Defaults to 0.5.
        null_distribution (str, optional): The type of null distribution to use
            ('network' or 'annotation'). Defaults to "network".
        random_seed (int, optional): Seed for random number generation. Defaults to 888.
        statistical_test_key (str, optional): Name of the statistical test (e.g., "hypergeom").
            Only used when logging the parameters. Defaults to "hypergeom".
        statistical_test_function (Any, optional): Callable implementing the statistical test.
            It is invoked with the keyword arguments `neighborhoods`, `annotation`,
            `null_distribution` and any extra `**kwargs`. Defaults to `compute_hypergeom_test`.
        **kwargs: Additional parameters, logged and then passed to the statistical test function.

    Returns:
        Dict[str, Any]: Whatever `statistical_test_function` returns, i.e. the computed
            significance values for neighborhoods.
    """
    # Log null distribution type
    logger.debug(f"Null distribution: '{null_distribution}'")
    # Record every neighborhood analysis parameter; the test is logged by its key,
    # not by the function object
    params.log_neighborhoods(
        distance_metric=distance_metric,
        louvain_resolution=louvain_resolution,
        leiden_resolution=leiden_resolution,
        fraction_shortest_edges=fraction_shortest_edges,
        statistical_test_function=statistical_test_key,
        null_distribution=null_distribution,
        random_seed=random_seed,
        **kwargs,
    )

    # Work on an independent copy so the caller's graph is never modified.
    # Graph.copy() creates new node/edge containers; a generic copy.copy() would only
    # duplicate the outer object and keep sharing the adjacency data with the original.
    network = network.copy()
    # Build the neighborhood matrix for the requested distance metric(s)
    neighborhoods = self._load_neighborhoods(
        network,
        distance_metric,
        louvain_resolution=louvain_resolution,
        leiden_resolution=leiden_resolution,
        fraction_shortest_edges=fraction_shortest_edges,
        random_seed=random_seed,
    )
    # Apply the statistical test and return its result unchanged
    return statistical_test_function(
        neighborhoods=neighborhoods,
        annotation=annotation["matrix"],
        null_distribution=null_distribution,
        **kwargs,
    )
|
|
302
|
-
|
|
303
|
-
def _load_neighborhoods(
    self,
    network: nx.Graph,
    distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
    louvain_resolution: float = 0.1,
    leiden_resolution: float = 1.0,
    fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
    random_seed: int = 888,
) -> csr_matrix:
    """
    Compute the neighborhood matrix for the network.

    Logs the neighborhood settings and delegates the computation to
    `get_network_neighborhoods`.

    Args:
        network (nx.Graph): The network graph.
        distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
            Can be a string for one metric or a list/tuple/ndarray of metrics ('greedy_modularity',
            'louvain', 'leiden', 'label_propagation', 'markov_clustering', 'walktrap', 'spinglass').
            Defaults to 'louvain'.
        louvain_resolution (float, optional): Resolution parameter for Louvain clustering.
            Defaults to 0.1.
        leiden_resolution (float, optional): Resolution parameter for Leiden clustering.
            Defaults to 1.0.
        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank
            fraction threshold(s) for creating subgraphs. Can be a single float for one threshold
            or a list/tuple of floats corresponding to multiple thresholds. Defaults to 0.5.
        random_seed (int, optional): Seed for random number generation. Defaults to 888.

    Returns:
        csr_matrix: Sparse neighborhood matrix calculated based on the selected distance metric.
    """
    # Build the label shown in the log. The resolution is appended only when a single
    # metric name is given. The isinstance guard matters: comparing an ndarray of names
    # with a string is element-wise, and using that result in an `if` raises ValueError.
    resolution_by_metric = {
        "louvain": louvain_resolution,
        "leiden": leiden_resolution,
    }
    if isinstance(distance_metric, str) and distance_metric in resolution_by_metric:
        resolution = resolution_by_metric[distance_metric]
        for_print_distance_metric = f"{distance_metric} (resolution={resolution})"
    else:
        # Other single metrics and collections of metrics are logged as given
        for_print_distance_metric = distance_metric

    # Log and display neighborhood settings
    logger.debug(f"Distance metric: '{for_print_distance_metric}'")
    logger.debug(f"Edge length threshold: {fraction_shortest_edges}")
    logger.debug(f"Random seed: {random_seed}")

    # Compute and return the sparse neighborhood matrix
    return get_network_neighborhoods(
        network,
        distance_metric,
        fraction_shortest_edges,
        louvain_resolution=louvain_resolution,
        leiden_resolution=leiden_resolution,
        random_seed=random_seed,
    )
|