risk-network 0.0.16b1__py3-none-any.whl → 0.0.16b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +2 -2
- risk/{_annotation → annotation}/__init__.py +2 -2
- risk/{_annotation → annotation}/_nltk_setup.py +3 -3
- risk/{_annotation/_annotation.py → annotation/annotation.py} +13 -13
- risk/{_annotation/_io.py → annotation/io.py} +4 -4
- risk/cluster/__init__.py +8 -0
- risk/{_neighborhoods → cluster}/_community.py +37 -37
- risk/cluster/api.py +273 -0
- risk/{_neighborhoods/_neighborhoods.py → cluster/cluster.py} +127 -98
- risk/{_neighborhoods/_domains.py → cluster/label.py} +18 -12
- risk/{_log → log}/__init__.py +2 -2
- risk/{_log/_console.py → log/console.py} +2 -2
- risk/{_log/_parameters.py → log/parameters.py} +20 -10
- risk/network/__init__.py +8 -0
- risk/network/graph/__init__.py +7 -0
- risk/{_network/_graph → network/graph}/_stats.py +2 -2
- risk/{_network/_graph → network/graph}/_summary.py +13 -13
- risk/{_network/_graph/_api.py → network/graph/api.py} +37 -39
- risk/{_network/_graph/_graph.py → network/graph/graph.py} +5 -5
- risk/{_network/_io.py → network/io.py} +9 -4
- risk/network/plotter/__init__.py +6 -0
- risk/{_network/_plotter → network/plotter}/_canvas.py +6 -6
- risk/{_network/_plotter → network/plotter}/_contour.py +4 -4
- risk/{_network/_plotter → network/plotter}/_labels.py +6 -6
- risk/{_network/_plotter → network/plotter}/_network.py +7 -7
- risk/{_network/_plotter → network/plotter}/_plotter.py +5 -5
- risk/network/plotter/_utils/__init__.py +7 -0
- risk/{_network/_plotter/_utils/_colors.py → network/plotter/_utils/colors.py} +3 -3
- risk/{_network/_plotter/_utils/_layout.py → network/plotter/_utils/layout.py} +2 -2
- risk/{_network/_plotter/_api.py → network/plotter/api.py} +5 -5
- risk/{_risk.py → risk.py} +9 -8
- risk/stats/__init__.py +6 -0
- risk/stats/_stats/__init__.py +11 -0
- risk/stats/_stats/permutation/__init__.py +6 -0
- risk/stats/_stats/permutation/_test_functions.py +72 -0
- risk/{_neighborhoods/_stats/_permutation/_permutation.py → stats/_stats/permutation/permutation.py} +35 -37
- risk/{_neighborhoods/_stats/_tests.py → stats/_stats/tests.py} +32 -34
- risk/stats/api.py +202 -0
- {risk_network-0.0.16b1.dist-info → risk_network-0.0.16b2.dist-info}/METADATA +2 -2
- risk_network-0.0.16b2.dist-info/RECORD +43 -0
- risk/_neighborhoods/__init__.py +0 -8
- risk/_neighborhoods/_api.py +0 -354
- risk/_neighborhoods/_stats/__init__.py +0 -11
- risk/_neighborhoods/_stats/_permutation/__init__.py +0 -6
- risk/_neighborhoods/_stats/_permutation/_test_functions.py +0 -72
- risk/_network/__init__.py +0 -8
- risk/_network/_graph/__init__.py +0 -7
- risk/_network/_plotter/__init__.py +0 -6
- risk/_network/_plotter/_utils/__init__.py +0 -7
- risk_network-0.0.16b1.dist-info/RECORD +0 -41
- {risk_network-0.0.16b1.dist-info → risk_network-0.0.16b2.dist-info}/WHEEL +0 -0
- {risk_network-0.0.16b1.dist-info → risk_network-0.0.16b2.dist-info}/licenses/LICENSE +0 -0
- {risk_network-0.0.16b1.dist-info → risk_network-0.0.16b2.dist-info}/top_level.txt +0 -0
risk/{_neighborhoods/_stats/_permutation/_permutation.py → stats/_stats/permutation/permutation.py}
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""
|
|
2
|
-
risk/_neighborhoods/_stats/_permutation/_permutation
|
|
3
|
-
|
|
2
|
+
risk/stats/_stats/permutation/permutation
|
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from multiprocessing import Manager, get_context
|
|
@@ -16,7 +16,7 @@ from ._test_functions import DISPATCH_TEST_FUNCTIONS
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def compute_permutation_test(
|
|
19
|
-
|
|
19
|
+
clusters: csr_matrix,
|
|
20
20
|
annotation: csr_matrix,
|
|
21
21
|
score_metric: str = "sum",
|
|
22
22
|
null_distribution: str = "network",
|
|
@@ -25,10 +25,10 @@ def compute_permutation_test(
|
|
|
25
25
|
max_workers: int = 1,
|
|
26
26
|
) -> Dict[str, Any]:
|
|
27
27
|
"""
|
|
28
|
-
Compute permutation test for enrichment and depletion in neighborhoods.
|
|
28
|
+
Compute permutation test for enrichment and depletion in clusters.
|
|
29
29
|
|
|
30
30
|
Args:
|
|
31
|
-
|
|
31
|
+
clusters (csr_matrix): Sparse binary matrix representing clusters.
|
|
32
32
|
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
|
33
33
|
score_metric (str, optional): Metric to use for scoring ('sum' or 'stdev'). Defaults to "sum".
|
|
34
34
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
@@ -41,16 +41,16 @@ def compute_permutation_test(
|
|
|
41
41
|
"""
|
|
42
42
|
# Ensure that the matrices are in the correct format and free of NaN values
|
|
43
43
|
# NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
|
|
44
|
-
|
|
44
|
+
clusters = clusters.astype(np.float32)
|
|
45
45
|
annotation = annotation.astype(np.float32)
|
|
46
|
-
# Retrieve the appropriate neighborhood score function based on the metric
|
|
47
|
-
|
|
46
|
+
# Retrieve the appropriate cluster score function based on the metric
|
|
47
|
+
cluster_score_func = DISPATCH_TEST_FUNCTIONS[score_metric]
|
|
48
48
|
|
|
49
49
|
# Run the permutation test to calculate depletion and enrichment counts
|
|
50
50
|
counts_depletion, counts_enrichment = _run_permutation_test(
|
|
51
|
-
|
|
51
|
+
clusters=clusters,
|
|
52
52
|
annotation=annotation,
|
|
53
|
-
|
|
53
|
+
cluster_score_func=cluster_score_func,
|
|
54
54
|
null_distribution=null_distribution,
|
|
55
55
|
num_permutations=num_permutations,
|
|
56
56
|
random_seed=random_seed,
|
|
@@ -68,9 +68,9 @@ def compute_permutation_test(
|
|
|
68
68
|
|
|
69
69
|
|
|
70
70
|
def _run_permutation_test(
|
|
71
|
-
|
|
71
|
+
clusters: csr_matrix,
|
|
72
72
|
annotation: csr_matrix,
|
|
73
|
-
|
|
73
|
+
cluster_score_func: Callable,
|
|
74
74
|
null_distribution: str = "network",
|
|
75
75
|
num_permutations: int = 1000,
|
|
76
76
|
random_seed: int = 888,
|
|
@@ -80,9 +80,9 @@ def _run_permutation_test(
|
|
|
80
80
|
Run the permutation test to calculate depletion and enrichment counts.
|
|
81
81
|
|
|
82
82
|
Args:
|
|
83
|
-
|
|
83
|
+
clusters (csr_matrix): Sparse binary matrix representing clusters.
|
|
84
84
|
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
|
85
|
-
|
|
85
|
+
cluster_score_func (Callable): Function to calculate cluster scores.
|
|
86
86
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
87
87
|
num_permutations (int, optional): Number of permutations. Defaults to 1000.
|
|
88
88
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
|
@@ -109,16 +109,14 @@ def _run_permutation_test(
|
|
|
109
109
|
# Replace NaNs with zeros in the sparse annotation matrix
|
|
110
110
|
annotation.data[np.isnan(annotation.data)] = 0
|
|
111
111
|
annotation_matrix_obsv = annotation[idxs]
|
|
112
|
-
|
|
113
|
-
# Calculate observed neighborhood scores
|
|
112
|
+
clusters_matrix_obsv = clusters.T[idxs].T
|
|
113
|
+
# Calculate observed cluster scores
|
|
114
114
|
with np.errstate(invalid="ignore", divide="ignore"):
|
|
115
|
-
|
|
116
|
-
neighborhoods_matrix_obsv, annotation_matrix_obsv
|
|
117
|
-
)
|
|
115
|
+
observed_cluster_scores = cluster_score_func(clusters_matrix_obsv, annotation_matrix_obsv)
|
|
118
116
|
|
|
119
117
|
# Initialize count matrices for depletion and enrichment
|
|
120
|
-
counts_depletion = np.zeros(
|
|
121
|
-
counts_enrichment = np.zeros(
|
|
118
|
+
counts_depletion = np.zeros(observed_cluster_scores.shape)
|
|
119
|
+
counts_enrichment = np.zeros(observed_cluster_scores.shape)
|
|
122
120
|
# Determine the number of permutations to run in each worker process
|
|
123
121
|
subset_size = num_permutations // max_workers
|
|
124
122
|
remainder = num_permutations % max_workers
|
|
@@ -145,9 +143,9 @@ def _run_permutation_test(
|
|
|
145
143
|
(
|
|
146
144
|
permutation_batches[i], # Pass the batch of precomputed permutations
|
|
147
145
|
annotation,
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
146
|
+
clusters_matrix_obsv,
|
|
147
|
+
observed_cluster_scores,
|
|
148
|
+
cluster_score_func,
|
|
151
149
|
num_permutations,
|
|
152
150
|
progress_counter,
|
|
153
151
|
max_workers,
|
|
@@ -176,9 +174,9 @@ def _run_permutation_test(
|
|
|
176
174
|
def _permutation_process_batch(
|
|
177
175
|
permutations: Union[List, Tuple, np.ndarray],
|
|
178
176
|
annotation_matrix: csr_matrix,
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
177
|
+
clusters_matrix_obsv: csr_matrix,
|
|
178
|
+
observed_cluster_scores: np.ndarray,
|
|
179
|
+
cluster_score_func: Callable,
|
|
182
180
|
num_permutations: int,
|
|
183
181
|
progress_counter: ValueProxy,
|
|
184
182
|
max_workers: int,
|
|
@@ -189,9 +187,9 @@ def _permutation_process_batch(
|
|
|
189
187
|
Args:
|
|
190
188
|
permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
|
|
191
189
|
annotation_matrix (csr_matrix): Sparse binary matrix representing annotation.
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
190
|
+
clusters_matrix_obsv (csr_matrix): Sparse binary matrix representing observed clusters.
|
|
191
|
+
observed_cluster_scores (np.ndarray): Observed cluster scores.
|
|
192
|
+
cluster_score_func (Callable): Function to calculate cluster scores.
|
|
195
193
|
num_permutations (int): Number of total permutations across all subsets.
|
|
196
194
|
progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
|
|
197
195
|
max_workers (int): Number of workers for multiprocessing.
|
|
@@ -200,8 +198,8 @@ def _permutation_process_batch(
|
|
|
200
198
|
tuple: Local counts of depletion and enrichment.
|
|
201
199
|
"""
|
|
202
200
|
# Initialize local count matrices for this worker
|
|
203
|
-
local_counts_depletion = np.zeros(
|
|
204
|
-
local_counts_enrichment = np.zeros(
|
|
201
|
+
local_counts_depletion = np.zeros(observed_cluster_scores.shape)
|
|
202
|
+
local_counts_enrichment = np.zeros(observed_cluster_scores.shape)
|
|
205
203
|
|
|
206
204
|
# Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used
|
|
207
205
|
# NOTE: This does not work for Mac M chips due to a bug in the threadpoolctl package
|
|
@@ -216,19 +214,19 @@ def _permutation_process_batch(
|
|
|
216
214
|
for permuted_idxs in permutations:
|
|
217
215
|
# Apply precomputed permutation
|
|
218
216
|
annotation_matrix_permut = annotation_matrix[permuted_idxs]
|
|
219
|
-
# Calculate permuted neighborhood scores
|
|
217
|
+
# Calculate permuted cluster scores
|
|
220
218
|
with np.errstate(invalid="ignore", divide="ignore"):
|
|
221
|
-
|
|
222
|
-
|
|
219
|
+
permuted_cluster_scores = cluster_score_func(
|
|
220
|
+
clusters_matrix_obsv, annotation_matrix_permut
|
|
223
221
|
)
|
|
224
222
|
|
|
225
223
|
# Update local depletion and enrichment counts
|
|
226
224
|
local_counts_depletion = np.add(
|
|
227
|
-
local_counts_depletion,
|
|
225
|
+
local_counts_depletion, permuted_cluster_scores <= observed_cluster_scores
|
|
228
226
|
)
|
|
229
227
|
local_counts_enrichment = np.add(
|
|
230
228
|
local_counts_enrichment,
|
|
231
|
-
|
|
229
|
+
permuted_cluster_scores >= observed_cluster_scores,
|
|
232
230
|
)
|
|
233
231
|
|
|
234
232
|
# Update progress
|
|
risk/{_neighborhoods/_stats/_tests.py → stats/_stats/tests.py}
RENAMED
|
@@ -1,25 +1,25 @@
|
|
|
1
1
|
"""
|
|
2
|
-
risk/_neighborhoods/_stats/_tests
|
|
3
|
-
|
|
2
|
+
risk/stats/_stats/tests
|
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from typing import Any, Dict
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
from scipy.sparse import csr_matrix
|
|
10
|
-
from scipy.stats import binom, chi2, hypergeom
|
|
10
|
+
from scipy.stats import binom, chi2, hypergeom
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def compute_binom_test(
|
|
14
|
-
|
|
14
|
+
clusters: csr_matrix,
|
|
15
15
|
annotation: csr_matrix,
|
|
16
16
|
null_distribution: str = "network",
|
|
17
17
|
) -> Dict[str, Any]:
|
|
18
18
|
"""
|
|
19
|
-
Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
|
|
19
|
+
Compute Binomial test for enrichment and depletion in clusters with selectable null distribution.
|
|
20
20
|
|
|
21
21
|
Args:
|
|
22
|
-
|
|
22
|
+
clusters (csr_matrix): Sparse binary matrix representing clusters.
|
|
23
23
|
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
|
24
24
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
25
25
|
|
|
@@ -30,10 +30,10 @@ def compute_binom_test(
|
|
|
30
30
|
ValueError: If an invalid null_distribution value is provided.
|
|
31
31
|
"""
|
|
32
32
|
# Get the total number of nodes in the network
|
|
33
|
-
total_nodes =
|
|
33
|
+
total_nodes = clusters.shape[1]
|
|
34
34
|
|
|
35
35
|
# Compute sums (remain sparse here)
|
|
36
|
-
|
|
36
|
+
cluster_sizes = clusters.sum(axis=1) # Row sums
|
|
37
37
|
annotation_totals = annotation.sum(axis=0) # Column sums
|
|
38
38
|
# Compute probabilities (convert to dense)
|
|
39
39
|
if null_distribution == "network":
|
|
@@ -46,26 +46,26 @@ def compute_binom_test(
|
|
|
46
46
|
)
|
|
47
47
|
|
|
48
48
|
# Observed counts (sparse matrix multiplication)
|
|
49
|
-
annotated_counts =
|
|
49
|
+
annotated_counts = clusters @ annotation # Sparse result
|
|
50
50
|
annotated_counts_dense = annotated_counts.toarray() # Convert for dense operations
|
|
51
51
|
|
|
52
52
|
# Compute enrichment and depletion p-values
|
|
53
|
-
enrichment_pvals = 1 - binom.cdf(annotated_counts_dense - 1,
|
|
54
|
-
depletion_pvals = binom.cdf(annotated_counts_dense,
|
|
53
|
+
enrichment_pvals = 1 - binom.cdf(annotated_counts_dense - 1, cluster_sizes.A, p_values)
|
|
54
|
+
depletion_pvals = binom.cdf(annotated_counts_dense, cluster_sizes.A, p_values)
|
|
55
55
|
|
|
56
56
|
return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
def compute_chi2_test(
|
|
60
|
-
|
|
60
|
+
clusters: csr_matrix,
|
|
61
61
|
annotation: csr_matrix,
|
|
62
62
|
null_distribution: str = "network",
|
|
63
63
|
) -> Dict[str, Any]:
|
|
64
64
|
"""
|
|
65
|
-
Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
|
|
65
|
+
Compute chi-squared test for enrichment and depletion in clusters with selectable null distribution.
|
|
66
66
|
|
|
67
67
|
Args:
|
|
68
|
-
|
|
68
|
+
clusters (csr_matrix): Sparse binary matrix representing clusters.
|
|
69
69
|
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
|
70
70
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
71
71
|
|
|
@@ -76,12 +76,12 @@ def compute_chi2_test(
|
|
|
76
76
|
ValueError: If an invalid null_distribution value is provided.
|
|
77
77
|
"""
|
|
78
78
|
# Total number of nodes in the network
|
|
79
|
-
total_node_count =
|
|
79
|
+
total_node_count = clusters.shape[0]
|
|
80
80
|
|
|
81
81
|
if null_distribution == "network":
|
|
82
82
|
# Case 1: Use all nodes as the background
|
|
83
83
|
background_population = total_node_count
|
|
84
|
-
|
|
84
|
+
cluster_sums = clusters.sum(axis=0) # Column sums of clusters
|
|
85
85
|
annotation_sums = annotation.sum(axis=0) # Column sums of annotations
|
|
86
86
|
elif null_distribution == "annotation":
|
|
87
87
|
# Case 2: Only consider nodes with at least one annotation
|
|
@@ -89,9 +89,7 @@ def compute_chi2_test(
|
|
|
89
89
|
np.ravel(annotation.sum(axis=1)) > 0
|
|
90
90
|
) # Row-wise sum to filter nodes with annotations
|
|
91
91
|
background_population = annotated_nodes.sum() # Total number of annotated nodes
|
|
92
|
-
|
|
93
|
-
axis=0
|
|
94
|
-
) # Neighborhood sums for annotated nodes
|
|
92
|
+
cluster_sums = clusters[annotated_nodes].sum(axis=0) # Cluster sums for annotated nodes
|
|
95
93
|
annotation_sums = annotation[annotated_nodes].sum(
|
|
96
94
|
axis=0
|
|
97
95
|
) # Annotation sums for annotated nodes
|
|
@@ -101,13 +99,13 @@ def compute_chi2_test(
|
|
|
101
99
|
)
|
|
102
100
|
|
|
103
101
|
# Convert to dense arrays for downstream computations
|
|
104
|
-
|
|
102
|
+
cluster_sums = np.asarray(cluster_sums).reshape(-1, 1) # Ensure column vector shape
|
|
105
103
|
annotation_sums = np.asarray(annotation_sums).reshape(1, -1) # Ensure row vector shape
|
|
106
104
|
|
|
107
|
-
# Observed values: number of annotated nodes in each neighborhood
|
|
108
|
-
observed =
|
|
105
|
+
# Observed values: number of annotated nodes in each cluster
|
|
106
|
+
observed = clusters.T @ annotation # Shape: (clusters, annotation)
|
|
109
107
|
# Expected values under the null
|
|
110
|
-
expected = (
|
|
108
|
+
expected = (cluster_sums @ annotation_sums) / background_population
|
|
111
109
|
# Chi-squared statistic: sum((observed - expected)^2 / expected)
|
|
112
110
|
with np.errstate(divide="ignore", invalid="ignore"): # Handle divide-by-zero
|
|
113
111
|
chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)
|
|
@@ -120,15 +118,15 @@ def compute_chi2_test(
|
|
|
120
118
|
|
|
121
119
|
|
|
122
120
|
def compute_hypergeom_test(
|
|
123
|
-
|
|
121
|
+
clusters: csr_matrix,
|
|
124
122
|
annotation: csr_matrix,
|
|
125
123
|
null_distribution: str = "network",
|
|
126
124
|
) -> Dict[str, Any]:
|
|
127
125
|
"""
|
|
128
|
-
Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
|
|
126
|
+
Compute hypergeometric test for enrichment and depletion in clusters with selectable null distribution.
|
|
129
127
|
|
|
130
128
|
Args:
|
|
131
|
-
|
|
129
|
+
clusters (csr_matrix): Sparse binary matrix representing clusters.
|
|
132
130
|
annotation (csr_matrix): Sparse binary matrix representing annotation.
|
|
133
131
|
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
134
132
|
|
|
@@ -139,10 +137,10 @@ def compute_hypergeom_test(
|
|
|
139
137
|
ValueError: If an invalid null_distribution value is provided.
|
|
140
138
|
"""
|
|
141
139
|
# Get the total number of nodes in the network
|
|
142
|
-
total_nodes =
|
|
140
|
+
total_nodes = clusters.shape[1]
|
|
143
141
|
|
|
144
142
|
# Compute sums
|
|
145
|
-
|
|
143
|
+
cluster_sums = clusters.sum(axis=0).A.flatten() # Convert to dense array
|
|
146
144
|
annotation_sums = annotation.sum(axis=0).A.flatten() # Convert to dense array
|
|
147
145
|
|
|
148
146
|
if null_distribution == "network":
|
|
@@ -150,7 +148,7 @@ def compute_hypergeom_test(
|
|
|
150
148
|
elif null_distribution == "annotation":
|
|
151
149
|
annotated_nodes = annotation.sum(axis=1).A.flatten() > 0 # Boolean mask
|
|
152
150
|
background_population = annotated_nodes.sum()
|
|
153
|
-
|
|
151
|
+
cluster_sums = clusters[annotated_nodes].sum(axis=0).A.flatten()
|
|
154
152
|
annotation_sums = annotation[annotated_nodes].sum(axis=0).A.flatten()
|
|
155
153
|
else:
|
|
156
154
|
raise ValueError(
|
|
@@ -158,19 +156,19 @@ def compute_hypergeom_test(
|
|
|
158
156
|
)
|
|
159
157
|
|
|
160
158
|
# Observed counts
|
|
161
|
-
|
|
162
|
-
|
|
159
|
+
annotated_in_cluster = clusters.T @ annotation # Sparse result
|
|
160
|
+
annotated_in_cluster = annotated_in_cluster.toarray() # Convert to dense
|
|
163
161
|
# Align shapes for broadcasting
|
|
164
|
-
|
|
162
|
+
cluster_sums = cluster_sums.reshape(-1, 1)
|
|
165
163
|
annotation_sums = annotation_sums.reshape(1, -1)
|
|
166
164
|
background_population = np.array(background_population).reshape(1, 1)
|
|
167
165
|
|
|
168
166
|
# Compute hypergeometric p-values
|
|
169
167
|
depletion_pvals = hypergeom.cdf(
|
|
170
|
-
|
|
168
|
+
annotated_in_cluster, background_population, annotation_sums, cluster_sums
|
|
171
169
|
)
|
|
172
170
|
enrichment_pvals = hypergeom.sf(
|
|
173
|
-
|
|
171
|
+
annotated_in_cluster - 1, background_population, annotation_sums, cluster_sums
|
|
174
172
|
)
|
|
175
173
|
|
|
176
174
|
return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|
risk/stats/api.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""
|
|
2
|
+
risk/stats/api
|
|
3
|
+
~~~~~~~~~~~~~~
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Dict
|
|
7
|
+
|
|
8
|
+
from scipy.sparse import csr_matrix
|
|
9
|
+
|
|
10
|
+
from ..log import log_header, logger, params
|
|
11
|
+
from ._stats import (
|
|
12
|
+
compute_binom_test,
|
|
13
|
+
compute_chi2_test,
|
|
14
|
+
compute_hypergeom_test,
|
|
15
|
+
compute_permutation_test,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class StatsAPI:
|
|
20
|
+
"""
|
|
21
|
+
Handles the loading of statistical results and annotation significance for clusters.
|
|
22
|
+
|
|
23
|
+
The StatsAPI class provides methods to load cluster results from statistical tests.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def run_binom(
|
|
27
|
+
self,
|
|
28
|
+
annotation: Dict[str, Any],
|
|
29
|
+
clusters: csr_matrix,
|
|
30
|
+
null_distribution: str = "network",
|
|
31
|
+
**kwargs,
|
|
32
|
+
) -> Dict[str, Any]:
|
|
33
|
+
"""
|
|
34
|
+
Compute cluster significance using the binomial test.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
38
|
+
clusters (csr_matrix): The cluster assignments for the network.
|
|
39
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation').
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
Dict[str, Any]: The computed significance of clusters based on the specified statistical test.
|
|
43
|
+
"""
|
|
44
|
+
log_header("Running binomial test")
|
|
45
|
+
# Compute cluster significance using the binomial test
|
|
46
|
+
return self._run_statistical_test(
|
|
47
|
+
annotation=annotation,
|
|
48
|
+
clusters=clusters,
|
|
49
|
+
null_distribution=null_distribution,
|
|
50
|
+
statistical_test_key="binom",
|
|
51
|
+
statistical_test_function=compute_binom_test,
|
|
52
|
+
**kwargs,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
def run_chi2(
|
|
56
|
+
self,
|
|
57
|
+
annotation: Dict[str, Any],
|
|
58
|
+
clusters: csr_matrix,
|
|
59
|
+
null_distribution: str = "network",
|
|
60
|
+
**kwargs,
|
|
61
|
+
) -> Dict[str, Any]:
|
|
62
|
+
"""
|
|
63
|
+
Compute cluster significance using the chi-squared test.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
67
|
+
clusters (csr_matrix): The cluster assignments for the network.
|
|
68
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
Dict[str, Any]: The computed significance of clusters based on the specified statistical test.
|
|
72
|
+
"""
|
|
73
|
+
log_header("Running chi-squared test")
|
|
74
|
+
# Compute cluster significance using the chi-squared test
|
|
75
|
+
return self._run_statistical_test(
|
|
76
|
+
annotation=annotation,
|
|
77
|
+
clusters=clusters,
|
|
78
|
+
null_distribution=null_distribution,
|
|
79
|
+
statistical_test_key="chi2",
|
|
80
|
+
statistical_test_function=compute_chi2_test,
|
|
81
|
+
**kwargs,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
def run_hypergeom(
|
|
85
|
+
self,
|
|
86
|
+
annotation: Dict[str, Any],
|
|
87
|
+
clusters: csr_matrix,
|
|
88
|
+
null_distribution: str = "network",
|
|
89
|
+
**kwargs,
|
|
90
|
+
) -> Dict[str, Any]:
|
|
91
|
+
"""
|
|
92
|
+
Compute cluster significance using the hypergeometric test.
|
|
93
|
+
|
|
94
|
+
Args:
|
|
95
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
96
|
+
clusters (csr_matrix): The cluster matrix to use.
|
|
97
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
98
|
+
|
|
99
|
+
Returns:
|
|
100
|
+
Dict[str, Any]: The computed significance of clusters based on the specified statistical test.
|
|
101
|
+
"""
|
|
102
|
+
log_header("Running hypergeometric test")
|
|
103
|
+
# Compute cluster significance using the hypergeometric test
|
|
104
|
+
return self._run_statistical_test(
|
|
105
|
+
annotation=annotation,
|
|
106
|
+
clusters=clusters,
|
|
107
|
+
null_distribution=null_distribution,
|
|
108
|
+
statistical_test_key="hypergeom",
|
|
109
|
+
statistical_test_function=compute_hypergeom_test,
|
|
110
|
+
**kwargs,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
def run_permutation(
|
|
114
|
+
self,
|
|
115
|
+
annotation: Dict[str, Any],
|
|
116
|
+
clusters: csr_matrix,
|
|
117
|
+
score_metric: str = "sum",
|
|
118
|
+
null_distribution: str = "network",
|
|
119
|
+
num_permutations: int = 1000,
|
|
120
|
+
random_seed: int = 888,
|
|
121
|
+
max_workers: int = 1,
|
|
122
|
+
**kwargs,
|
|
123
|
+
) -> Dict[str, Any]:
|
|
124
|
+
"""
|
|
125
|
+
Compute cluster significance using the permutation test.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
annotation (Dict[str, Any]): The annotation associated with the network.
|
|
129
|
+
clusters (csr_matrix): The cluster matrix to use.
|
|
130
|
+
score_metric (str, optional): Scoring metric for cluster significance. Defaults to "sum".
|
|
131
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
|
|
132
|
+
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
|
133
|
+
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
|
134
|
+
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
|
135
|
+
|
|
136
|
+
Returns:
|
|
137
|
+
Dict[str, Any]: The computed significance of clusters based on the specified statistical test.
|
|
138
|
+
"""
|
|
139
|
+
log_header("Running permutation test")
|
|
140
|
+
# Log and display permutation test settings, which is unique to this test
|
|
141
|
+
logger.debug(f"Cluster scoring metric: '{score_metric}'")
|
|
142
|
+
logger.debug(f"Number of permutations: {num_permutations}")
|
|
143
|
+
logger.debug(f"Maximum workers: {max_workers}")
|
|
144
|
+
# Compute cluster significance using the permutation test
|
|
145
|
+
return self._run_statistical_test(
|
|
146
|
+
annotation=annotation,
|
|
147
|
+
clusters=clusters,
|
|
148
|
+
null_distribution=null_distribution,
|
|
149
|
+
random_seed=random_seed,
|
|
150
|
+
statistical_test_key="permutation",
|
|
151
|
+
statistical_test_function=compute_permutation_test,
|
|
152
|
+
score_metric=score_metric,
|
|
153
|
+
num_permutations=num_permutations,
|
|
154
|
+
max_workers=max_workers,
|
|
155
|
+
**kwargs,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
def _run_statistical_test(
|
|
159
|
+
self,
|
|
160
|
+
annotation: Dict[str, Any],
|
|
161
|
+
clusters: csr_matrix,
|
|
162
|
+
null_distribution: str = "network",
|
|
163
|
+
statistical_test_key: str = "hypergeom",
|
|
164
|
+
statistical_test_function: Any = compute_hypergeom_test,
|
|
165
|
+
**kwargs,
|
|
166
|
+
) -> Dict[str, Any]:
|
|
167
|
+
"""
|
|
168
|
+
Run the specified statistical test to compute cluster significance.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
annotation (Dict[str, Any]): Annotation data associated with the network.
|
|
172
|
+
clusters (csr_matrix): The cluster matrix to analyze.
|
|
173
|
+
null_distribution (str, optional): The type of null distribution to use ('network' or 'annotation').
|
|
174
|
+
Defaults to "network".
|
|
175
|
+
random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.
|
|
176
|
+
statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "binom").
|
|
177
|
+
Used for logging and debugging. Defaults to "hypergeom".
|
|
178
|
+
statistical_test_function (Any, optional): The function implementing the statistical test.
|
|
179
|
+
It should accept clusters, annotation, null distribution, and additional kwargs.
|
|
180
|
+
Defaults to `compute_hypergeom_test`.
|
|
181
|
+
**kwargs: Additional parameters to be passed to the statistical test function.
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
Dict[str, Any]: A dictionary containing the computed significance values for clusters.
|
|
185
|
+
"""
|
|
186
|
+
# Log null distribution type
|
|
187
|
+
logger.debug(f"Null distribution: '{null_distribution}'")
|
|
188
|
+
# Log cluster analysis parameters
|
|
189
|
+
params.log_stats(
|
|
190
|
+
statistical_test_function=statistical_test_key,
|
|
191
|
+
null_distribution=null_distribution,
|
|
192
|
+
**kwargs,
|
|
193
|
+
)
|
|
194
|
+
# Apply statistical test function to compute cluster significance
|
|
195
|
+
cluster_significance = statistical_test_function(
|
|
196
|
+
clusters=clusters,
|
|
197
|
+
annotation=annotation["matrix"],
|
|
198
|
+
null_distribution=null_distribution,
|
|
199
|
+
**kwargs,
|
|
200
|
+
)
|
|
201
|
+
# Return the computed cluster significance
|
|
202
|
+
return cluster_significance
|
|
{risk_network-0.0.16b1.dist-info → risk_network-0.0.16b2.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: risk-network
|
|
3
|
-
Version: 0.0.16b1
|
|
3
|
+
Version: 0.0.16b2
|
|
4
4
|
Summary: A Python package for scalable network analysis and high-quality visualization.
|
|
5
5
|
Author-email: Ira Horecka <ira89@icloud.com>
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -44,7 +44,7 @@ Dynamic: license-file
|
|
|
44
44
|

|
|
45
45
|
[](https://pypi.python.org/pypi/risk-network)
|
|
46
46
|

|
|
47
|
-
[](https://doi.org/10.5281/zenodo.17257417)
|
|
48
48
|

|
|
49
49
|
|
|
50
50
|
**RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. It integrates community detection algorithms, rigorous overrepresentation analysis, and a modular framework for diverse network types. RISK identifies biologically coherent relationships within networks and generates publication-ready visualizations, making it a useful tool for biological and interdisciplinary network analysis.
|
|
risk_network-0.0.16b2.dist-info/RECORD
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
risk/__init__.py,sha256=8kw8nQa4l3_e5whBhKyspNfK6gv9ACvnrFRffg7RO40,142
|
|
2
|
+
risk/risk.py,sha256=EHq4jfsx1TssuzkfYd8joaogW0heS0SuM38BWj4CX0E,1063
|
|
3
|
+
risk/annotation/__init__.py,sha256=1F_P_JVQD7ai4bWNjL5dIHTR6SuaFIfnD-MC7wZW0vY,162
|
|
4
|
+
risk/annotation/_nltk_setup.py,sha256=YUlB7bqiHELn8rb5tE5o7_FiXyirnxWL3Vzm47HWtiM,3581
|
|
5
|
+
risk/annotation/annotation.py,sha256=FVYFJoIFUzjhQCba44ocXz65u5Kwr_b0h431ewb_QZc,14980
|
|
6
|
+
risk/annotation/io.py,sha256=_bA-sRDh4ynvgNVCz60AChzh_f22Oxv1AkVnQK-LTPw,12451
|
|
7
|
+
risk/cluster/__init__.py,sha256=vP7ZTKRogKtfMZt7HquJes6TZqQvufIvIeIYbM_bwAE,161
|
|
8
|
+
risk/cluster/_community.py,sha256=wgcoUiFL8tJZTmfnUA2_0LkK_pOFtNCC_d1vBX6ENL4,17665
|
|
9
|
+
risk/cluster/api.py,sha256=d5IT-th4uVotGrt-sctqs3EsWjgvSi4ncQrdB1ghp-c,9337
|
|
10
|
+
risk/cluster/cluster.py,sha256=sWPWBlfy3bZoPBh6LSHc54sZrXMSf6AA9vBVu_z0TmA,21469
|
|
11
|
+
risk/cluster/label.py,sha256=TtN1gbfoDITTc2KWBz8rC8HLr2CZzuhnZU2EClh3dNs,16811
|
|
12
|
+
risk/log/__init__.py,sha256=wEJ0hXt8yyIAS-IGGL7Kd8xoPD_beVWQwhF-buxW6J4,203
|
|
13
|
+
risk/log/console.py,sha256=x1sFpOqKzPVoBHr9sDJJYAbFqQmRUV2HPKBvEn9_AX4,4649
|
|
14
|
+
risk/log/parameters.py,sha256=bKQKYs4cRXnIlEboys8pVjs23NBgMtQWRSg1lGgFUR0,6103
|
|
15
|
+
risk/network/__init__.py,sha256=ZdJRKDXc9DOCIFzDYOsnqmnHvFbODNjaJ8XmeNby93M,124
|
|
16
|
+
risk/network/io.py,sha256=909YYj1h9TKW-VpcdYSya3i1V-hOLXvjU795Y5v6roA,28337
|
|
17
|
+
risk/network/graph/__init__.py,sha256=sU46_opsN9ep9jh6fVoBYRzbohjm6_Nx1IVYqv0sMj4,98
|
|
18
|
+
risk/network/graph/_stats.py,sha256=Q2LPu6vzW-lpDCOLtdBna9tCfuCJ4jxjRjvtb3SD9os,7355
|
|
19
|
+
risk/network/graph/_summary.py,sha256=wCSPXHtA6wfjJwRRuD9RzherNOB8RGQzq1weDa-sHBY,10188
|
|
20
|
+
risk/network/graph/api.py,sha256=E8wjC_RvaHTmdQyD5aGmlSOFtjCrVHpkbqcca570ajQ,8456
|
|
21
|
+
risk/network/graph/graph.py,sha256=Ztw6-rcr4cgQgW1uujbU4y_RHYJRzT_oNOtsgmYCf9s,12475
|
|
22
|
+
risk/network/plotter/__init__.py,sha256=gw2fV1atXlxC5ckST2TnBV3yhVYfQSFadPYBBTiDatk,79
|
|
23
|
+
risk/network/plotter/_canvas.py,sha256=fQEcBCQEI4vwj6sAHgULYMdMuWyqeRrVYp7NxHkyXng,13591
|
|
24
|
+
risk/network/plotter/_contour.py,sha256=8QshMFxZXZmZRzfru5a3Bpf5pGL-ElukegVMvculgFM,15551
|
|
25
|
+
risk/network/plotter/_labels.py,sha256=JZ6KOy_kTz-m_qkedKJ181kppjhk09Z63zlyRv18lnw,46909
|
|
26
|
+
risk/network/plotter/_network.py,sha256=IiK1mnByoTvjc7xmuRHfewjC1t8TR8ZIvDxgITSU5Fk,14306
|
|
27
|
+
risk/network/plotter/_plotter.py,sha256=6OTGz1md5v_1iKGru__2QeV3-gUULLJscyLPWPOBCGo,6004
|
|
28
|
+
risk/network/plotter/api.py,sha256=LIrXZJBdTCnh6ntFHEl5vbQs35f4cytqixizxzUQqvU,1686
|
|
29
|
+
risk/network/plotter/_utils/__init__.py,sha256=F2W_R_lhtTDap3xJ25qN-C3Ba1fb0NYkDBiR8vB-4Eo,205
|
|
30
|
+
risk/network/plotter/_utils/colors.py,sha256=CKmZ-Ki1oh40nNGBHE4PTirJ8dF0Y4yReTF6VzzQlrA,19170
|
|
31
|
+
risk/network/plotter/_utils/layout.py,sha256=UQQHT-A-iHQxKEzFla_8fvnjBJTc_zsm2-ZJpvLLbOk,3627
|
|
32
|
+
risk/stats/__init__.py,sha256=Mb8h-1Z3upFS0zrlFVbyfO5afLkVaZXfqZpOs4lj_lg,57
|
|
33
|
+
risk/stats/api.py,sha256=1U-cABIZzVQjmTF7M09QaRZfCLI82C8OzSXOSuM37XI,8075
|
|
34
|
+
risk/stats/_stats/__init__.py,sha256=yMGqINN7Z4A2V4ln5f6lhdV-5GRfV5ABm-m-mni4YNQ,197
|
|
35
|
+
risk/stats/_stats/tests.py,sha256=mTCwu6CnZjU_qURmKKc8Dd37Gar8xukttIm2qXhrCY0,7279
|
|
36
|
+
risk/stats/_stats/permutation/__init__.py,sha256=BDELJoCtaz0Byc4V6f7chtfYqkG4l_trM30kgzNxluM,134
|
|
37
|
+
risk/stats/_stats/permutation/_test_functions.py,sha256=qeHsljh_gVG1EqFPzYPvLG_G3HnoXtKwsmowVXmEKnI,2970
|
|
38
|
+
risk/stats/_stats/permutation/permutation.py,sha256=aglSO8NiP0hzTdcZKIQy1ZwaTJaXPBPBULTB8JG_02E,10377
|
|
39
|
+
risk_network-0.0.16b2.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
|
40
|
+
risk_network-0.0.16b2.dist-info/METADATA,sha256=7usyb0e7b7qPUqaxIYp-UT2zrasH5Ur4N6E96jqA7aY,5390
|
|
41
|
+
risk_network-0.0.16b2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
42
|
+
risk_network-0.0.16b2.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
|
43
|
+
risk_network-0.0.16b2.dist-info/RECORD,,
|