risk-network 0.0.12b0__py3-none-any.whl → 0.0.12b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/__init__.py +10 -0
- risk/annotations/annotations.py +354 -0
- risk/annotations/io.py +241 -0
- risk/annotations/nltk_setup.py +86 -0
- risk/log/__init__.py +11 -0
- risk/log/console.py +141 -0
- risk/log/parameters.py +171 -0
- risk/neighborhoods/__init__.py +7 -0
- risk/neighborhoods/api.py +442 -0
- risk/neighborhoods/community.py +441 -0
- risk/neighborhoods/domains.py +360 -0
- risk/neighborhoods/neighborhoods.py +514 -0
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/neighborhoods/stats/permutation/permutation.py +240 -0
- risk/neighborhoods/stats/permutation/test_functions.py +70 -0
- risk/neighborhoods/stats/tests.py +275 -0
- risk/network/__init__.py +4 -0
- risk/network/graph/__init__.py +4 -0
- risk/network/graph/api.py +200 -0
- risk/network/graph/graph.py +268 -0
- risk/network/graph/stats.py +166 -0
- risk/network/graph/summary.py +253 -0
- risk/network/io.py +693 -0
- risk/network/plotter/__init__.py +4 -0
- risk/network/plotter/api.py +54 -0
- risk/network/plotter/canvas.py +291 -0
- risk/network/plotter/contour.py +329 -0
- risk/network/plotter/labels.py +935 -0
- risk/network/plotter/network.py +294 -0
- risk/network/plotter/plotter.py +141 -0
- risk/network/plotter/utils/colors.py +419 -0
- risk/network/plotter/utils/layout.py +94 -0
- risk_network-0.0.12b1.dist-info/METADATA +122 -0
- risk_network-0.0.12b1.dist-info/RECORD +40 -0
- {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/WHEEL +1 -1
- risk_network-0.0.12b0.dist-info/METADATA +0 -796
- risk_network-0.0.12b0.dist-info/RECORD +0 -7
- {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/licenses/LICENSE +0 -0
- {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,240 @@
|
|
1
|
+
"""
|
2
|
+
risk/neighborhoods/stats/permutation/permutation
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from multiprocessing import Manager, get_context
|
7
|
+
from multiprocessing.managers import ValueProxy
|
8
|
+
from typing import Any, Callable, Dict, List, Tuple, Union
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
from scipy.sparse import csr_matrix
|
12
|
+
from threadpoolctl import threadpool_limits
|
13
|
+
from tqdm import tqdm
|
14
|
+
|
15
|
+
from risk.neighborhoods.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
|
16
|
+
|
17
|
+
|
18
|
+
def compute_permutation_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    score_metric: str = "sum",
    null_distribution: str = "network",
    num_permutations: int = 1000,
    random_seed: int = 888,
    max_workers: int = 1,
) -> Dict[str, Any]:
    """Run a permutation test and turn the resulting counts into empirical p-values.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        score_metric (str, optional): Key into DISPATCH_TEST_FUNCTIONS ('sum' or 'stdev').
            Defaults to "sum".
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            Defaults to "network".
        num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
        random_seed (int, optional): Seed for random number generation. Defaults to 888.
        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.

    Returns:
        Dict[str, Any]: Dictionary with the keys "depletion_pvals" and "enrichment_pvals".
    """
    # Work on float32 copies of both inputs
    # NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
    neighborhoods, annotations = (
        matrix.astype(np.float32) for matrix in (neighborhoods, annotations)
    )
    # Look up the scoring function registered for the requested metric
    score_func = DISPATCH_TEST_FUNCTIONS[score_metric]

    # Count, per cell, how many permutations scored <= (depletion) or >= (enrichment) the observed score
    depletion_counts, enrichment_counts = _run_permutation_test(
        neighborhoods=neighborhoods,
        annotations=annotations,
        neighborhood_score_func=score_func,
        null_distribution=null_distribution,
        num_permutations=num_permutations,
        random_seed=random_seed,
        max_workers=max_workers,
    )

    # Floor every count at 1 so that the smallest reported p-value is 1/num_permutations, never 0
    return {
        "depletion_pvals": np.maximum(depletion_counts, 1) / num_permutations,
        "enrichment_pvals": np.maximum(enrichment_counts, 1) / num_permutations,
    }
|
67
|
+
|
68
|
+
|
69
|
+
def _run_permutation_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    neighborhood_score_func: Callable,
    null_distribution: str = "network",
    num_permutations: int = 1000,
    random_seed: int = 888,
    max_workers: int = 4,
) -> tuple:
    """Run the permutation test to calculate depletion and enrichment counts.

    All permutations are generated up front from a single seeded generator, split into
    one batch per worker, and scored in a pool of spawned worker processes.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations. The caller's
            matrix is left untouched; NaNs are zeroed on a private copy.
        neighborhood_score_func (Callable): Function to calculate neighborhood scores.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            Defaults to "network".
        num_permutations (int, optional): Number of permutations. Defaults to 1000.
        random_seed (int, optional): Seed for random number generation. Defaults to 888.
        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.

    Returns:
        tuple: Depletion and enrichment counts.

    Raises:
        ValueError: If an invalid null_distribution value is provided or max_workers is below 1.
    """
    # Fail early with a clear message instead of a ZeroDivisionError in the batch-size arithmetic
    if max_workers < 1:
        raise ValueError("max_workers must be a positive integer.")

    # Initialize the RNG for reproducibility
    rng = np.random.default_rng(seed=random_seed)
    # Determine the row indices to permute based on the null distribution type
    if null_distribution == "network":
        idxs = range(annotations.shape[0])
    elif null_distribution == "annotations":
        # Only rows that hold at least one stored annotation entry
        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Replace NaNs with zeros, copying first so the caller's matrix is never mutated
    nan_mask = np.isnan(annotations.data)
    if nan_mask.any():
        annotations = annotations.copy()
        annotations.data[nan_mask] = 0

    annotation_matrix_obsv = annotations[idxs]
    neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
    # Calculate observed neighborhood scores
    with np.errstate(invalid="ignore", divide="ignore"):
        observed_neighborhood_scores = neighborhood_score_func(
            neighborhoods_matrix_obsv, annotation_matrix_obsv
        )

    # Initialize count matrices for depletion and enrichment
    counts_depletion = np.zeros(observed_neighborhood_scores.shape)
    counts_enrichment = np.zeros(observed_neighborhood_scores.shape)

    # Precompute every permutation in the parent so results depend only on random_seed
    permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
    # Ceiling division: max_workers batches of this size always cover all permutations
    # (trailing batches may be shorter or empty)
    batch_size = -(-num_permutations // max_workers)
    permutation_batches = [
        permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)
    ]

    # Use the spawn context for creating a new multiprocessing pool
    ctx = get_context("spawn")
    # The manager owns a server process; the with-statement guarantees it is shut down,
    # even if a worker raises
    with Manager() as manager, ctx.Pool(max_workers) as pool:
        progress_counter = manager.Value("i", 0)
        # One parameter tuple per worker, each carrying its own batch of permutations
        params_list = [
            (
                batch,
                annotations,
                neighborhoods_matrix_obsv,
                observed_neighborhood_scores,
                neighborhood_score_func,
                num_permutations,
                progress_counter,
                max_workers,
            )
            for batch in permutation_batches
        ]

        with tqdm(total=num_permutations, desc="Total progress", position=0) as progress:
            # Start the permutation process in parallel
            results = pool.starmap_async(_permutation_process_batch, params_list, chunksize=1)

            # Mirror the shared counter into the progress bar while the workers run
            while not results.ready():
                # Clamp to the total and never step backwards: worker updates are not atomic
                reported = min(progress_counter.value, num_permutations)
                progress.update(max(0, reported - progress.n))
                results.wait(0.1)  # Wait for 100ms
            # Ensure progress bar reaches 100%
            progress.update(num_permutations - progress.n)

            # Accumulate results from each worker
            for local_counts_depletion, local_counts_enrichment in results.get():
                counts_depletion = np.add(counts_depletion, local_counts_depletion)
                counts_enrichment = np.add(counts_enrichment, local_counts_enrichment)

    return counts_depletion, counts_enrichment
|
172
|
+
|
173
|
+
|
174
|
+
def _permutation_process_batch(
    permutations: Union[List, Tuple, np.ndarray],
    annotation_matrix: csr_matrix,
    neighborhoods_matrix_obsv: csr_matrix,
    observed_neighborhood_scores: np.ndarray,
    neighborhood_score_func: Callable,
    num_permutations: int,
    progress_counter: ValueProxy,
    max_workers: int,
) -> tuple:
    """Process a batch of permutations in a worker process.

    Args:
        permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process. Each item is
            a sequence of row indices used to reorder annotation_matrix.
        annotation_matrix (csr_matrix): Sparse binary matrix representing annotations.
        neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
        observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
        neighborhood_score_func (Callable): Function to calculate neighborhood scores.
        num_permutations (int): Number of total permutations across all subsets.
        progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
        max_workers (int): Number of workers for multiprocessing.

    Returns:
        tuple: Local counts of depletion and enrichment.
    """
    # Initialize local count matrices for this worker
    local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
    local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)

    # Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used
    # NOTE: This does not work for Mac M chips due to a bug in the threadpoolctl package
    # This is currently a known issue and is being addressed by the maintainers [https://github.com/joblib/threadpoolctl/issues/135]
    limits = None if max_workers == 1 else 1
    with threadpool_limits(limits=limits, user_api="blas"):
        # Progress is reported in chunks to keep traffic to the shared counter low
        local_progress = 0
        # Chunk size: 1/100th of the total permutation count, but at least 1
        modulo_value = max(1, num_permutations // 100)

        for permuted_idxs in permutations:
            # Apply precomputed permutation
            annotation_matrix_permut = annotation_matrix[permuted_idxs]
            # Calculate permuted neighborhood scores
            with np.errstate(invalid="ignore", divide="ignore"):
                permuted_neighborhood_scores = neighborhood_score_func(
                    neighborhoods_matrix_obsv, annotation_matrix_permut
                )

            # A permutation counts toward depletion when it scores at or below the observed
            # value and toward enrichment when it scores at or above it (ties count for both)
            local_counts_depletion += permuted_neighborhood_scores <= observed_neighborhood_scores
            local_counts_enrichment += permuted_neighborhood_scores >= observed_neighborhood_scores

            # Update progress
            local_progress += 1
            if local_progress % modulo_value == 0:
                progress_counter.value += modulo_value

        # Flush only the iterations not yet reported; adding a full chunk here would
        # push the shared counter past the true number of processed permutations
        unreported = local_progress % modulo_value
        if unreported:
            progress_counter.value += unreported

    return local_counts_depletion, local_counts_enrichment
|
@@ -0,0 +1,70 @@
|
|
1
|
+
"""
|
2
|
+
risk/neighborhoods/stats/permutation/test_functions
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
from scipy.sparse import csr_matrix
|
8
|
+
|
9
|
+
# NOTE: Cython optimizations provided minimal performance benefits.
|
10
|
+
# The final version with Cython is archived in the `cython_permutation` branch.
|
11
|
+
|
12
|
+
# DISPATCH_TEST_FUNCTIONS can be found at the end of the file.
|
13
|
+
|
14
|
+
|
15
|
+
def compute_neighborhood_score_by_sum(
    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
) -> np.ndarray:
    """Score each neighborhood by summing the annotation values of its members.

    Args:
        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.

    Returns:
        np.ndarray: Dense array holding the product of the two matrices, i.e. the summed
            annotation values per neighborhood (rows) and annotation (columns).
    """
    # The product is computed sparsely and densified once, on the way out
    return (neighborhoods_matrix @ annotation_matrix).toarray()
|
32
|
+
|
33
|
+
|
34
|
+
def compute_neighborhood_score_by_stdev(
    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
) -> np.ndarray:
    """Compute the standard deviation of annotation values within each neighborhood.

    The value is computed as sqrt(E[x^2] - E[x]^2), where both expectations are taken
    over the members of a neighborhood (the row sum of neighborhoods_matrix).

    Args:
        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.

    Returns:
        np.ndarray: Standard deviation of the neighborhood scores. Entries that evaluate
            to NaN (e.g. a slightly negative variance from rounding) are returned as 0.
    """
    # Sum of annotation values per neighborhood (sparse matrix multiplication)
    neighborhood_score = neighborhoods_matrix @ annotation_matrix
    # Number of members in each neighborhood, as a 1D array
    sizes = np.asarray(neighborhoods_matrix.sum(axis=1)).flatten()
    # Integer row sums cannot hold NaN; promote them so the masking below works for any dtype
    # (floating inputs keep their dtype, so existing float32 results are unchanged)
    if not np.issubdtype(sizes.dtype, np.floating):
        sizes = sizes.astype(np.float64)
    # Mark empty neighborhoods with NaN so the reciprocal never divides by zero
    sizes[sizes == 0] = np.nan
    # Column vector of 1/size, shared by both expectation terms
    inverse_sizes = 1 / sizes[:, None]

    # Mean of the annotation values per neighborhood
    mean = neighborhood_score.multiply(inverse_sizes).toarray()
    # Mean of squares, from the element-wise squared annotation matrix
    annotation_squared = annotation_matrix.multiply(annotation_matrix)
    mean_of_squares = (neighborhoods_matrix @ annotation_squared).multiply(inverse_sizes).toarray()

    # Variance as E[x^2] - E[x]^2, then its square root
    neighborhood_stdev = np.sqrt(mean_of_squares - np.power(mean, 2))
    # Any NaN left over is reported as zero
    neighborhood_stdev[np.isnan(neighborhood_stdev)] = 0
    return neighborhood_stdev
|
64
|
+
|
65
|
+
|
66
|
+
# Dictionary to dispatch statistical test functions based on the score metric.
# Each key is a score metric name; each value is the neighborhood scoring function
# defined above that implements it.
DISPATCH_TEST_FUNCTIONS = {
    "sum": compute_neighborhood_score_by_sum,
    "stdev": compute_neighborhood_score_by_stdev,
}
|
@@ -0,0 +1,275 @@
|
|
1
|
+
"""
|
2
|
+
risk/neighborhoods/stats/tests
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from typing import Any, Dict
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
from scipy.sparse import csr_matrix
|
10
|
+
from scipy.stats import binom, chi2, hypergeom, norm, poisson
|
11
|
+
|
12
|
+
|
13
|
+
def compute_binom_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.

    Each cell is tested against Binomial(n, p), where n is the neighborhood's row sum and p is
    the per-annotation success probability implied by the chosen null distribution.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    # Get the total number of nodes in the network
    total_nodes = neighborhoods.shape[1]

    # Neighborhood sizes as a column vector, annotation totals as a flat vector
    neighborhood_sizes = np.asarray(neighborhoods.sum(axis=1))
    annotation_totals = np.asarray(annotations.sum(axis=0)).flatten()

    # Success probability of each annotation under the null
    if null_distribution == "network":
        # Annotation total relative to the number of nodes
        success_probs = annotation_totals / total_nodes
    elif null_distribution == "annotations":
        # Annotation total relative to the sum of all annotation values
        success_probs = annotation_totals / annotations.sum()
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Observed counts per (neighborhood, annotation), densified for scipy.stats
    annotated_counts_dense = (neighborhoods @ annotations).toarray()

    # P(X >= k) via the survival function: `1 - cdf` rounds to exactly 0.0 once the
    # tail drops below double precision, which erases the strongest enrichment signals
    enrichment_pvals = binom.sf(annotated_counts_dense - 1, neighborhood_sizes, success_probs)
    # P(X <= k)
    depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes, success_probs)

    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
|
56
|
+
|
57
|
+
|
58
|
+
def compute_chi2_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    if null_distribution == "network":
        # Every node belongs to the background
        background_population = neighborhoods.shape[0]
        neighborhood_rows, annotation_rows = neighborhoods, annotations
    elif null_distribution == "annotations":
        # Restrict the background to nodes carrying at least one annotation
        annotated_nodes = np.ravel(annotations.sum(axis=1)) > 0
        background_population = annotated_nodes.sum()
        neighborhood_rows = neighborhoods[annotated_nodes]
        annotation_rows = annotations[annotated_nodes]
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Column sums, shaped as a column vector and a row vector so their product is a full grid
    neighborhood_sums = np.asarray(neighborhood_rows.sum(axis=0)).reshape(-1, 1)
    annotation_sums = np.asarray(annotation_rows.sum(axis=0)).reshape(1, -1)

    # Observed counts per (neighborhood, annotation), densified for the arithmetic below
    observed = (neighborhoods.T @ annotations).toarray()
    # Expected counts under independence
    expected = (neighborhood_sums @ annotation_sums) / background_population
    # Per-cell statistic (observed - expected)^2 / expected; cells with no expectation score 0
    with np.errstate(divide="ignore", invalid="ignore"):
        squared_deviation = np.power(observed - expected, 2)
        chi2_stat = np.where(expected > 0, squared_deviation / expected, 0)

    # NOTE(review): the statistic is unsigned, so a cell with observed < expected also gets a
    # small "enrichment" p-value, and depletion is simply 1 - enrichment — confirm this is intended
    return {
        "depletion_pvals": chi2.cdf(chi2_stat, df=1),  # Lower tail
        "enrichment_pvals": chi2.sf(chi2_stat, df=1),  # Upper tail
    }
|
118
|
+
|
119
|
+
|
120
|
+
def compute_hypergeom_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    if null_distribution == "network":
        # Background is every node in the network
        population = neighborhoods.shape[1]
        neighborhood_rows, annotation_rows = neighborhoods, annotations
    elif null_distribution == "annotations":
        # Background is restricted to nodes with at least one annotation
        has_annotation = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
        population = has_annotation.sum()
        neighborhood_rows = neighborhoods[has_annotation]
        annotation_rows = annotations[has_annotation]
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Column sums over the selected rows, shaped for broadcasting against the observed grid
    neighborhood_sums = neighborhood_rows.sum(axis=0).A.reshape(-1, 1)
    annotation_sums = annotation_rows.sum(axis=0).A.reshape(1, -1)
    background_population = np.array(population).reshape(1, 1)

    # Observed counts per (neighborhood, annotation)
    annotated_in_neighborhood = (neighborhoods.T @ annotations).toarray()

    # scipy argument order: (k, population size, successes in population, number of draws)
    hypergeom_args = (background_population, annotation_sums, neighborhood_sums)
    return {
        # P(X <= k)
        "depletion_pvals": hypergeom.cdf(annotated_in_neighborhood, *hypergeom_args),
        # P(X >= k), written as the survival function at k - 1
        "enrichment_pvals": hypergeom.sf(annotated_in_neighborhood - 1, *hypergeom_args),
    }
|
174
|
+
|
175
|
+
|
176
|
+
def compute_poisson_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    # Observed counts per cell of the neighborhoods @ annotations product, densified for scipy.stats
    annotated_in_neighborhood_dense = (neighborhoods @ annotations).toarray()

    # Expected rate (lambda) for each cell under the chosen null distribution
    if null_distribution == "network":
        # Mean of each row of the count matrix (axis=1)
        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
    elif null_distribution == "annotations":
        # Mean of each column of the count matrix (axis=0)
        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # P(X >= k) via the survival function: `1 - cdf` rounds to exactly 0.0 once the
    # tail drops below double precision, which erases the strongest enrichment signals
    enrichment_pvals = poisson.sf(annotated_in_neighborhood_dense - 1, lambda_expected)
    # P(X <= k)
    depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)

    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
|
216
|
+
|
217
|
+
|
218
|
+
def compute_zscore_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute z-score test for enrichment and depletion in neighborhoods with selectable null distribution.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values. Cells whose
            standard deviation under the null is zero yield NaN.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    if null_distribution == "network":
        # Background is every node in the network
        background_population = neighborhoods.shape[1]
        neighborhood_rows, annotation_rows = neighborhoods, annotations
    elif null_distribution == "annotations":
        # Background is restricted to nodes with at least one annotation
        has_annotation = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
        background_population = has_annotation.sum()
        neighborhood_rows = neighborhoods[has_annotation]
        annotation_rows = annotations[has_annotation]
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Column sums over the selected rows: a column vector and a row vector
    neighborhood_sums = neighborhood_rows.sum(axis=0).A.reshape(-1, 1)
    annotation_sums = annotation_rows.sum(axis=0).A.reshape(1, -1)

    # Observed counts per (neighborhood, annotation)
    observed = (neighborhoods.T @ annotations).toarray()
    # Expected counts under the null
    expected = (neighborhood_sums @ annotation_sums) / background_population

    # Standard deviation under the null
    annotation_fraction = annotation_sums / background_population
    neighborhood_fraction = neighborhood_sums / background_population
    std_dev = np.sqrt(expected * (1 - annotation_fraction) * (1 - neighborhood_fraction))
    # A zero spread would divide by zero; NaN propagates into the z-score instead
    std_dev[std_dev == 0] = np.nan

    z_scores = (observed - expected) / std_dev

    return {
        "depletion_pvals": norm.cdf(z_scores),  # Lower tail
        "enrichment_pvals": norm.sf(z_scores),  # Upper tail
    }
|
risk/network/__init__.py
ADDED