risk-network 0.0.9b28__py3-none-any.whl → 0.0.9b30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/stats/stat_tests.py +61 -36
- {risk_network-0.0.9b28.dist-info → risk_network-0.0.9b30.dist-info}/METADATA +1 -1
- {risk_network-0.0.9b28.dist-info → risk_network-0.0.9b30.dist-info}/RECORD +7 -7
- {risk_network-0.0.9b28.dist-info → risk_network-0.0.9b30.dist-info}/LICENSE +0 -0
- {risk_network-0.0.9b28.dist-info → risk_network-0.0.9b30.dist-info}/WHEEL +0 -0
- {risk_network-0.0.9b28.dist-info → risk_network-0.0.9b30.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/stats/stat_tests.py
CHANGED
@@ -3,6 +3,7 @@ risk/stats/stat_tests
|
|
3
3
|
~~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
6
7
|
from typing import Any, Dict
|
7
8
|
|
8
9
|
import numpy as np
|
@@ -120,56 +121,80 @@ def compute_hypergeom_test(
|
|
120
121
|
annotations: csr_matrix,
|
121
122
|
null_distribution: str = "network",
|
122
123
|
) -> Dict[str, Any]:
|
123
|
-
"""
|
124
|
-
Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
|
124
|
+
"""Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
|
125
125
|
|
126
126
|
Args:
|
127
127
|
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
128
128
|
annotations (csr_matrix): Sparse binary matrix representing annotations.
|
129
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
|
129
|
+
null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
|
130
|
+
Defaults to "network".
|
130
131
|
|
131
132
|
Returns:
|
132
133
|
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
133
134
|
"""
|
134
|
-
#
|
135
|
+
# Get the total number of nodes in the network
|
135
136
|
total_nodes = neighborhoods.shape[1]
|
137
|
+
# Calculate neighborhood and annotation sums
|
138
|
+
neighborhood_sums = neighborhoods.sum(axis=0).A1 # A1 returns a 1D array
|
139
|
+
annotation_sums = annotations.sum(axis=0).A1
|
136
140
|
|
137
|
-
|
138
|
-
|
139
|
-
annotation_sums = annotations.sum(axis=0).A.flatten()
|
140
|
-
|
141
|
-
if null_distribution == "network":
|
142
|
-
background_population = total_nodes
|
143
|
-
elif null_distribution == "annotations":
|
144
|
-
# Boolean mask for nodes with annotations
|
145
|
-
annotated_nodes = annotations.getnnz(axis=1) > 0
|
141
|
+
if null_distribution == "annotations":
|
142
|
+
annotated_nodes = annotations.getnnz(axis=1) > 0 # Nodes with any annotation
|
146
143
|
background_population = annotated_nodes.sum()
|
147
|
-
# Filter neighborhoods and annotations to include only annotated nodes
|
148
144
|
neighborhoods = neighborhoods[annotated_nodes]
|
149
145
|
annotations = annotations[annotated_nodes]
|
150
|
-
neighborhood_sums = neighborhoods.sum(axis=0).
|
151
|
-
annotation_sums = annotations.sum(axis=0).
|
146
|
+
neighborhood_sums = neighborhoods.sum(axis=0).A1
|
147
|
+
annotation_sums = annotations.sum(axis=0).A1
|
148
|
+
elif null_distribution == "network":
|
149
|
+
background_population = total_nodes
|
152
150
|
else:
|
153
|
-
raise ValueError(
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
#
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
151
|
+
raise ValueError("Invalid null_distribution value. Choose 'network' or 'annotations'.")
|
152
|
+
|
153
|
+
# Sparse matrix multiplication for observed counts
|
154
|
+
annotated_in_neighborhood = neighborhoods.T @ annotations # Result is sparse
|
155
|
+
# Determine the axis with fewer vectors
|
156
|
+
axis_to_process = 0 if annotations.shape[0] < annotations.shape[1] else 1
|
157
|
+
|
158
|
+
# Initialize p-value arrays
|
159
|
+
depletion_pvals = np.empty(annotated_in_neighborhood.shape, dtype=np.float64)
|
160
|
+
enrichment_pvals = np.empty(annotated_in_neighborhood.shape, dtype=np.float64)
|
161
|
+
|
162
|
+
def compute_pvals_for_index(idx: int):
|
163
|
+
"""Compute p-values for a given index."""
|
164
|
+
if axis_to_process == 0: # Process rows
|
165
|
+
observed_counts = annotated_in_neighborhood[idx, :].toarray().flatten()
|
166
|
+
neigh_total = neighborhood_sums[idx]
|
167
|
+
return (
|
168
|
+
idx,
|
169
|
+
hypergeom.cdf(observed_counts, background_population, annotation_sums, neigh_total),
|
170
|
+
hypergeom.sf(
|
171
|
+
observed_counts - 1, background_population, annotation_sums, neigh_total
|
172
|
+
),
|
173
|
+
)
|
174
|
+
else: # Process columns
|
175
|
+
observed_counts = annotated_in_neighborhood[:, idx].toarray().flatten()
|
176
|
+
ann_total = annotation_sums[idx]
|
177
|
+
return (
|
178
|
+
idx,
|
179
|
+
hypergeom.cdf(observed_counts, background_population, ann_total, neighborhood_sums),
|
180
|
+
hypergeom.sf(
|
181
|
+
observed_counts - 1, background_population, ann_total, neighborhood_sums
|
182
|
+
),
|
183
|
+
)
|
184
|
+
|
185
|
+
# Use ThreadPoolExecutor to process indices in parallel
|
186
|
+
num_indices = annotations.shape[axis_to_process]
|
187
|
+
with ThreadPoolExecutor() as executor:
|
188
|
+
results = executor.map(compute_pvals_for_index, range(num_indices))
|
189
|
+
|
190
|
+
# Collect results
|
191
|
+
for idx, dep_pval, enr_pval in results:
|
192
|
+
if axis_to_process == 0: # Rows
|
193
|
+
depletion_pvals[idx, :] = dep_pval
|
194
|
+
enrichment_pvals[idx, :] = enr_pval
|
195
|
+
else: # Columns
|
196
|
+
depletion_pvals[:, idx] = dep_pval
|
197
|
+
enrichment_pvals[:, idx] = enr_pval
|
173
198
|
|
174
199
|
return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|
175
200
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
risk/__init__.py,sha256=
|
1
|
+
risk/__init__.py,sha256=fTPZhSt0RrpQ6uKRXrk-CSsXCmnWnq5_hfufVq6Bu-0,127
|
2
2
|
risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
|
3
3
|
risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
|
4
4
|
risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
|
@@ -30,12 +30,12 @@ risk/network/plotter/utils/colors.py,sha256=VU1sLPRC99ll6EGK4vRNgLMUXU8lja1vjiXU
|
|
30
30
|
risk/network/plotter/utils/layout.py,sha256=OPqV8jzV9dpnOhYU4SYMSfsIXalVzESrlBSI_Y43OGU,3640
|
31
31
|
risk/stats/__init__.py,sha256=2zdLv3tUHKyAjwAo7LprVXRaak1cHgrpYMVMSik6JM4,324
|
32
32
|
risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,7336
|
33
|
-
risk/stats/stat_tests.py,sha256=
|
33
|
+
risk/stats/stat_tests.py,sha256=e6Ep_cedc-JIK6Ap-YimX-X2oCxfFFmT5rHw99mYfYk,13171
|
34
34
|
risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
|
35
35
|
risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
|
36
36
|
risk/stats/permutation/test_functions.py,sha256=D3XMPM8CasUNytWSRce22TI6KK6XulYn5uGG4lWxaHs,3120
|
37
|
-
risk_network-0.0.
|
38
|
-
risk_network-0.0.
|
39
|
-
risk_network-0.0.
|
40
|
-
risk_network-0.0.
|
41
|
-
risk_network-0.0.
|
37
|
+
risk_network-0.0.9b30.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
38
|
+
risk_network-0.0.9b30.dist-info/METADATA,sha256=BtXlxahgmGpryFSTVXLOOliVwQ5HG3_WNdDwStbkcgo,47627
|
39
|
+
risk_network-0.0.9b30.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
40
|
+
risk_network-0.0.9b30.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
41
|
+
risk_network-0.0.9b30.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|