risk-network 0.0.9b26__py3-none-any.whl → 0.0.9b28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/annotations.py +39 -38
- risk/neighborhoods/api.py +1 -5
- risk/neighborhoods/community.py +140 -95
- risk/neighborhoods/neighborhoods.py +34 -18
- risk/network/geometry.py +24 -27
- risk/network/graph/api.py +6 -6
- risk/network/graph/{network.py → graph.py} +7 -7
- risk/network/graph/summary.py +3 -3
- risk/network/io.py +39 -15
- risk/network/plotter/__init__.py +2 -2
- risk/network/plotter/api.py +12 -12
- risk/network/plotter/canvas.py +7 -7
- risk/network/plotter/contour.py +6 -6
- risk/network/plotter/labels.py +5 -5
- risk/network/plotter/network.py +6 -136
- risk/network/plotter/plotter.py +143 -0
- risk/network/plotter/utils/colors.py +11 -11
- risk/network/plotter/utils/layout.py +2 -2
- risk/stats/__init__.py +8 -6
- risk/stats/{stats.py → significance.py} +2 -2
- risk/stats/stat_tests.py +272 -0
- {risk_network-0.0.9b26.dist-info → risk_network-0.0.9b28.dist-info}/METADATA +1 -1
- risk_network-0.0.9b28.dist-info/RECORD +41 -0
- risk/stats/binom.py +0 -51
- risk/stats/chi2.py +0 -69
- risk/stats/hypergeom.py +0 -64
- risk/stats/poisson.py +0 -50
- risk/stats/zscore.py +0 -68
- risk_network-0.0.9b26.dist-info/RECORD +0 -44
- {risk_network-0.0.9b26.dist-info → risk_network-0.0.9b28.dist-info}/LICENSE +0 -0
- {risk_network-0.0.9b26.dist-info → risk_network-0.0.9b28.dist-info}/WHEEL +0 -0
- {risk_network-0.0.9b26.dist-info → risk_network-0.0.9b28.dist-info}/top_level.txt +0 -0
risk/stats/stat_tests.py
ADDED
@@ -0,0 +1,272 @@
|
|
1
|
+
"""
|
2
|
+
risk/stats/stat_tests
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from typing import Any, Dict
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
from scipy.sparse import csr_matrix
|
10
|
+
from scipy.stats import binom
|
11
|
+
from scipy.stats import chi2
|
12
|
+
from scipy.stats import hypergeom
|
13
|
+
from scipy.stats import norm
|
14
|
+
from scipy.stats import poisson
|
15
|
+
|
16
|
+
|
17
|
+
def compute_binom_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Run a Binomial test for annotation enrichment and depletion in each neighborhood.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If null_distribution is not 'network' or 'annotations'.
    """
    # Nodes are the columns of the neighborhood matrix.
    node_count = neighborhoods.shape[1]

    # Sparse reductions: per-neighborhood sizes (row sums) and per-annotation totals (column sums).
    sizes = neighborhoods.sum(axis=1)
    totals = annotations.sum(axis=0)

    # Pick the denominator for the per-annotation success probability under the null.
    if null_distribution == "network":
        denominator = node_count
    elif null_distribution == "annotations":
        denominator = annotations.sum()
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )
    success_probs = (totals / denominator).A.flatten()  # Dense 1D probabilities

    # Observed annotated-node counts per neighborhood; densify for broadcasting.
    observed = (neighborhoods @ annotations).toarray()

    # P(X >= k) gives enrichment, P(X <= k) gives depletion.
    return {
        "enrichment_pvals": 1 - binom.cdf(observed - 1, sizes.A, success_probs),
        "depletion_pvals": binom.cdf(observed, sizes.A, success_probs),
    }
|
57
|
+
|
58
|
+
|
59
|
+
def compute_chi2_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values as dense ndarrays.

    Raises:
        ValueError: If null_distribution is not 'network' or 'annotations'.
    """
    # Total number of nodes in the network (rows of the neighborhood matrix).
    # NOTE(review): sibling tests in this module use shape[1]; presumably the
    # matrix is square (node x node) so both agree — confirm against callers.
    total_node_count = neighborhoods.shape[0]

    if null_distribution == "network":
        # Case 1: Use all nodes as the background
        background_population = total_node_count
        neighborhood_sums = neighborhoods.sum(axis=0)  # Column sums of neighborhoods
        annotation_sums = annotations.sum(axis=0)  # Column sums of annotations
    elif null_distribution == "annotations":
        # Case 2: Only consider nodes with at least one annotation
        annotated_nodes = (
            np.ravel(annotations.sum(axis=1)) > 0
        )  # Row-wise sum to filter nodes with annotations
        background_population = annotated_nodes.sum()  # Total number of annotated nodes
        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0)
        annotation_sums = annotations[annotated_nodes].sum(axis=0)
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Convert to dense arrays for downstream computations
    neighborhood_sums = np.asarray(neighborhood_sums).reshape(-1, 1)  # Column vector
    annotation_sums = np.asarray(annotation_sums).reshape(1, -1)  # Row vector

    # Observed values: number of annotated nodes in each neighborhood.
    # Densify here: leaving the sparse product in the dense arithmetic below
    # promotes intermediates to np.matrix, which is inconsistent with the
    # plain-ndarray results produced by the other tests in this module.
    observed = (neighborhoods.T @ annotations).toarray()
    # Expected values under the null
    expected = (neighborhood_sums @ annotation_sums) / background_population
    # Chi-squared statistic: (observed - expected)^2 / expected, guarding expected == 0
    with np.errstate(divide="ignore", invalid="ignore"):
        chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)

    # Upper tail -> enrichment, lower tail -> depletion (df=1)
    enrichment_pvals = chi2.sf(chi2_stat, df=1)
    depletion_pvals = chi2.cdf(chi2_stat, df=1)

    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|
116
|
+
|
117
|
+
|
118
|
+
def compute_hypergeom_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Run a hypergeometric test for annotation enrichment and depletion per neighborhood.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If null_distribution is not 'network' or 'annotations'.
    """
    if null_distribution == "network":
        # Every node in the network belongs to the background population.
        background_total = neighborhoods.shape[1]
    elif null_distribution == "annotations":
        # Restrict the background to nodes carrying at least one annotation.
        has_annotation = annotations.getnnz(axis=1) > 0
        background_total = has_annotation.sum()
        neighborhoods = neighborhoods[has_annotation]
        annotations = annotations[has_annotation]
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Column totals (after any background filtering), shaped for broadcasting.
    hood_totals = np.asarray(neighborhoods.sum(axis=0)).ravel().reshape(-1, 1)
    annot_totals = np.asarray(annotations.sum(axis=0)).ravel().reshape(1, -1)
    population = np.array([[background_total]])

    # Observed annotated-node counts per (neighborhood, annotation) pair.
    observed = (neighborhoods.T @ annotations).toarray()

    # Vectorized hypergeometric tails: cdf gives depletion, sf(k - 1) gives enrichment.
    depletion_pvals = hypergeom.cdf(observed, population, annot_totals, hood_totals)
    enrichment_pvals = hypergeom.sf(observed - 1, population, annot_totals, hood_totals)

    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|
175
|
+
|
176
|
+
|
177
|
+
def compute_poisson_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Run a Poisson test for annotation enrichment and depletion per neighborhood.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If null_distribution is not 'network' or 'annotations'.
    """
    # Observed annotated-node counts per neighborhood (sparse product, then dense).
    observed = (neighborhoods @ annotations).toarray()

    # Expected rate: mean over annotations per neighborhood ('network') or
    # mean over neighborhoods per annotation ('annotations').
    if null_distribution == "network":
        rate = observed.mean(axis=1, keepdims=True)
    elif null_distribution == "annotations":
        rate = observed.mean(axis=0, keepdims=True)
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # P(X >= k) for enrichment, P(X <= k) for depletion under Poisson(rate).
    return {
        "enrichment_pvals": 1 - poisson.cdf(observed - 1, rate),
        "depletion_pvals": poisson.cdf(observed, rate),
    }
|
215
|
+
|
216
|
+
|
217
|
+
def compute_zscore_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Run a Z-score test for annotation enrichment and depletion per neighborhood.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If null_distribution is not 'network' or 'annotations'.
    """
    if null_distribution == "network":
        # All nodes form the background population.
        background_total = neighborhoods.shape[1]
        hood_totals = neighborhoods.sum(axis=0).A.flatten()
        annot_totals = annotations.sum(axis=0).A.flatten()
    elif null_distribution == "annotations":
        # Background restricted to nodes with at least one annotation.
        is_annotated = annotations.sum(axis=1).A.flatten() > 0
        background_total = is_annotated.sum()
        hood_totals = neighborhoods[is_annotated].sum(axis=0).A.flatten()
        annot_totals = annotations[is_annotated].sum(axis=0).A.flatten()
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Observed per-(neighborhood, annotation) counts as a dense array.
    observed = (neighborhoods.T @ annotations).toarray()

    # Broadcast-ready column/row vectors, then the null expectation.
    hood_totals = hood_totals.reshape(-1, 1)
    annot_totals = annot_totals.reshape(1, -1)
    expected = (hood_totals @ annot_totals) / background_total

    # Null standard deviation; zeros become NaN so the division yields NaN rather than inf.
    std = np.sqrt(
        expected
        * (1 - annot_totals / background_total)
        * (1 - hood_totals / background_total)
    )
    std[std == 0] = np.nan
    z_scores = (observed - expected) / std

    # Upper tail of the normal gives enrichment, lower tail gives depletion.
    return {"depletion_pvals": norm.cdf(z_scores), "enrichment_pvals": norm.sf(z_scores)}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
risk/__init__.py,sha256=32Lq_wPcVY8stW7c0jkvgihM15jnYka5Hnw8M9gbjN0,127
|
2
|
+
risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
|
3
|
+
risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
|
4
|
+
risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
|
5
|
+
risk/annotations/annotations.py,sha256=g8ca9H49dZIqHv6Od3Dem4BIo_euy8alL3PDauT6ZJI,14088
|
6
|
+
risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
|
7
|
+
risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
|
8
|
+
risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
|
9
|
+
risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
|
10
|
+
risk/neighborhoods/__init__.py,sha256=Q74HwTH7okI-vaskJPy2bYwb5sNjGASTzJ6m8V8arCU,234
|
11
|
+
risk/neighborhoods/api.py,sha256=TjIMVnSPC702zMlwyaz2i0ofNx-d9L9g3P-TTSBMx90,23341
|
12
|
+
risk/neighborhoods/community.py,sha256=5Q_-VAJC-5SY5EUsB8gIlemeDoAL85uLjyl16pItHiQ,16699
|
13
|
+
risk/neighborhoods/domains.py,sha256=jMJ4-Qzwgmo6Hya8h0E2_IcMaLpbuH_FWlmSjJl2ikc,12832
|
14
|
+
risk/neighborhoods/neighborhoods.py,sha256=l9FhADB1C-OxM8E9QXOcA4osUDgA1vs4ud-OCGKKybc,21457
|
15
|
+
risk/network/__init__.py,sha256=oVi3FA1XXKD84014Cykq-9bpX4_s0F3aAUfNOU-07Qw,73
|
16
|
+
risk/network/geometry.py,sha256=dU1hMq4j9gG0nkDqGRl_NiZ2Z-xvT_HF11FwEQ7oOR4,6570
|
17
|
+
risk/network/io.py,sha256=PqsRw1g7nfJJ3xs4aYcim3JWWLMFS1irgtg5hIyht5I,24376
|
18
|
+
risk/network/graph/__init__.py,sha256=ziGJew3yhtqvrb9LUuneDu_LwW2Wa9vd4UuhoL5l1CA,91
|
19
|
+
risk/network/graph/api.py,sha256=t5Mh5_lD2uTLioEJFfCRe7ncc5iLNYzxd6r05wSiv7s,8169
|
20
|
+
risk/network/graph/graph.py,sha256=qEWyZvuaGT_vvjhreBdmRPX3gst2wQFaXhFAvikPSqw,12158
|
21
|
+
risk/network/graph/summary.py,sha256=eYJP78EHxu3ZhKoDCFshNxuEIB3dvH0PUg2T7qNkjC8,10289
|
22
|
+
risk/network/plotter/__init__.py,sha256=4gWtQHGzQVNHmEBXi31Zf0tX0y2sTcE66J_yGnn7268,99
|
23
|
+
risk/network/plotter/api.py,sha256=oJIj7vYv-3VpfN41ndCNtxcWIuhT2ULwAaPPU2f4oeM,1785
|
24
|
+
risk/network/plotter/canvas.py,sha256=ifyTMyXYRzlcdSYy6C23k3dmwtbLDrOfdMvEjkW2gLg,13460
|
25
|
+
risk/network/plotter/contour.py,sha256=oQDKmAKaEasnK1zqY7_bNctZ_IevZW2vxrbsnSrOSCI,15459
|
26
|
+
risk/network/plotter/labels.py,sha256=k5GWvgHS8bLekJk7Gtxy6G7tDeJDZPQ-z3VxYWjAWRM,45489
|
27
|
+
risk/network/plotter/network.py,sha256=0VySlJ4n3tkHsOhVVSa3yiSppT8y1dmIwa-DhRn0tcM,14131
|
28
|
+
risk/network/plotter/plotter.py,sha256=4PeAeutJbgvwy4USh5RdHALLtkmeAtaxQcd48r7Zxa0,5999
|
29
|
+
risk/network/plotter/utils/colors.py,sha256=VU1sLPRC99ll6EGK4vRNgLMUXU8lja1vjiXUL8GdfBE,18910
|
30
|
+
risk/network/plotter/utils/layout.py,sha256=OPqV8jzV9dpnOhYU4SYMSfsIXalVzESrlBSI_Y43OGU,3640
|
31
|
+
risk/stats/__init__.py,sha256=2zdLv3tUHKyAjwAo7LprVXRaak1cHgrpYMVMSik6JM4,324
|
32
|
+
risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,7336
|
33
|
+
risk/stats/stat_tests.py,sha256=qYn85VrNJeIlEptkEUoYsPco4BQ604CLJxXczgekXgc,11986
|
34
|
+
risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
|
35
|
+
risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
|
36
|
+
risk/stats/permutation/test_functions.py,sha256=D3XMPM8CasUNytWSRce22TI6KK6XulYn5uGG4lWxaHs,3120
|
37
|
+
risk_network-0.0.9b28.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
38
|
+
risk_network-0.0.9b28.dist-info/METADATA,sha256=v9uuUQ9EwyI5WzIirw_ONry2KPaiHqTiw2TDWX60Y6c,47627
|
39
|
+
risk_network-0.0.9b28.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
40
|
+
risk_network-0.0.9b28.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
41
|
+
risk_network-0.0.9b28.dist-info/RECORD,,
|
risk/stats/binom.py
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
risk/stats/binomial
|
3
|
-
~~~~~~~~~~~~~~~~~~~
|
4
|
-
"""
|
5
|
-
|
6
|
-
from typing import Any, Dict
|
7
|
-
|
8
|
-
from scipy.sparse import csr_matrix
|
9
|
-
from scipy.stats import binom
|
10
|
-
|
11
|
-
|
12
|
-
def compute_binom_test(
|
13
|
-
neighborhoods: csr_matrix,
|
14
|
-
annotations: csr_matrix,
|
15
|
-
null_distribution: str = "network",
|
16
|
-
) -> Dict[str, Any]:
|
17
|
-
"""Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
|
18
|
-
|
19
|
-
Args:
|
20
|
-
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
21
|
-
annotations (csr_matrix): Sparse binary matrix representing annotations.
|
22
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
23
|
-
|
24
|
-
Returns:
|
25
|
-
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
26
|
-
"""
|
27
|
-
# Get the total number of nodes in the network
|
28
|
-
total_nodes = neighborhoods.shape[1]
|
29
|
-
|
30
|
-
# Compute sums (remain sparse here)
|
31
|
-
neighborhood_sizes = neighborhoods.sum(axis=1) # Row sums
|
32
|
-
annotation_totals = annotations.sum(axis=0) # Column sums
|
33
|
-
# Compute probabilities (convert to dense)
|
34
|
-
if null_distribution == "network":
|
35
|
-
p_values = (annotation_totals / total_nodes).A.flatten() # Dense 1D array
|
36
|
-
elif null_distribution == "annotations":
|
37
|
-
p_values = (annotation_totals / annotations.sum()).A.flatten() # Dense 1D array
|
38
|
-
else:
|
39
|
-
raise ValueError(
|
40
|
-
"Invalid null_distribution value. Choose either 'network' or 'annotations'."
|
41
|
-
)
|
42
|
-
|
43
|
-
# Observed counts (sparse matrix multiplication)
|
44
|
-
annotated_counts = neighborhoods @ annotations # Sparse result
|
45
|
-
annotated_counts_dense = annotated_counts.toarray() # Convert for dense operations
|
46
|
-
|
47
|
-
# Compute enrichment and depletion p-values
|
48
|
-
enrichment_pvals = 1 - binom.cdf(annotated_counts_dense - 1, neighborhood_sizes.A, p_values)
|
49
|
-
depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes.A, p_values)
|
50
|
-
|
51
|
-
return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
|
risk/stats/chi2.py
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
risk/stats/chi2
|
3
|
-
~~~~~~~~~~~~~~~
|
4
|
-
"""
|
5
|
-
|
6
|
-
from typing import Any, Dict
|
7
|
-
|
8
|
-
import numpy as np
|
9
|
-
from scipy.sparse import csr_matrix
|
10
|
-
from scipy.stats import chi2
|
11
|
-
|
12
|
-
|
13
|
-
def compute_chi2_test(
|
14
|
-
neighborhoods: csr_matrix,
|
15
|
-
annotations: csr_matrix,
|
16
|
-
null_distribution: str = "network",
|
17
|
-
) -> Dict[str, Any]:
|
18
|
-
"""Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
|
19
|
-
|
20
|
-
Args:
|
21
|
-
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
22
|
-
annotations (csr_matrix): Sparse binary matrix representing annotations.
|
23
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
24
|
-
|
25
|
-
Returns:
|
26
|
-
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
27
|
-
"""
|
28
|
-
# Total number of nodes in the network
|
29
|
-
total_node_count = neighborhoods.shape[0]
|
30
|
-
|
31
|
-
if null_distribution == "network":
|
32
|
-
# Case 1: Use all nodes as the background
|
33
|
-
background_population = total_node_count
|
34
|
-
neighborhood_sums = neighborhoods.sum(axis=0) # Column sums of neighborhoods
|
35
|
-
annotation_sums = annotations.sum(axis=0) # Column sums of annotations
|
36
|
-
elif null_distribution == "annotations":
|
37
|
-
# Case 2: Only consider nodes with at least one annotation
|
38
|
-
annotated_nodes = (
|
39
|
-
np.ravel(annotations.sum(axis=1)) > 0
|
40
|
-
) # Row-wise sum to filter nodes with annotations
|
41
|
-
background_population = annotated_nodes.sum() # Total number of annotated nodes
|
42
|
-
neighborhood_sums = neighborhoods[annotated_nodes].sum(
|
43
|
-
axis=0
|
44
|
-
) # Neighborhood sums for annotated nodes
|
45
|
-
annotation_sums = annotations[annotated_nodes].sum(
|
46
|
-
axis=0
|
47
|
-
) # Annotation sums for annotated nodes
|
48
|
-
else:
|
49
|
-
raise ValueError(
|
50
|
-
"Invalid null_distribution value. Choose either 'network' or 'annotations'."
|
51
|
-
)
|
52
|
-
|
53
|
-
# Convert to dense arrays for downstream computations
|
54
|
-
neighborhood_sums = np.asarray(neighborhood_sums).reshape(-1, 1) # Ensure column vector shape
|
55
|
-
annotation_sums = np.asarray(annotation_sums).reshape(1, -1) # Ensure row vector shape
|
56
|
-
|
57
|
-
# Observed values: number of annotated nodes in each neighborhood
|
58
|
-
observed = neighborhoods.T @ annotations # Shape: (neighborhoods, annotations)
|
59
|
-
# Expected values under the null
|
60
|
-
expected = (neighborhood_sums @ annotation_sums) / background_population
|
61
|
-
# Chi-squared statistic: sum((observed - expected)^2 / expected)
|
62
|
-
with np.errstate(divide="ignore", invalid="ignore"): # Handle divide-by-zero
|
63
|
-
chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)
|
64
|
-
|
65
|
-
# Compute p-values for enrichment (upper tail) and depletion (lower tail)
|
66
|
-
enrichment_pvals = chi2.sf(chi2_stat, df=1) # Survival function for upper tail
|
67
|
-
depletion_pvals = chi2.cdf(chi2_stat, df=1) # Cumulative distribution for lower tail
|
68
|
-
|
69
|
-
return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|
risk/stats/hypergeom.py
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
risk/stats/hypergeom
|
3
|
-
~~~~~~~~~~~~~~~~~~~~
|
4
|
-
"""
|
5
|
-
|
6
|
-
from typing import Any, Dict
|
7
|
-
|
8
|
-
import numpy as np
|
9
|
-
from scipy.sparse import csr_matrix
|
10
|
-
from scipy.stats import hypergeom
|
11
|
-
|
12
|
-
|
13
|
-
def compute_hypergeom_test(
|
14
|
-
neighborhoods: csr_matrix,
|
15
|
-
annotations: csr_matrix,
|
16
|
-
null_distribution: str = "network",
|
17
|
-
) -> Dict[str, Any]:
|
18
|
-
"""
|
19
|
-
Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
|
20
|
-
|
21
|
-
Args:
|
22
|
-
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
23
|
-
annotations (csr_matrix): Sparse binary matrix representing annotations.
|
24
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
25
|
-
|
26
|
-
Returns:
|
27
|
-
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
28
|
-
"""
|
29
|
-
# Get the total number of nodes in the network
|
30
|
-
total_nodes = neighborhoods.shape[1]
|
31
|
-
|
32
|
-
# Compute sums
|
33
|
-
neighborhood_sums = neighborhoods.sum(axis=0).A.flatten() # Convert to dense array
|
34
|
-
annotation_sums = annotations.sum(axis=0).A.flatten() # Convert to dense array
|
35
|
-
|
36
|
-
if null_distribution == "network":
|
37
|
-
background_population = total_nodes
|
38
|
-
elif null_distribution == "annotations":
|
39
|
-
annotated_nodes = annotations.sum(axis=1).A.flatten() > 0 # Boolean mask
|
40
|
-
background_population = annotated_nodes.sum()
|
41
|
-
neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
|
42
|
-
annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
|
43
|
-
else:
|
44
|
-
raise ValueError(
|
45
|
-
"Invalid null_distribution value. Choose either 'network' or 'annotations'."
|
46
|
-
)
|
47
|
-
|
48
|
-
# Observed counts
|
49
|
-
annotated_in_neighborhood = neighborhoods.T @ annotations # Sparse result
|
50
|
-
annotated_in_neighborhood = annotated_in_neighborhood.toarray() # Convert to dense
|
51
|
-
# Align shapes for broadcasting
|
52
|
-
neighborhood_sums = neighborhood_sums.reshape(-1, 1)
|
53
|
-
annotation_sums = annotation_sums.reshape(1, -1)
|
54
|
-
background_population = np.array(background_population).reshape(1, 1)
|
55
|
-
|
56
|
-
# Compute hypergeometric p-values
|
57
|
-
depletion_pvals = hypergeom.cdf(
|
58
|
-
annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
|
59
|
-
)
|
60
|
-
enrichment_pvals = hypergeom.sf(
|
61
|
-
annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
|
62
|
-
)
|
63
|
-
|
64
|
-
return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|
risk/stats/poisson.py
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
risk/stats/poisson
|
3
|
-
~~~~~~~~~~~~~~~~~~
|
4
|
-
"""
|
5
|
-
|
6
|
-
from typing import Any, Dict
|
7
|
-
|
8
|
-
import numpy as np
|
9
|
-
from scipy.sparse import csr_matrix
|
10
|
-
from scipy.stats import poisson
|
11
|
-
|
12
|
-
|
13
|
-
def compute_poisson_test(
|
14
|
-
neighborhoods: csr_matrix,
|
15
|
-
annotations: csr_matrix,
|
16
|
-
null_distribution: str = "network",
|
17
|
-
) -> Dict[str, Any]:
|
18
|
-
"""
|
19
|
-
Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
|
20
|
-
|
21
|
-
Args:
|
22
|
-
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
23
|
-
annotations (csr_matrix): Sparse binary matrix representing annotations.
|
24
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
25
|
-
|
26
|
-
Returns:
|
27
|
-
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
28
|
-
"""
|
29
|
-
# Matrix multiplication to get the number of annotated nodes in each neighborhood
|
30
|
-
annotated_in_neighborhood = neighborhoods @ annotations # Sparse result
|
31
|
-
# Convert annotated counts to dense for downstream calculations
|
32
|
-
annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
|
33
|
-
|
34
|
-
# Compute lambda_expected based on the chosen null distribution
|
35
|
-
if null_distribution == "network":
|
36
|
-
# Use the mean across neighborhoods (axis=1)
|
37
|
-
lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
|
38
|
-
elif null_distribution == "annotations":
|
39
|
-
# Use the mean across annotations (axis=0)
|
40
|
-
lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
|
41
|
-
else:
|
42
|
-
raise ValueError(
|
43
|
-
"Invalid null_distribution value. Choose either 'network' or 'annotations'."
|
44
|
-
)
|
45
|
-
|
46
|
-
# Compute p-values for enrichment and depletion using Poisson distribution
|
47
|
-
enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
|
48
|
-
depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
|
49
|
-
|
50
|
-
return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
|
risk/stats/zscore.py
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
risk/stats/zscore
|
3
|
-
~~~~~~~~~~~~~~~~~~
|
4
|
-
"""
|
5
|
-
|
6
|
-
from typing import Any, Dict
|
7
|
-
|
8
|
-
import numpy as np
|
9
|
-
from scipy.sparse import csr_matrix
|
10
|
-
from scipy.stats import norm
|
11
|
-
|
12
|
-
|
13
|
-
def compute_zscore_test(
|
14
|
-
neighborhoods: csr_matrix,
|
15
|
-
annotations: csr_matrix,
|
16
|
-
null_distribution: str = "network",
|
17
|
-
) -> Dict[str, Any]:
|
18
|
-
"""
|
19
|
-
Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
|
20
|
-
|
21
|
-
Args:
|
22
|
-
neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
|
23
|
-
annotations (csr_matrix): Sparse binary matrix representing annotations.
|
24
|
-
null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
|
25
|
-
|
26
|
-
Returns:
|
27
|
-
Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
|
28
|
-
"""
|
29
|
-
# Total number of nodes in the network
|
30
|
-
total_node_count = neighborhoods.shape[1]
|
31
|
-
|
32
|
-
# Compute sums
|
33
|
-
if null_distribution == "network":
|
34
|
-
background_population = total_node_count
|
35
|
-
neighborhood_sums = neighborhoods.sum(axis=0).A.flatten() # Dense column sums
|
36
|
-
annotation_sums = annotations.sum(axis=0).A.flatten() # Dense row sums
|
37
|
-
elif null_distribution == "annotations":
|
38
|
-
annotated_nodes = annotations.sum(axis=1).A.flatten() > 0 # Dense boolean mask
|
39
|
-
background_population = annotated_nodes.sum()
|
40
|
-
neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
|
41
|
-
annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
|
42
|
-
else:
|
43
|
-
raise ValueError(
|
44
|
-
"Invalid null_distribution value. Choose either 'network' or 'annotations'."
|
45
|
-
)
|
46
|
-
|
47
|
-
# Observed values
|
48
|
-
observed = (neighborhoods.T @ annotations).toarray() # Convert sparse result to dense
|
49
|
-
# Expected values under the null
|
50
|
-
neighborhood_sums = neighborhood_sums.reshape(-1, 1) # Ensure correct shape
|
51
|
-
annotation_sums = annotation_sums.reshape(1, -1) # Ensure correct shape
|
52
|
-
expected = (neighborhood_sums @ annotation_sums) / background_population
|
53
|
-
|
54
|
-
# Standard deviation under the null
|
55
|
-
std_dev = np.sqrt(
|
56
|
-
expected
|
57
|
-
* (1 - annotation_sums / background_population)
|
58
|
-
* (1 - neighborhood_sums / background_population)
|
59
|
-
)
|
60
|
-
std_dev[std_dev == 0] = np.nan # Avoid division by zero
|
61
|
-
# Compute Z-scores
|
62
|
-
z_scores = (observed - expected) / std_dev
|
63
|
-
|
64
|
-
# Convert Z-scores to depletion and enrichment p-values
|
65
|
-
enrichment_pvals = norm.sf(z_scores) # Upper tail
|
66
|
-
depletion_pvals = norm.cdf(z_scores) # Lower tail
|
67
|
-
|
68
|
-
return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|