risk-network 0.0.9b26__py3-none-any.whl → 0.0.9b28__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
risk/stats/stat_tests.py ADDED
@@ -0,0 +1,272 @@
+ """
+ risk/stats/stat_tests
+ ~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ from typing import Any, Dict
+
+ import numpy as np
+ from scipy.sparse import csr_matrix
+ from scipy.stats import binom
+ from scipy.stats import chi2
+ from scipy.stats import hypergeom
+ from scipy.stats import norm
+ from scipy.stats import poisson
+
+
+ def compute_binom_test(
+     neighborhoods: csr_matrix,
+     annotations: csr_matrix,
+     null_distribution: str = "network",
+ ) -> Dict[str, Any]:
+     """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+     Args:
+         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+         annotations (csr_matrix): Sparse binary matrix representing annotations.
+         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+     Returns:
+         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+     """
+     # Get the total number of nodes in the network
+     total_nodes = neighborhoods.shape[1]
+
+     # Compute sums (remain sparse here)
+     neighborhood_sizes = neighborhoods.sum(axis=1)  # Row sums
+     annotation_totals = annotations.sum(axis=0)  # Column sums
+     # Compute probabilities (convert to dense)
+     if null_distribution == "network":
+         p_values = (annotation_totals / total_nodes).A.flatten()  # Dense 1D array
+     elif null_distribution == "annotations":
+         p_values = (annotation_totals / annotations.sum()).A.flatten()  # Dense 1D array
+     else:
+         raise ValueError(
+             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+         )
+
+     # Observed counts (sparse matrix multiplication)
+     annotated_counts = neighborhoods @ annotations  # Sparse result
+     annotated_counts_dense = annotated_counts.toarray()  # Convert for dense operations
+
+     # Compute enrichment and depletion p-values
+     enrichment_pvals = 1 - binom.cdf(annotated_counts_dense - 1, neighborhood_sizes.A, p_values)
+     depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes.A, p_values)
+
+     return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+ def compute_chi2_test(
+     neighborhoods: csr_matrix,
+     annotations: csr_matrix,
+     null_distribution: str = "network",
+ ) -> Dict[str, Any]:
+     """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+     Args:
+         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+         annotations (csr_matrix): Sparse binary matrix representing annotations.
+         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+     Returns:
+         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+     """
+     # Total number of nodes in the network
+     total_node_count = neighborhoods.shape[0]
+
+     if null_distribution == "network":
+         # Case 1: Use all nodes as the background
+         background_population = total_node_count
+         neighborhood_sums = neighborhoods.sum(axis=0)  # Column sums of neighborhoods
+         annotation_sums = annotations.sum(axis=0)  # Column sums of annotations
+     elif null_distribution == "annotations":
+         # Case 2: Only consider nodes with at least one annotation
+         annotated_nodes = (
+             np.ravel(annotations.sum(axis=1)) > 0
+         )  # Row-wise sum to filter nodes with annotations
+         background_population = annotated_nodes.sum()  # Total number of annotated nodes
+         neighborhood_sums = neighborhoods[annotated_nodes].sum(
+             axis=0
+         )  # Neighborhood sums for annotated nodes
+         annotation_sums = annotations[annotated_nodes].sum(
+             axis=0
+         )  # Annotation sums for annotated nodes
+     else:
+         raise ValueError(
+             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+         )
+
+     # Convert to dense arrays for downstream computations
+     neighborhood_sums = np.asarray(neighborhood_sums).reshape(-1, 1)  # Ensure column vector shape
+     annotation_sums = np.asarray(annotation_sums).reshape(1, -1)  # Ensure row vector shape
+
+     # Observed values: number of annotated nodes in each neighborhood
+     observed = neighborhoods.T @ annotations  # Shape: (neighborhoods, annotations)
+     # Expected values under the null
+     expected = (neighborhood_sums @ annotation_sums) / background_population
+     # Chi-squared statistic: sum((observed - expected)^2 / expected)
+     with np.errstate(divide="ignore", invalid="ignore"):  # Handle divide-by-zero
+         chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)
+
+     # Compute p-values for enrichment (upper tail) and depletion (lower tail)
+     enrichment_pvals = chi2.sf(chi2_stat, df=1)  # Survival function for upper tail
+     depletion_pvals = chi2.cdf(chi2_stat, df=1)  # Cumulative distribution for lower tail
+
+     return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+ def compute_hypergeom_test(
+     neighborhoods: csr_matrix,
+     annotations: csr_matrix,
+     null_distribution: str = "network",
+ ) -> Dict[str, Any]:
+     """
+     Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+     Args:
+         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+         annotations (csr_matrix): Sparse binary matrix representing annotations.
+         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+     Returns:
+         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+     """
+     # Total number of nodes
+     total_nodes = neighborhoods.shape[1]
+
+     # Compute sums directly using sparse operations
+     neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()
+     annotation_sums = annotations.sum(axis=0).A.flatten()
+
+     if null_distribution == "network":
+         background_population = total_nodes
+     elif null_distribution == "annotations":
+         # Boolean mask for nodes with annotations
+         annotated_nodes = annotations.getnnz(axis=1) > 0
+         background_population = annotated_nodes.sum()
+         # Filter neighborhoods and annotations to include only annotated nodes
+         neighborhoods = neighborhoods[annotated_nodes]
+         annotations = annotations[annotated_nodes]
+         neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()
+         annotation_sums = annotations.sum(axis=0).A.flatten()
+     else:
+         raise ValueError(
+             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+         )
+
+     # Compute annotated nodes in each neighborhood
+     annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse multiplication
+     # Convert to dense arrays for vectorized operations
+     annotated_in_neighborhood = annotated_in_neighborhood.toarray()
+     # Align shapes for broadcasting
+     neighborhood_sums = neighborhood_sums[:, np.newaxis]
+     annotation_sums = annotation_sums[np.newaxis, :]
+     background_population = np.array([[background_population]])
+
+     # Fully vectorized hypergeometric calculations
+     depletion_pvals = hypergeom.cdf(
+         annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
+     )
+     enrichment_pvals = hypergeom.sf(
+         annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
+     )
+
+     return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+ def compute_poisson_test(
+     neighborhoods: csr_matrix,
+     annotations: csr_matrix,
+     null_distribution: str = "network",
+ ) -> Dict[str, Any]:
+     """
+     Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+     Args:
+         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+         annotations (csr_matrix): Sparse binary matrix representing annotations.
+         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+     Returns:
+         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+     """
+     # Matrix multiplication to get the number of annotated nodes in each neighborhood
+     annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
+     # Convert annotated counts to dense for downstream calculations
+     annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
+
+     # Compute lambda_expected based on the chosen null distribution
+     if null_distribution == "network":
+         # Use the mean across neighborhoods (axis=1)
+         lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
+     elif null_distribution == "annotations":
+         # Use the mean across annotations (axis=0)
+         lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
+     else:
+         raise ValueError(
+             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+         )
+
+     # Compute p-values for enrichment and depletion using Poisson distribution
+     enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
+     depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
+
+     return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+ def compute_zscore_test(
+     neighborhoods: csr_matrix,
+     annotations: csr_matrix,
+     null_distribution: str = "network",
+ ) -> Dict[str, Any]:
+     """
+     Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+     Args:
+         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+         annotations (csr_matrix): Sparse binary matrix representing annotations.
+         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+     Returns:
+         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+     """
+     # Total number of nodes in the network
+     total_node_count = neighborhoods.shape[1]
+
+     # Compute sums
+     if null_distribution == "network":
+         background_population = total_node_count
+         neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
+         annotation_sums = annotations.sum(axis=0).A.flatten()  # Dense row sums
+     elif null_distribution == "annotations":
+         annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
+         background_population = annotated_nodes.sum()
+         neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+         annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+     else:
+         raise ValueError(
+             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+         )
+
+     # Observed values
+     observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
+     # Expected values under the null
+     neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
+     annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
+     expected = (neighborhood_sums @ annotation_sums) / background_population
+
+     # Standard deviation under the null
+     std_dev = np.sqrt(
+         expected
+         * (1 - annotation_sums / background_population)
+         * (1 - neighborhood_sums / background_population)
+     )
+     std_dev[std_dev == 0] = np.nan  # Avoid division by zero
+     # Compute Z-scores
+     z_scores = (observed - expected) / std_dev
+
+     # Convert Z-scores to depletion and enrichment p-values
+     enrichment_pvals = norm.sf(z_scores)  # Upper tail
+     depletion_pvals = norm.cdf(z_scores)  # Lower tail
+
+     return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
risk_network-0.0.9b28.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: risk-network
- Version: 0.0.9b26
+ Version: 0.0.9b28
  Summary: A Python package for biological network analysis
  Author: Ira Horecka
  Author-email: Ira Horecka <ira89@icloud.com>
risk_network-0.0.9b28.dist-info/RECORD ADDED
@@ -0,0 +1,41 @@
+ risk/__init__.py,sha256=32Lq_wPcVY8stW7c0jkvgihM15jnYka5Hnw8M9gbjN0,127
+ risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
+ risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
+ risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
+ risk/annotations/annotations.py,sha256=g8ca9H49dZIqHv6Od3Dem4BIo_euy8alL3PDauT6ZJI,14088
+ risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
+ risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
+ risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
+ risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
+ risk/neighborhoods/__init__.py,sha256=Q74HwTH7okI-vaskJPy2bYwb5sNjGASTzJ6m8V8arCU,234
+ risk/neighborhoods/api.py,sha256=TjIMVnSPC702zMlwyaz2i0ofNx-d9L9g3P-TTSBMx90,23341
+ risk/neighborhoods/community.py,sha256=5Q_-VAJC-5SY5EUsB8gIlemeDoAL85uLjyl16pItHiQ,16699
+ risk/neighborhoods/domains.py,sha256=jMJ4-Qzwgmo6Hya8h0E2_IcMaLpbuH_FWlmSjJl2ikc,12832
+ risk/neighborhoods/neighborhoods.py,sha256=l9FhADB1C-OxM8E9QXOcA4osUDgA1vs4ud-OCGKKybc,21457
+ risk/network/__init__.py,sha256=oVi3FA1XXKD84014Cykq-9bpX4_s0F3aAUfNOU-07Qw,73
+ risk/network/geometry.py,sha256=dU1hMq4j9gG0nkDqGRl_NiZ2Z-xvT_HF11FwEQ7oOR4,6570
+ risk/network/io.py,sha256=PqsRw1g7nfJJ3xs4aYcim3JWWLMFS1irgtg5hIyht5I,24376
+ risk/network/graph/__init__.py,sha256=ziGJew3yhtqvrb9LUuneDu_LwW2Wa9vd4UuhoL5l1CA,91
+ risk/network/graph/api.py,sha256=t5Mh5_lD2uTLioEJFfCRe7ncc5iLNYzxd6r05wSiv7s,8169
+ risk/network/graph/graph.py,sha256=qEWyZvuaGT_vvjhreBdmRPX3gst2wQFaXhFAvikPSqw,12158
+ risk/network/graph/summary.py,sha256=eYJP78EHxu3ZhKoDCFshNxuEIB3dvH0PUg2T7qNkjC8,10289
+ risk/network/plotter/__init__.py,sha256=4gWtQHGzQVNHmEBXi31Zf0tX0y2sTcE66J_yGnn7268,99
+ risk/network/plotter/api.py,sha256=oJIj7vYv-3VpfN41ndCNtxcWIuhT2ULwAaPPU2f4oeM,1785
+ risk/network/plotter/canvas.py,sha256=ifyTMyXYRzlcdSYy6C23k3dmwtbLDrOfdMvEjkW2gLg,13460
+ risk/network/plotter/contour.py,sha256=oQDKmAKaEasnK1zqY7_bNctZ_IevZW2vxrbsnSrOSCI,15459
+ risk/network/plotter/labels.py,sha256=k5GWvgHS8bLekJk7Gtxy6G7tDeJDZPQ-z3VxYWjAWRM,45489
+ risk/network/plotter/network.py,sha256=0VySlJ4n3tkHsOhVVSa3yiSppT8y1dmIwa-DhRn0tcM,14131
+ risk/network/plotter/plotter.py,sha256=4PeAeutJbgvwy4USh5RdHALLtkmeAtaxQcd48r7Zxa0,5999
+ risk/network/plotter/utils/colors.py,sha256=VU1sLPRC99ll6EGK4vRNgLMUXU8lja1vjiXUL8GdfBE,18910
+ risk/network/plotter/utils/layout.py,sha256=OPqV8jzV9dpnOhYU4SYMSfsIXalVzESrlBSI_Y43OGU,3640
+ risk/stats/__init__.py,sha256=2zdLv3tUHKyAjwAo7LprVXRaak1cHgrpYMVMSik6JM4,324
+ risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,7336
+ risk/stats/stat_tests.py,sha256=qYn85VrNJeIlEptkEUoYsPco4BQ604CLJxXczgekXgc,11986
+ risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
+ risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
+ risk/stats/permutation/test_functions.py,sha256=D3XMPM8CasUNytWSRce22TI6KK6XulYn5uGG4lWxaHs,3120
+ risk_network-0.0.9b28.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+ risk_network-0.0.9b28.dist-info/METADATA,sha256=v9uuUQ9EwyI5WzIirw_ONry2KPaiHqTiw2TDWX60Y6c,47627
+ risk_network-0.0.9b28.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ risk_network-0.0.9b28.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+ risk_network-0.0.9b28.dist-info/RECORD,,
risk/stats/binom.py DELETED
@@ -1,51 +0,0 @@
- """
- risk/stats/binomial
- ~~~~~~~~~~~~~~~~~~~
- """
-
- from typing import Any, Dict
-
- from scipy.sparse import csr_matrix
- from scipy.stats import binom
-
-
- def compute_binom_test(
-     neighborhoods: csr_matrix,
-     annotations: csr_matrix,
-     null_distribution: str = "network",
- ) -> Dict[str, Any]:
-     """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
-
-     Args:
-         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-         annotations (csr_matrix): Sparse binary matrix representing annotations.
-         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
-
-     Returns:
-         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
-     """
-     # Get the total number of nodes in the network
-     total_nodes = neighborhoods.shape[1]
-
-     # Compute sums (remain sparse here)
-     neighborhood_sizes = neighborhoods.sum(axis=1)  # Row sums
-     annotation_totals = annotations.sum(axis=0)  # Column sums
-     # Compute probabilities (convert to dense)
-     if null_distribution == "network":
-         p_values = (annotation_totals / total_nodes).A.flatten()  # Dense 1D array
-     elif null_distribution == "annotations":
-         p_values = (annotation_totals / annotations.sum()).A.flatten()  # Dense 1D array
-     else:
-         raise ValueError(
-             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
-         )
-
-     # Observed counts (sparse matrix multiplication)
-     annotated_counts = neighborhoods @ annotations  # Sparse result
-     annotated_counts_dense = annotated_counts.toarray()  # Convert for dense operations
-
-     # Compute enrichment and depletion p-values
-     enrichment_pvals = 1 - binom.cdf(annotated_counts_dense - 1, neighborhood_sizes.A, p_values)
-     depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes.A, p_values)
-
-     return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
risk/stats/chi2.py DELETED
@@ -1,69 +0,0 @@
- """
- risk/stats/chi2
- ~~~~~~~~~~~~~~~
- """
-
- from typing import Any, Dict
-
- import numpy as np
- from scipy.sparse import csr_matrix
- from scipy.stats import chi2
-
-
- def compute_chi2_test(
-     neighborhoods: csr_matrix,
-     annotations: csr_matrix,
-     null_distribution: str = "network",
- ) -> Dict[str, Any]:
-     """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
-
-     Args:
-         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-         annotations (csr_matrix): Sparse binary matrix representing annotations.
-         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
-
-     Returns:
-         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
-     """
-     # Total number of nodes in the network
-     total_node_count = neighborhoods.shape[0]
-
-     if null_distribution == "network":
-         # Case 1: Use all nodes as the background
-         background_population = total_node_count
-         neighborhood_sums = neighborhoods.sum(axis=0)  # Column sums of neighborhoods
-         annotation_sums = annotations.sum(axis=0)  # Column sums of annotations
-     elif null_distribution == "annotations":
-         # Case 2: Only consider nodes with at least one annotation
-         annotated_nodes = (
-             np.ravel(annotations.sum(axis=1)) > 0
-         )  # Row-wise sum to filter nodes with annotations
-         background_population = annotated_nodes.sum()  # Total number of annotated nodes
-         neighborhood_sums = neighborhoods[annotated_nodes].sum(
-             axis=0
-         )  # Neighborhood sums for annotated nodes
-         annotation_sums = annotations[annotated_nodes].sum(
-             axis=0
-         )  # Annotation sums for annotated nodes
-     else:
-         raise ValueError(
-             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
-         )
-
-     # Convert to dense arrays for downstream computations
-     neighborhood_sums = np.asarray(neighborhood_sums).reshape(-1, 1)  # Ensure column vector shape
-     annotation_sums = np.asarray(annotation_sums).reshape(1, -1)  # Ensure row vector shape
-
-     # Observed values: number of annotated nodes in each neighborhood
-     observed = neighborhoods.T @ annotations  # Shape: (neighborhoods, annotations)
-     # Expected values under the null
-     expected = (neighborhood_sums @ annotation_sums) / background_population
-     # Chi-squared statistic: sum((observed - expected)^2 / expected)
-     with np.errstate(divide="ignore", invalid="ignore"):  # Handle divide-by-zero
-         chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)
-
-     # Compute p-values for enrichment (upper tail) and depletion (lower tail)
-     enrichment_pvals = chi2.sf(chi2_stat, df=1)  # Survival function for upper tail
-     depletion_pvals = chi2.cdf(chi2_stat, df=1)  # Cumulative distribution for lower tail
-
-     return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
risk/stats/hypergeom.py DELETED
@@ -1,64 +0,0 @@
- """
- risk/stats/hypergeom
- ~~~~~~~~~~~~~~~~~~~~
- """
-
- from typing import Any, Dict
-
- import numpy as np
- from scipy.sparse import csr_matrix
- from scipy.stats import hypergeom
-
-
- def compute_hypergeom_test(
-     neighborhoods: csr_matrix,
-     annotations: csr_matrix,
-     null_distribution: str = "network",
- ) -> Dict[str, Any]:
-     """
-     Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
-
-     Args:
-         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-         annotations (csr_matrix): Sparse binary matrix representing annotations.
-         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
-
-     Returns:
-         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
-     """
-     # Get the total number of nodes in the network
-     total_nodes = neighborhoods.shape[1]
-
-     # Compute sums
-     neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Convert to dense array
-     annotation_sums = annotations.sum(axis=0).A.flatten()  # Convert to dense array
-
-     if null_distribution == "network":
-         background_population = total_nodes
-     elif null_distribution == "annotations":
-         annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
-         background_population = annotated_nodes.sum()
-         neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
-         annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
-     else:
-         raise ValueError(
-             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
-         )
-
-     # Observed counts
-     annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse result
-     annotated_in_neighborhood = annotated_in_neighborhood.toarray()  # Convert to dense
-     # Align shapes for broadcasting
-     neighborhood_sums = neighborhood_sums.reshape(-1, 1)
-     annotation_sums = annotation_sums.reshape(1, -1)
-     background_population = np.array(background_population).reshape(1, 1)
-
-     # Compute hypergeometric p-values
-     depletion_pvals = hypergeom.cdf(
-         annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
-     )
-     enrichment_pvals = hypergeom.sf(
-         annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
-     )
-
-     return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
risk/stats/poisson.py DELETED
@@ -1,50 +0,0 @@
- """
- risk/stats/poisson
- ~~~~~~~~~~~~~~~~~~
- """
-
- from typing import Any, Dict
-
- import numpy as np
- from scipy.sparse import csr_matrix
- from scipy.stats import poisson
-
-
- def compute_poisson_test(
-     neighborhoods: csr_matrix,
-     annotations: csr_matrix,
-     null_distribution: str = "network",
- ) -> Dict[str, Any]:
-     """
-     Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
-
-     Args:
-         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-         annotations (csr_matrix): Sparse binary matrix representing annotations.
-         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
-
-     Returns:
-         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
-     """
-     # Matrix multiplication to get the number of annotated nodes in each neighborhood
-     annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
-     # Convert annotated counts to dense for downstream calculations
-     annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
-
-     # Compute lambda_expected based on the chosen null distribution
-     if null_distribution == "network":
-         # Use the mean across neighborhoods (axis=1)
-         lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
-     elif null_distribution == "annotations":
-         # Use the mean across annotations (axis=0)
-         lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
-     else:
-         raise ValueError(
-             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
-         )
-
-     # Compute p-values for enrichment and depletion using Poisson distribution
-     enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
-     depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
-
-     return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
risk/stats/zscore.py DELETED
@@ -1,68 +0,0 @@
- """
- risk/stats/zscore
- ~~~~~~~~~~~~~~~~~~
- """
-
- from typing import Any, Dict
-
- import numpy as np
- from scipy.sparse import csr_matrix
- from scipy.stats import norm
-
-
- def compute_zscore_test(
-     neighborhoods: csr_matrix,
-     annotations: csr_matrix,
-     null_distribution: str = "network",
- ) -> Dict[str, Any]:
-     """
-     Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
-
-     Args:
-         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-         annotations (csr_matrix): Sparse binary matrix representing annotations.
-         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
-
-     Returns:
-         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
-     """
-     # Total number of nodes in the network
-     total_node_count = neighborhoods.shape[1]
-
-     # Compute sums
-     if null_distribution == "network":
-         background_population = total_node_count
-         neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
-         annotation_sums = annotations.sum(axis=0).A.flatten()  # Dense row sums
-     elif null_distribution == "annotations":
-         annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
-         background_population = annotated_nodes.sum()
-         neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
-         annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
-     else:
-         raise ValueError(
-             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
-         )
-
-     # Observed values
-     observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
-     # Expected values under the null
-     neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
-     annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
-     expected = (neighborhood_sums @ annotation_sums) / background_population
-
-     # Standard deviation under the null
-     std_dev = np.sqrt(
-         expected
-         * (1 - annotation_sums / background_population)
-         * (1 - neighborhood_sums / background_population)
-     )
-     std_dev[std_dev == 0] = np.nan  # Avoid division by zero
-     # Compute Z-scores
-     z_scores = (observed - expected) / std_dev
-
-     # Convert Z-scores to depletion and enrichment p-values
-     enrichment_pvals = norm.sf(z_scores)  # Upper tail
-     depletion_pvals = norm.cdf(z_scores)  # Lower tail
-
-     return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}