risk-network 0.0.12b0__py3-none-any.whl → 0.0.12b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. risk/__init__.py +1 -1
  2. risk/annotations/__init__.py +10 -0
  3. risk/annotations/annotations.py +354 -0
  4. risk/annotations/io.py +241 -0
  5. risk/annotations/nltk_setup.py +86 -0
  6. risk/log/__init__.py +11 -0
  7. risk/log/console.py +141 -0
  8. risk/log/parameters.py +171 -0
  9. risk/neighborhoods/__init__.py +7 -0
  10. risk/neighborhoods/api.py +442 -0
  11. risk/neighborhoods/community.py +441 -0
  12. risk/neighborhoods/domains.py +360 -0
  13. risk/neighborhoods/neighborhoods.py +514 -0
  14. risk/neighborhoods/stats/__init__.py +13 -0
  15. risk/neighborhoods/stats/permutation/__init__.py +6 -0
  16. risk/neighborhoods/stats/permutation/permutation.py +240 -0
  17. risk/neighborhoods/stats/permutation/test_functions.py +70 -0
  18. risk/neighborhoods/stats/tests.py +275 -0
  19. risk/network/__init__.py +4 -0
  20. risk/network/graph/__init__.py +4 -0
  21. risk/network/graph/api.py +200 -0
  22. risk/network/graph/graph.py +268 -0
  23. risk/network/graph/stats.py +166 -0
  24. risk/network/graph/summary.py +253 -0
  25. risk/network/io.py +693 -0
  26. risk/network/plotter/__init__.py +4 -0
  27. risk/network/plotter/api.py +54 -0
  28. risk/network/plotter/canvas.py +291 -0
  29. risk/network/plotter/contour.py +329 -0
  30. risk/network/plotter/labels.py +935 -0
  31. risk/network/plotter/network.py +294 -0
  32. risk/network/plotter/plotter.py +141 -0
  33. risk/network/plotter/utils/colors.py +419 -0
  34. risk/network/plotter/utils/layout.py +94 -0
  35. risk_network-0.0.12b1.dist-info/METADATA +122 -0
  36. risk_network-0.0.12b1.dist-info/RECORD +40 -0
  37. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/WHEEL +1 -1
  38. risk_network-0.0.12b0.dist-info/METADATA +0 -796
  39. risk_network-0.0.12b0.dist-info/RECORD +0 -7
  40. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/licenses/LICENSE +0 -0
  41. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,240 @@
1
+ """
2
+ risk/neighborhoods/stats/permutation/permutation
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ from multiprocessing import Manager, get_context
7
+ from multiprocessing.managers import ValueProxy
8
+ from typing import Any, Callable, Dict, List, Tuple, Union
9
+
10
+ import numpy as np
11
+ from scipy.sparse import csr_matrix
12
+ from threadpoolctl import threadpool_limits
13
+ from tqdm import tqdm
14
+
15
+ from risk.neighborhoods.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
16
+
17
+
18
def compute_permutation_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    score_metric: str = "sum",
    null_distribution: str = "network",
    num_permutations: int = 1000,
    random_seed: int = 888,
    max_workers: int = 1,
) -> Dict[str, Any]:
    """Estimate enrichment and depletion p-values with a permutation test.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        score_metric (str, optional): Key into DISPATCH_TEST_FUNCTIONS selecting the scoring
            function ('sum' or 'stdev'). Defaults to "sum".
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            Defaults to "network".
        num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
        random_seed (int, optional): Seed for random number generation. Defaults to 888.
        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.

    Returns:
        Dict[str, Any]: Dictionary with the keys "depletion_pvals" and "enrichment_pvals".

    Raises:
        KeyError: If score_metric is not a key of DISPATCH_TEST_FUNCTIONS.
    """
    # Work on float32 copies of both inputs
    # NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
    neighborhoods, annotations = (
        matrix.astype(np.float32) for matrix in (neighborhoods, annotations)
    )
    # Look up the scoring function that matches the requested metric
    score_func = DISPATCH_TEST_FUNCTIONS[score_metric]

    # Count, per neighborhood/annotation pair, how often a permuted score was
    # at most (depletion) or at least (enrichment) the observed score
    depletion_counts, enrichment_counts = _run_permutation_test(
        neighborhoods=neighborhoods,
        annotations=annotations,
        neighborhood_score_func=score_func,
        null_distribution=null_distribution,
        num_permutations=num_permutations,
        random_seed=random_seed,
        max_workers=max_workers,
    )

    # Floor every count at one so the smallest reportable p-value is
    # 1/num_permutations rather than zero
    labelled_counts = (
        ("depletion_pvals", depletion_counts),
        ("enrichment_pvals", enrichment_counts),
    )
    return {
        label: np.maximum(counts, 1) / num_permutations for label, counts in labelled_counts
    }
67
+
68
+
69
def _run_permutation_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    neighborhood_score_func: Callable,
    null_distribution: str = "network",
    num_permutations: int = 1000,
    random_seed: int = 888,
    max_workers: int = 4,
) -> tuple:
    """Run the permutation test to calculate depletion and enrichment counts.

    NaN entries stored in `annotations` are replaced with zeros in place, so the
    caller's matrix is modified.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        neighborhood_score_func (Callable): Function to calculate neighborhood scores.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            Defaults to "network".
        num_permutations (int, optional): Number of permutations. Defaults to 1000.
        random_seed (int, optional): Seed for random number generation. Defaults to 888.
        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.

    Returns:
        tuple: Depletion and enrichment counts.

    Raises:
        ValueError: If max_workers is smaller than 1 or an invalid null_distribution
            value is provided.
    """
    # Fail early with a clear message instead of a ZeroDivisionError further down
    if max_workers < 1:
        raise ValueError("max_workers must be at least 1.")

    # Initialize the RNG for reproducibility
    rng = np.random.default_rng(seed=random_seed)
    # Determine the indices to use based on the null distribution type
    if null_distribution == "network":
        idxs = range(annotations.shape[0])
    elif null_distribution == "annotations":
        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Replace NaNs with zeros in the sparse annotations matrix (in place)
    annotations.data[np.isnan(annotations.data)] = 0
    annotation_matrix_obsv = annotations[idxs]
    neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
    # Calculate observed neighborhood scores
    with np.errstate(invalid="ignore", divide="ignore"):
        observed_neighborhood_scores = neighborhood_score_func(
            neighborhoods_matrix_obsv, annotation_matrix_obsv
        )

    # Initialize count matrices for depletion and enrichment
    counts_depletion = np.zeros(observed_neighborhood_scores.shape)
    counts_enrichment = np.zeros(observed_neighborhood_scores.shape)

    # Generate precomputed permutations
    permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
    # Split the permutations as evenly as possible: the first `remainder` workers
    # receive one extra permutation, so no worker sits idle while another is overloaded
    subset_size, remainder = divmod(num_permutations, max_workers)
    permutation_batches = []
    start = 0
    for worker_idx in range(max_workers):
        stop = start + subset_size + (1 if worker_idx < remainder else 0)
        permutation_batches.append(permutations[start:stop])
        start = stop

    # Use the spawn context for creating a new multiprocessing pool
    ctx = get_context("spawn")
    total_progress = num_permutations
    # The manager runs a server process; the context manager shuts it down on exit
    with Manager() as manager:
        progress_counter = manager.Value("i", 0)
        # Prepare parameters for multiprocessing
        params_list = [
            (
                batch,  # Pass the batch of precomputed permutations
                annotations,
                neighborhoods_matrix_obsv,
                observed_neighborhood_scores,
                neighborhood_score_func,
                num_permutations,
                progress_counter,
                max_workers,
            )
            for batch in permutation_batches
        ]

        # Execute the permutation test using multiprocessing
        with ctx.Pool(max_workers) as pool:
            with tqdm(total=total_progress, desc="Total progress", position=0) as progress:
                # Start the permutation process in parallel
                results = pool.starmap_async(
                    _permutation_process_batch, params_list, chunksize=1
                )

                # Update progress bar based on progress_counter
                while not results.ready():
                    # Workers update the shared counter without a lock, so it may
                    # overshoot or briefly step backwards; only move the bar forward
                    # and never past the total
                    delta = min(progress_counter.value, total_progress) - progress.n
                    if delta > 0:
                        progress.update(delta)
                    results.wait(0.1)  # Wait for 100ms
                # Ensure progress bar reaches 100%
                progress.update(total_progress - progress.n)

            # Accumulate results from each worker (re-raises any worker exception)
            for local_counts_depletion, local_counts_enrichment in results.get():
                counts_depletion = np.add(counts_depletion, local_counts_depletion)
                counts_enrichment = np.add(counts_enrichment, local_counts_enrichment)

    return counts_depletion, counts_enrichment
172
+
173
+
174
def _permutation_process_batch(
    permutations: Union[List, Tuple, np.ndarray],
    annotation_matrix: csr_matrix,
    neighborhoods_matrix_obsv: csr_matrix,
    observed_neighborhood_scores: np.ndarray,
    neighborhood_score_func: Callable,
    num_permutations: int,
    progress_counter: ValueProxy,
    max_workers: int,
) -> tuple:
    """Process a batch of permutations in a worker process.

    Args:
        permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
        annotation_matrix (csr_matrix): Sparse binary matrix representing annotations.
        neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
        observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
        neighborhood_score_func (Callable): Function to calculate neighborhood scores.
        num_permutations (int): Number of total permutations across all subsets.
        progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
        max_workers (int): Number of workers for multiprocessing.

    Returns:
        tuple: Local counts of depletion and enrichment.
    """
    # Initialize local count matrices for this worker
    local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
    local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)

    # Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used
    # NOTE: This does not work for Mac M chips due to a bug in the threadpoolctl package
    # This is currently a known issue and is being addressed by the maintainers [https://github.com/joblib/threadpoolctl/issues/135]
    limits = None if max_workers == 1 else 1
    with threadpool_limits(limits=limits, user_api="blas"):
        # Publish progress roughly every 1/100th of the total permutations
        modulo_value = max(1, num_permutations // 100)
        # Iterations completed since the last write to the shared counter
        pending_progress = 0

        for permuted_idxs in permutations:
            # Apply precomputed permutation
            annotation_matrix_permut = annotation_matrix[permuted_idxs]
            # Calculate permuted neighborhood scores
            with np.errstate(invalid="ignore", divide="ignore"):
                permuted_neighborhood_scores = neighborhood_score_func(
                    neighborhoods_matrix_obsv, annotation_matrix_permut
                )

            # Update local depletion and enrichment counts
            local_counts_depletion += permuted_neighborhood_scores <= observed_neighborhood_scores
            local_counts_enrichment += permuted_neighborhood_scores >= observed_neighborhood_scores

            # Update progress in batches to limit round trips to the manager process
            pending_progress += 1
            if pending_progress == modulo_value:
                progress_counter.value += pending_progress
                pending_progress = 0

        # Flush only the iterations that have not been reported yet, so the
        # shared counter never claims more work than was actually done
        if pending_progress:
            progress_counter.value += pending_progress

    return local_counts_depletion, local_counts_enrichment
@@ -0,0 +1,70 @@
1
+ """
2
+ risk/neighborhoods/stats/permutation/test_functions
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ import numpy as np
7
+ from scipy.sparse import csr_matrix
8
+
9
+ # NOTE: Cython optimizations provided minimal performance benefits.
10
+ # The final version with Cython is archived in the `cython_permutation` branch.
11
+
12
+ # DISPATCH_TEST_FUNCTIONS can be found at the end of the file.
13
+
14
+
15
def compute_neighborhood_score_by_sum(
    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
) -> np.ndarray:
    """Sum the annotation values that fall inside each neighborhood.

    Args:
        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.

    Returns:
        np.ndarray: Dense array holding the product of the two matrices, i.e. the
            summed annotation values per neighborhood (rows) and annotation (columns).
    """
    # The sparse product accumulates, for every neighborhood row, the annotation
    # rows it selects; the result is densified for downstream comparisons
    return (neighborhoods_matrix @ annotation_matrix).toarray()
32
+
33
+
34
def compute_neighborhood_score_by_stdev(
    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
) -> np.ndarray:
    """Compute the standard deviation of annotation values within each neighborhood.

    The variance is obtained as E[X^2] - E[X]^2, where both means are taken over
    the members of a neighborhood (the row sum of `neighborhoods_matrix`).

    Args:
        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
            Integer and floating-point dtypes are both accepted.
        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.

    Returns:
        np.ndarray: Standard deviation of the neighborhood scores; entries for empty
            neighborhoods are zero.
    """
    # Calculate the neighborhood score as the dot product of neighborhoods and annotations
    neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
    # Number of elements in each neighborhood (row sums) as a writable 1D array
    sizes = np.asarray(neighborhoods_matrix.sum(axis=1))
    if np.issubdtype(sizes.dtype, np.floating):
        # Keep the caller's float precision (e.g. float32) untouched
        sizes = sizes.flatten()
    else:
        # Integer sums cannot hold NaN, so promote them before marking empty rows
        sizes = sizes.astype(np.float64).flatten()
    # Avoid division by zero by marking empty neighborhoods with NaN
    sizes[sizes == 0] = np.nan
    inverse_sizes = 1 / sizes[:, None]

    # Mean of the annotation values per neighborhood
    mean = neighborhood_score.multiply(inverse_sizes).toarray()
    # Mean of squares, computed from the element-wise squared annotation matrix
    annotation_squared = annotation_matrix.multiply(annotation_matrix)
    mean_of_squares = (neighborhoods_matrix @ annotation_squared).multiply(inverse_sizes).toarray()
    # Variance as E[X^2] - E[X]^2; rounding can push it marginally below zero, which
    # is clamped so the square root yields 0 (the same value the NaN cleanup below
    # would produce) without emitting a warning
    variance = np.maximum(mean_of_squares - np.power(mean, 2), 0)
    neighborhood_stdev = np.sqrt(variance)
    # Replace any remaining NaN with zero
    neighborhood_stdev[np.isnan(neighborhood_stdev)] = 0
    return neighborhood_stdev
64
+
65
+
66
# Lookup table from score metric name to the function that computes that score
DISPATCH_TEST_FUNCTIONS = dict(
    sum=compute_neighborhood_score_by_sum,
    stdev=compute_neighborhood_score_by_stdev,
)
@@ -0,0 +1,275 @@
1
+ """
2
+ risk/neighborhoods/stats/tests
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ from typing import Any, Dict
7
+
8
+ import numpy as np
9
+ from scipy.sparse import csr_matrix
10
+ from scipy.stats import binom, chi2, hypergeom, norm, poisson
11
+
12
+
13
def compute_binom_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.

    Each neighborhood (row of `neighborhoods`) is treated as `n` draws, where `n` is
    the row sum, and the observed count is the corresponding entry of
    `neighborhoods @ annotations`.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            With 'network' the success probability of an annotation is its column sum divided
            by the number of columns of `neighborhoods`; with 'annotations' it is the column
            sum divided by the sum of all annotation entries. Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    # Get the total number of nodes in the network
    total_nodes = neighborhoods.shape[1]

    # Row sums (neighborhood sizes) as a column vector, column sums of annotations as a row vector
    neighborhood_sizes = np.asarray(neighborhoods.sum(axis=1))
    annotation_totals = np.asarray(annotations.sum(axis=0))
    # Success probability per annotation under the chosen null
    if null_distribution == "network":
        p_values = (annotation_totals / total_nodes).ravel()
    elif null_distribution == "annotations":
        p_values = (annotation_totals / annotations.sum()).ravel()
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Observed counts (sparse matrix multiplication), densified for scipy.stats
    annotated_counts_dense = (neighborhoods @ annotations).toarray()

    # Enrichment is P(X >= k) = sf(k - 1). The survival function is used instead of
    # 1 - cdf because the subtraction cancels to exactly 0 for very small tails
    enrichment_pvals = binom.sf(annotated_counts_dense - 1, neighborhood_sizes, p_values)
    # Depletion is P(X <= k)
    depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes, p_values)

    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
56
+
57
+
58
def compute_chi2_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Run a per-cell chi-squared test for every neighborhood/annotation pair.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            'network' uses every row as background; 'annotations' restricts the marginal sums
            and the background size to rows of `annotations` with a positive row sum.
            Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    # Pick the rows that contribute to the marginal sums and the background size
    if null_distribution == "network":
        background_population = neighborhoods.shape[0]
        neighborhood_rows, annotation_rows = neighborhoods, annotations
    elif null_distribution == "annotations":
        # Boolean mask of nodes carrying at least one annotation
        has_annotation = np.ravel(annotations.sum(axis=1)) > 0
        background_population = has_annotation.sum()
        neighborhood_rows = neighborhoods[has_annotation]
        annotation_rows = annotations[has_annotation]
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Column sums as a column vector (neighborhoods) and a row vector (annotations)
    neighborhood_sums = np.asarray(neighborhood_rows.sum(axis=0)).reshape(-1, 1)
    annotation_sums = np.asarray(annotation_rows.sum(axis=0)).reshape(1, -1)

    # Observed counts of annotated nodes per neighborhood: (neighborhoods, annotations)
    observed = neighborhoods.T @ annotations
    # Expected counts under independence of the two marginals
    expected = (neighborhood_sums @ annotation_sums) / background_population
    # Per-cell statistic (observed - expected)^2 / expected; cells with a
    # non-positive expectation get a statistic of zero
    with np.errstate(divide="ignore", invalid="ignore"):
        squared_deviation = np.power(observed - expected, 2)
        chi2_stat = np.where(expected > 0, squared_deviation / expected, 0)

    # NOTE(review): the statistic does not carry the sign of the deviation, so
    # depletion_pvals is always 1 - enrichment_pvals; confirm this is intended
    pvals = {
        "depletion_pvals": chi2.cdf(chi2_stat, df=1),  # Lower tail
        "enrichment_pvals": chi2.sf(chi2_stat, df=1),  # Upper tail
    }
    return pvals
118
+
119
+
120
def compute_hypergeom_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Run a hypergeometric test for every neighborhood/annotation pair.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            'network' uses all nodes as the population; 'annotations' restricts the population
            and the marginal sums to rows of `annotations` with a positive row sum.
            Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    # Select the population size and the rows that feed the marginal sums
    if null_distribution == "network":
        population_size = neighborhoods.shape[1]
        neighborhood_rows, annotation_rows = neighborhoods, annotations
    elif null_distribution == "annotations":
        # Boolean mask of nodes carrying at least one annotation
        has_annotation = np.asarray(annotations.sum(axis=1)).ravel() > 0
        population_size = has_annotation.sum()
        neighborhood_rows = neighborhoods[has_annotation]
        annotation_rows = annotations[has_annotation]
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Shape everything for broadcasting against the (neighborhoods, annotations) grid:
    # draws per neighborhood as a column, successes per annotation as a row
    draws = np.asarray(neighborhood_rows.sum(axis=0)).ravel().reshape(-1, 1)
    successes = np.asarray(annotation_rows.sum(axis=0)).ravel().reshape(1, -1)
    population = np.array(population_size).reshape(1, 1)

    # Observed number of annotated nodes in each neighborhood, as a dense array
    hits = (neighborhoods.T @ annotations).toarray()

    # Depletion is P(X <= k); enrichment is P(X >= k) = sf(k - 1)
    depletion_pvals = hypergeom.cdf(hits, population, successes, draws)
    enrichment_pvals = hypergeom.sf(hits - 1, population, successes, draws)

    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
174
+
175
+
176
def compute_poisson_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.

    The observed count for a neighborhood/annotation pair is the corresponding entry
    of `neighborhoods @ annotations`; the Poisson rate is a mean of those counts.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            'network' uses, for each neighborhood, the mean count over all annotations (axis=1);
            'annotations' uses, for each annotation, the mean count over all neighborhoods
            (axis=0). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    # Number of annotated nodes in each neighborhood, densified for scipy.stats
    annotated_in_neighborhood_dense = (neighborhoods @ annotations).toarray()

    # Compute lambda_expected based on the chosen null distribution
    if null_distribution == "network":
        # One rate per neighborhood row (column vector)
        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
    elif null_distribution == "annotations":
        # One rate per annotation column (row vector)
        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Enrichment is P(X >= k) = sf(k - 1). The survival function is used instead of
    # 1 - cdf because the subtraction cancels to exactly 0 for very small tails
    enrichment_pvals = poisson.sf(annotated_in_neighborhood_dense - 1, lambda_expected)
    # Depletion is P(X <= k)
    depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)

    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
216
+
217
+
218
def compute_zscore_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Run a z-score test for every neighborhood/annotation pair.

    Args:
        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
        annotations (csr_matrix): Sparse binary matrix representing annotations.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations').
            'network' uses all nodes as the background; 'annotations' restricts the background
            and the marginal sums to rows of `annotations` with a positive row sum.
            Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values. Cells whose
            null standard deviation is zero yield NaN p-values.

    Raises:
        ValueError: If an invalid null_distribution value is provided.
    """
    # Select the background size and the rows that feed the marginal sums
    if null_distribution == "network":
        background_population = neighborhoods.shape[1]
        neighborhood_rows, annotation_rows = neighborhoods, annotations
    elif null_distribution == "annotations":
        # Boolean mask of nodes carrying at least one annotation
        has_annotation = np.asarray(annotations.sum(axis=1)).ravel() > 0
        background_population = has_annotation.sum()
        neighborhood_rows = neighborhoods[has_annotation]
        annotation_rows = annotations[has_annotation]
    else:
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Column sums of both matrices, shaped as a column (neighborhoods) and a row (annotations)
    neighborhood_sums = np.asarray(neighborhood_rows.sum(axis=0)).ravel().reshape(-1, 1)
    annotation_sums = np.asarray(annotation_rows.sum(axis=0)).ravel().reshape(1, -1)

    # Observed counts as a dense (neighborhoods, annotations) array
    observed = (neighborhoods.T @ annotations).toarray()
    # Expected counts under the null
    expected = (neighborhood_sums @ annotation_sums) / background_population

    # Standard deviation under the null
    annotation_fraction = annotation_sums / background_population
    neighborhood_fraction = neighborhood_sums / background_population
    std_dev = np.sqrt(expected * (1 - annotation_fraction) * (1 - neighborhood_fraction))
    # A zero spread would divide by zero; mark those cells as undefined instead
    zero_spread = std_dev == 0
    std_dev[zero_spread] = np.nan

    z_scores = (observed - expected) / std_dev

    # Lower tail gives depletion, upper tail gives enrichment
    return {
        "depletion_pvals": norm.cdf(z_scores),
        "enrichment_pvals": norm.sf(z_scores),
    }
@@ -0,0 +1,4 @@
1
+ """
2
+ risk/network
3
+ ~~~~~~~~~~~~
4
+ """
@@ -0,0 +1,4 @@
1
+ """
2
+ risk/network/graph
3
+ ~~~~~~~~~~~~~~~~~~
4
+ """