risk-network 0.0.7b3__py3-none-any.whl → 0.0.7b5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/neighborhoods/neighborhoods.py +0 -1
- risk/network/graph.py +16 -27
- risk/risk.py +37 -51
- risk/stats/__init__.py +2 -2
- risk/stats/hypergeom.py +32 -108
- risk/stats/poisson.py +40 -0
- {risk_network-0.0.7b3.dist-info → risk_network-0.0.7b5.dist-info}/METADATA +1 -1
- {risk_network-0.0.7b3.dist-info → risk_network-0.0.7b5.dist-info}/RECORD +12 -12
- risk/stats/fisher_exact.py +0 -132
- {risk_network-0.0.7b3.dist-info → risk_network-0.0.7b5.dist-info}/LICENSE +0 -0
- {risk_network-0.0.7b3.dist-info → risk_network-0.0.7b5.dist-info}/WHEEL +0 -0
- {risk_network-0.0.7b3.dist-info → risk_network-0.0.7b5.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
@@ -200,7 +200,6 @@ def _impute_neighbors_with_similarity(
|
|
200
200
|
depth = 1
|
201
201
|
rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
|
202
202
|
while len(rows_to_impute) and depth <= max_depth:
|
203
|
-
next_rows_to_impute = []
|
204
203
|
# Iterate over all enriched nodes
|
205
204
|
for row_index in range(binary_enrichment_matrix.shape[0]):
|
206
205
|
if binary_enrichment_matrix[row_index].sum() != 0:
|
risk/network/graph.py
CHANGED
@@ -3,7 +3,6 @@ risk/network/graph
|
|
3
3
|
~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
import random
|
7
6
|
from collections import defaultdict
|
8
7
|
from typing import Any, Dict, List, Tuple, Union
|
9
8
|
|
@@ -307,7 +306,7 @@ def _get_colors(
|
|
307
306
|
List[Tuple]: List of RGBA colors.
|
308
307
|
"""
|
309
308
|
# Set random seed for reproducibility
|
310
|
-
random.seed(random_seed)
|
309
|
+
np.random.seed(random_seed)
|
311
310
|
# Determine the number of colors to generate based on the number of domains
|
312
311
|
num_colors_to_generate = len(domain_id_to_node_ids_map)
|
313
312
|
if color:
|
@@ -322,23 +321,15 @@ def _get_colors(
|
|
322
321
|
# Step 2: Calculate pairwise distances between centroids
|
323
322
|
centroid_array = np.array(centroids)
|
324
323
|
dist_matrix = np.linalg.norm(centroid_array[:, None] - centroid_array, axis=-1)
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
#
|
329
|
-
color_positions =
|
330
|
-
|
331
|
-
)
|
332
|
-
|
333
|
-
# Step 4: Randomly shuffle color positions to generate a new color palette
|
334
|
-
# while maintaining the dissimilarity between neighboring colors. This shuffling
|
335
|
-
# preserves the relative distances between centroids, ensuring that close centroids
|
336
|
-
# remain visually distinct while introducing randomness into the overall color arrangement.
|
337
|
-
random.shuffle(color_positions)
|
338
|
-
# Ensure that all positions remain between 0 and 1
|
324
|
+
# Step 3: Assign distant colors to close centroids
|
325
|
+
color_positions = _assign_distant_colors(dist_matrix, num_colors_to_generate)
|
326
|
+
# Step 4: Randomly shift the entire color palette while maintaining relative distances
|
327
|
+
global_shift = np.random.uniform(-0.1, 0.1) # Small global shift to change the overall palette
|
328
|
+
color_positions = (color_positions + global_shift) % 1 # Wrap around to keep within [0, 1]
|
329
|
+
# Step 5: Ensure that all positions remain between 0 and 1
|
339
330
|
color_positions = np.clip(color_positions, 0, 1)
|
340
331
|
|
341
|
-
# Step
|
332
|
+
# Step 6: Generate RGBA colors based on positions
|
342
333
|
return [colormap(pos) for pos in color_positions]
|
343
334
|
|
344
335
|
|
@@ -365,28 +356,26 @@ def _calculate_centroids(network, domain_id_to_node_ids_map):
|
|
365
356
|
return centroids
|
366
357
|
|
367
358
|
|
368
|
-
def _assign_distant_colors(
|
359
|
+
def _assign_distant_colors(dist_matrix, num_colors_to_generate):
|
369
360
|
"""Assign colors to centroids that are close in space, ensuring stark color differences.
|
370
361
|
|
371
362
|
Args:
|
372
|
-
remaining_indices (set): Indices of centroids left to color.
|
373
363
|
dist_matrix (ndarray): Matrix of pairwise centroid distances.
|
374
|
-
colormap (Colormap): The colormap used to assign colors.
|
375
364
|
num_colors_to_generate (int): Number of colors to generate.
|
376
365
|
|
377
366
|
Returns:
|
378
|
-
np.array: Array of color positions in the
|
367
|
+
np.array: Array of color positions in the range [0, 1].
|
379
368
|
"""
|
380
369
|
color_positions = np.zeros(num_colors_to_generate)
|
381
|
-
#
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
# Assign colors starting with the most distant points in proximity order
|
370
|
+
# Step 1: Sort indices by centroid proximity (based on sum of distances to others)
|
371
|
+
proximity_order = sorted(
|
372
|
+
range(num_colors_to_generate), key=lambda idx: np.sum(dist_matrix[idx])
|
373
|
+
)
|
374
|
+
# Step 2: Assign colors starting with the most distant points in proximity order
|
386
375
|
for i, idx in enumerate(proximity_order):
|
387
376
|
color_positions[idx] = i / num_colors_to_generate
|
388
377
|
|
389
|
-
# Adjust colors so that centroids close to one another are maximally distant on the color spectrum
|
378
|
+
# Step 3: Adjust colors so that centroids close to one another are maximally distant on the color spectrum
|
390
379
|
half_spectrum = int(num_colors_to_generate / 2)
|
391
380
|
for i in range(half_spectrum):
|
392
381
|
# Split the spectrum so that close centroids are assigned distant colors
|
risk/risk.py
CHANGED
@@ -20,9 +20,9 @@ from risk.neighborhoods import (
|
|
20
20
|
from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
|
21
21
|
from risk.stats import (
|
22
22
|
calculate_significance_matrices,
|
23
|
-
compute_fisher_exact_test,
|
24
23
|
compute_hypergeom_test,
|
25
24
|
compute_permutation_test,
|
25
|
+
compute_poisson_test,
|
26
26
|
)
|
27
27
|
|
28
28
|
|
@@ -45,20 +45,16 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
45
45
|
"""Access the logged parameters."""
|
46
46
|
return params
|
47
47
|
|
48
|
-
def
|
48
|
+
def load_neighborhoods_by_hypergeom(
|
49
49
|
self,
|
50
50
|
network: nx.Graph,
|
51
51
|
annotations: Dict[str, Any],
|
52
52
|
distance_metric: str = "dijkstra",
|
53
53
|
louvain_resolution: float = 0.1,
|
54
54
|
edge_length_threshold: float = 0.5,
|
55
|
-
score_metric: str = "sum",
|
56
|
-
null_distribution: str = "network",
|
57
|
-
num_permutations: int = 1000,
|
58
55
|
random_seed: int = 888,
|
59
|
-
max_workers: int = 1,
|
60
56
|
) -> Dict[str, Any]:
|
61
|
-
"""Load significant neighborhoods for the network using the
|
57
|
+
"""Load significant neighborhoods for the network using the hypergeometric test.
|
62
58
|
|
63
59
|
Args:
|
64
60
|
network (nx.Graph): The network graph.
|
@@ -66,27 +62,19 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
66
62
|
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
|
67
63
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
68
64
|
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
69
|
-
score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
|
70
|
-
null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
|
71
|
-
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
72
65
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
73
|
-
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
74
66
|
|
75
67
|
Returns:
|
76
68
|
dict: Computed significance of neighborhoods.
|
77
69
|
"""
|
78
|
-
print_header("Running
|
70
|
+
print_header("Running hypergeometric test")
|
79
71
|
# Log neighborhood analysis parameters
|
80
72
|
params.log_neighborhoods(
|
81
73
|
distance_metric=distance_metric,
|
82
74
|
louvain_resolution=louvain_resolution,
|
83
75
|
edge_length_threshold=edge_length_threshold,
|
84
|
-
statistical_test_function="
|
85
|
-
score_metric=score_metric,
|
86
|
-
null_distribution=null_distribution,
|
87
|
-
num_permutations=num_permutations,
|
76
|
+
statistical_test_function="hypergeom",
|
88
77
|
random_seed=random_seed,
|
89
|
-
max_workers=max_workers,
|
90
78
|
)
|
91
79
|
|
92
80
|
# Load neighborhoods based on the network and distance metric
|
@@ -97,27 +85,16 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
97
85
|
edge_length_threshold=edge_length_threshold,
|
98
86
|
random_seed=random_seed,
|
99
87
|
)
|
100
|
-
|
101
|
-
|
102
|
-
print(f"Neighborhood scoring metric: '{score_metric}'")
|
103
|
-
print(f"Null distribution: '{null_distribution}'")
|
104
|
-
print(f"Number of permutations: {num_permutations}")
|
105
|
-
print(f"Maximum workers: {max_workers}")
|
106
|
-
# Run permutation test to compute neighborhood significance
|
107
|
-
neighborhood_significance = compute_permutation_test(
|
88
|
+
# Run hypergeometric test to compute neighborhood significance
|
89
|
+
neighborhood_significance = compute_hypergeom_test(
|
108
90
|
neighborhoods=neighborhoods,
|
109
91
|
annotations=annotations["matrix"],
|
110
|
-
score_metric=score_metric,
|
111
|
-
null_distribution=null_distribution,
|
112
|
-
num_permutations=num_permutations,
|
113
|
-
random_seed=random_seed,
|
114
|
-
max_workers=max_workers,
|
115
92
|
)
|
116
93
|
|
117
94
|
# Return the computed neighborhood significance
|
118
95
|
return neighborhood_significance
|
119
96
|
|
120
|
-
def
|
97
|
+
def load_neighborhoods_by_poisson(
|
121
98
|
self,
|
122
99
|
network: nx.Graph,
|
123
100
|
annotations: Dict[str, Any],
|
@@ -125,9 +102,8 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
125
102
|
louvain_resolution: float = 0.1,
|
126
103
|
edge_length_threshold: float = 0.5,
|
127
104
|
random_seed: int = 888,
|
128
|
-
max_workers: int = 1,
|
129
105
|
) -> Dict[str, Any]:
|
130
|
-
"""Load significant neighborhoods for the network using the
|
106
|
+
"""Load significant neighborhoods for the network using the Poisson test.
|
131
107
|
|
132
108
|
Args:
|
133
109
|
network (nx.Graph): The network graph.
|
@@ -136,20 +112,18 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
136
112
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
137
113
|
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
138
114
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
139
|
-
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
140
115
|
|
141
116
|
Returns:
|
142
117
|
dict: Computed significance of neighborhoods.
|
143
118
|
"""
|
144
|
-
print_header("Running
|
119
|
+
print_header("Running Poisson test")
|
145
120
|
# Log neighborhood analysis parameters
|
146
121
|
params.log_neighborhoods(
|
147
122
|
distance_metric=distance_metric,
|
148
123
|
louvain_resolution=louvain_resolution,
|
149
124
|
edge_length_threshold=edge_length_threshold,
|
150
|
-
statistical_test_function="
|
125
|
+
statistical_test_function="poisson",
|
151
126
|
random_seed=random_seed,
|
152
|
-
max_workers=max_workers,
|
153
127
|
)
|
154
128
|
|
155
129
|
# Load neighborhoods based on the network and distance metric
|
@@ -160,30 +134,29 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
160
134
|
edge_length_threshold=edge_length_threshold,
|
161
135
|
random_seed=random_seed,
|
162
136
|
)
|
163
|
-
|
164
|
-
|
165
|
-
print(f"Maximum workers: {max_workers}")
|
166
|
-
# Run Fisher's exact test to compute neighborhood significance
|
167
|
-
neighborhood_significance = compute_fisher_exact_test(
|
137
|
+
# Run Poisson test to compute neighborhood significance
|
138
|
+
neighborhood_significance = compute_poisson_test(
|
168
139
|
neighborhoods=neighborhoods,
|
169
140
|
annotations=annotations["matrix"],
|
170
|
-
max_workers=max_workers,
|
171
141
|
)
|
172
142
|
|
173
143
|
# Return the computed neighborhood significance
|
174
144
|
return neighborhood_significance
|
175
145
|
|
176
|
-
def
|
146
|
+
def load_neighborhoods_by_permutation(
|
177
147
|
self,
|
178
148
|
network: nx.Graph,
|
179
149
|
annotations: Dict[str, Any],
|
180
150
|
distance_metric: str = "dijkstra",
|
181
151
|
louvain_resolution: float = 0.1,
|
182
152
|
edge_length_threshold: float = 0.5,
|
153
|
+
score_metric: str = "sum",
|
154
|
+
null_distribution: str = "network",
|
155
|
+
num_permutations: int = 1000,
|
183
156
|
random_seed: int = 888,
|
184
157
|
max_workers: int = 1,
|
185
158
|
) -> Dict[str, Any]:
|
186
|
-
"""Load significant neighborhoods for the network using the
|
159
|
+
"""Load significant neighborhoods for the network using the permutation test.
|
187
160
|
|
188
161
|
Args:
|
189
162
|
network (nx.Graph): The network graph.
|
@@ -191,19 +164,25 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
191
164
|
distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
|
192
165
|
louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
|
193
166
|
edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
|
167
|
+
score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
|
168
|
+
null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
|
169
|
+
num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
|
194
170
|
random_seed (int, optional): Seed for random number generation. Defaults to 888.
|
195
171
|
max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
|
196
172
|
|
197
173
|
Returns:
|
198
174
|
dict: Computed significance of neighborhoods.
|
199
175
|
"""
|
200
|
-
print_header("Running
|
176
|
+
print_header("Running permutation test")
|
201
177
|
# Log neighborhood analysis parameters
|
202
178
|
params.log_neighborhoods(
|
203
179
|
distance_metric=distance_metric,
|
204
180
|
louvain_resolution=louvain_resolution,
|
205
181
|
edge_length_threshold=edge_length_threshold,
|
206
|
-
statistical_test_function="
|
182
|
+
statistical_test_function="permutation",
|
183
|
+
score_metric=score_metric,
|
184
|
+
null_distribution=null_distribution,
|
185
|
+
num_permutations=num_permutations,
|
207
186
|
random_seed=random_seed,
|
208
187
|
max_workers=max_workers,
|
209
188
|
)
|
@@ -217,12 +196,19 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
217
196
|
random_seed=random_seed,
|
218
197
|
)
|
219
198
|
|
220
|
-
# Log and display
|
199
|
+
# Log and display permutation test settings
|
200
|
+
print(f"Neighborhood scoring metric: '{score_metric}'")
|
201
|
+
print(f"Null distribution: '{null_distribution}'")
|
202
|
+
print(f"Number of permutations: {num_permutations}")
|
221
203
|
print(f"Maximum workers: {max_workers}")
|
222
|
-
# Run
|
223
|
-
neighborhood_significance =
|
204
|
+
# Run permutation test to compute neighborhood significance
|
205
|
+
neighborhood_significance = compute_permutation_test(
|
224
206
|
neighborhoods=neighborhoods,
|
225
207
|
annotations=annotations["matrix"],
|
208
|
+
score_metric=score_metric,
|
209
|
+
null_distribution=null_distribution,
|
210
|
+
num_permutations=num_permutations,
|
211
|
+
random_seed=random_seed,
|
226
212
|
max_workers=max_workers,
|
227
213
|
)
|
228
214
|
|
@@ -315,7 +301,7 @@ class RISK(NetworkIO, AnnotationsIO):
|
|
315
301
|
max_cluster_size=max_cluster_size,
|
316
302
|
)
|
317
303
|
|
318
|
-
print_header(
|
304
|
+
print_header("Optimizing distance threshold for domains")
|
319
305
|
# Define domains in the network using the specified clustering settings
|
320
306
|
domains = self._define_domains(
|
321
307
|
neighborhoods=processed_neighborhoods,
|
risk/stats/__init__.py
CHANGED
@@ -3,7 +3,7 @@ risk/stats
|
|
3
3
|
~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from .stats import calculate_significance_matrices
|
7
|
-
from .fisher_exact import compute_fisher_exact_test
|
8
6
|
from .hypergeom import compute_hypergeom_test
|
9
7
|
from .permutation import compute_permutation_test
|
8
|
+
from .poisson import compute_poisson_test
|
9
|
+
from .stats import calculate_significance_matrices
|
risk/stats/hypergeom.py
CHANGED
@@ -3,8 +3,6 @@ risk/stats/hypergeom
|
|
3
3
|
~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
|
-
from multiprocessing import get_context, Manager
|
7
|
-
from tqdm import tqdm
|
8
6
|
from typing import Any, Dict
|
9
7
|
|
10
8
|
import numpy as np
|
@@ -14,118 +12,44 @@ from scipy.stats import hypergeom
|
|
14
12
|
def compute_hypergeom_test(
    neighborhoods: np.ndarray,
    annotations: np.ndarray,
) -> Dict[str, Any]:
    """Compute hypergeometric test for enrichment and depletion in neighborhoods.

    Args:
        neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
            and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
            in a neighborhood. Any value greater than 0 is treated as presence.
        annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
            and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
            being annotated. Any value greater than 0 is treated as presence.

    Returns:
        Dict[str, Any]: A dictionary with two keys, each holding an array of shape
            (number of neighborhoods, number of annotations):
            - "enrichment_pvals" (np.ndarray): P-values for enrichment, i.e. the probability of
              observing at least as many annotated nodes in a neighborhood as were observed.
            - "depletion_pvals" (np.ndarray): P-values for depletion, i.e. the probability of
              observing at most as many annotated nodes in a neighborhood as were observed.
    """
    # Ensure both matrices are binary (presence/absence)
    neighborhoods = (neighborhoods > 0).astype(int)
    annotations = (annotations > 0).astype(int)
    # Population size: every node in the network
    total_node_count = annotations.shape[0]
    # Size of each neighborhood (column sums), as a column vector so that it
    # broadcasts across annotations
    neighborhood_sizes = np.sum(neighborhoods, axis=0)[:, np.newaxis]
    # Number of nodes carrying each annotation, as a row vector so that it
    # broadcasts across neighborhoods
    annotated_node_counts = np.sum(annotations, axis=0)[np.newaxis, :]
    # Nodes in each neighborhood annotated to each attribute. Neighborhoods are
    # the COLUMNS of the matrix, so it must be transposed before the product;
    # without the transpose the counts are only right for symmetric matrices and
    # disagree with the column-wise neighborhood sizes above.
    annotated_in_neighborhood = neighborhoods.T @ annotations
    # P(X <= k): probability of seeing this few annotated nodes or fewer
    depletion_pvals = hypergeom.cdf(
        annotated_in_neighborhood,
        total_node_count,
        annotated_node_counts,
        neighborhood_sizes,
    )
    # P(X >= k) == sf(k - 1): probability of seeing this many annotated nodes or more
    enrichment_pvals = hypergeom.sf(
        annotated_in_neighborhood - 1,
        total_node_count,
        annotated_node_counts,
        neighborhood_sizes,
    )
    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
|
risk/stats/poisson.py
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
"""
|
2
|
+
risk/stats/poisson
|
3
|
+
~~~~~~~~~~~~~~~~~~
|
4
|
+
"""
|
5
|
+
|
6
|
+
from typing import Dict, Any
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
from scipy.stats import poisson
|
10
|
+
|
11
|
+
|
12
|
+
def compute_poisson_test(neighborhoods: np.ndarray, annotations: np.ndarray) -> Dict[str, Any]:
    """Compute Poisson test for enrichment and depletion in neighborhoods.

    Args:
        neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
            and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
            in a neighborhood. Any value greater than 0 is treated as presence.
        annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
            and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
            being annotated. Any value greater than 0 is treated as presence.

    Returns:
        Dict[str, Any]: A dictionary with two keys, each holding an array of shape
            (number of neighborhoods, number of annotations):
            - "enrichment_pvals" (np.ndarray): P-values for enrichment, i.e. the probability of
              observing at least as many annotated nodes in a neighborhood as were observed
              under the Poisson distribution.
            - "depletion_pvals" (np.ndarray): P-values for depletion, i.e. the probability of
              observing at most as many annotated nodes in a neighborhood as were observed
              under the Poisson distribution.
    """
    # Ensure both matrices are binary (presence/absence)
    neighborhoods = (neighborhoods > 0).astype(int)
    annotations = (annotations > 0).astype(int)
    # Nodes in each neighborhood annotated to each attribute. Neighborhoods are
    # the COLUMNS of the matrix, so it must be transposed before the product;
    # without the transpose the counts are only right for symmetric matrices.
    annotated_in_neighborhood = neighborhoods.T @ annotations
    # Expected count per annotation: its mean count over all neighborhoods
    lambda_expected = np.mean(annotated_in_neighborhood, axis=0)
    # Enrichment, P(X >= k) == sf(k - 1). The survival function is used instead
    # of `1 - cdf` because the subtraction rounds small tail probabilities to 0.
    enrichment_pvals = poisson.sf(annotated_in_neighborhood - 1, lambda_expected)
    # Depletion, P(X <= k)
    depletion_pvals = poisson.cdf(annotated_in_neighborhood, lambda_expected)

    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
|
@@ -1,6 +1,6 @@
|
|
1
|
-
risk/__init__.py,sha256=
|
1
|
+
risk/__init__.py,sha256=dGMZvusp_heb_yF3HEnVZDfVhFlvQDEuBQKDQfIAJvk,112
|
2
2
|
risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
|
3
|
-
risk/risk.py,sha256=
|
3
|
+
risk/risk.py,sha256=EhKdNC5ntEsBAXG7Rw1Y-ho0HzbsvoU9XYE8djD-Axs,19972
|
4
4
|
risk/annotations/__init__.py,sha256=vUpVvMRE5if01Ic8QY6M2Ae3EFGJHdugEe9PdEkAW4Y,138
|
5
5
|
risk/annotations/annotations.py,sha256=K7cUA6vYTKYAvj0xHqrAwNEYtmPq4H7LDYENAOVQdQ0,11014
|
6
6
|
risk/annotations/io.py,sha256=lo7NKqOVkeeBp58JBxWJHtA0xjL5Yoxqe9Ox0daKlZk,9457
|
@@ -10,21 +10,21 @@ risk/log/params.py,sha256=Rfdg5UcGCrG80m6V79FyORERWUqIzHFO7tGiY4zAImM,6347
|
|
10
10
|
risk/neighborhoods/__init__.py,sha256=tKKEg4lsbqFukpgYlUGxU_v_9FOqK7V0uvM9T2QzoL0,206
|
11
11
|
risk/neighborhoods/community.py,sha256=7ebo1Q5KokSQISnxZIh2SQxsKXdXm8aVkp-h_DiQ3K0,6818
|
12
12
|
risk/neighborhoods/domains.py,sha256=bxJUxqFTynzX0mf3E8-AA4_Rfccje1reeVVhfzb1-pE,10672
|
13
|
-
risk/neighborhoods/neighborhoods.py,sha256=
|
13
|
+
risk/neighborhoods/neighborhoods.py,sha256=N02r2nnCfDtzVicuUt2WA77EUPHtruqjX8qJmXUP7ik,17475
|
14
14
|
risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
|
15
15
|
risk/network/geometry.py,sha256=H1yGVVqgbfpzBzJwEheDLfvGLSA284jGQQTn612L4Vc,6759
|
16
|
-
risk/network/graph.py,sha256=
|
16
|
+
risk/network/graph.py,sha256=_LEoom4EEowGALuJKSXcev9RAAHu2FqIeq3u7mkifW0,16479
|
17
17
|
risk/network/io.py,sha256=gG50kOknO-D3HkW1HsbHMkTMvjUtn3l4W4Jwd-rXNr8,21202
|
18
18
|
risk/network/plot.py,sha256=F6KPjmBYWrThKZScHs9SuzoKQiytBvzrmGhGberHjwo,62063
|
19
|
-
risk/stats/__init__.py,sha256=
|
20
|
-
risk/stats/
|
21
|
-
risk/stats/
|
19
|
+
risk/stats/__init__.py,sha256=WcgoETQ-hS0LQqKRsAMIPtP15xZ-4eul6VUBuUx4Wzc,220
|
20
|
+
risk/stats/hypergeom.py,sha256=CfGJ1fd7QKIbBVy85p6-upXwNi19TJioDuekA65PHCQ,2473
|
21
|
+
risk/stats/poisson.py,sha256=eCBgxVdNUTJ_0aVxSU8ddSFGIXeSOY7Vx3YQBaEzN2k,1836
|
22
22
|
risk/stats/stats.py,sha256=kvShov-94W6ffgDUTb522vB9hDJQSyTsYif_UIaFfSM,7059
|
23
23
|
risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
|
24
24
|
risk/stats/permutation/permutation.py,sha256=qLWdwxEY6nmkYPxpM8HLDcd2mbqYv9Qr7CKtJvhLqIM,9220
|
25
25
|
risk/stats/permutation/test_functions.py,sha256=HuDIM-V1jkkfE1rlaIqrWWBSKZt3dQ1f-YEDjWpnLSE,2343
|
26
|
-
risk_network-0.0.
|
27
|
-
risk_network-0.0.
|
28
|
-
risk_network-0.0.
|
29
|
-
risk_network-0.0.
|
30
|
-
risk_network-0.0.
|
26
|
+
risk_network-0.0.7b5.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
27
|
+
risk_network-0.0.7b5.dist-info/METADATA,sha256=DaAqg8en6KjGKUGgxI96K749ZwhFRY92h0RsnvoGqx4,43142
|
28
|
+
risk_network-0.0.7b5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
29
|
+
risk_network-0.0.7b5.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
30
|
+
risk_network-0.0.7b5.dist-info/RECORD,,
|
risk/stats/fisher_exact.py
DELETED
@@ -1,132 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
risk/stats/fisher_exact
|
3
|
-
~~~~~~~~~~~~~~~~~~~~~~~
|
4
|
-
"""
|
5
|
-
|
6
|
-
from multiprocessing import get_context, Manager
|
7
|
-
from tqdm import tqdm
|
8
|
-
from typing import Any, Dict
|
9
|
-
|
10
|
-
import numpy as np
|
11
|
-
from scipy.stats import fisher_exact
|
12
|
-
|
13
|
-
|
14
|
-
def compute_fisher_exact_test(
|
15
|
-
neighborhoods: np.ndarray,
|
16
|
-
annotations: np.ndarray,
|
17
|
-
max_workers: int = 4,
|
18
|
-
) -> Dict[str, Any]:
|
19
|
-
"""Compute Fisher's exact test for enrichment and depletion in neighborhoods.
|
20
|
-
|
21
|
-
Args:
|
22
|
-
neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
|
23
|
-
annotations (np.ndarray): Binary matrix representing annotations.
|
24
|
-
max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
|
25
|
-
|
26
|
-
Returns:
|
27
|
-
dict: Dictionary containing depletion and enrichment p-values.
|
28
|
-
"""
|
29
|
-
# Ensure that the matrices are binary (boolean) and free of NaN values
|
30
|
-
neighborhoods = neighborhoods.astype(bool) # Convert to boolean
|
31
|
-
annotations = annotations.astype(bool) # Convert to boolean
|
32
|
-
|
33
|
-
# Initialize the process of calculating p-values using multiprocessing
|
34
|
-
ctx = get_context("spawn")
|
35
|
-
manager = Manager()
|
36
|
-
progress_counter = manager.Value("i", 0)
|
37
|
-
total_tasks = neighborhoods.shape[1] * annotations.shape[1]
|
38
|
-
|
39
|
-
# Calculate the workload per worker
|
40
|
-
chunk_size = total_tasks // max_workers
|
41
|
-
remainder = total_tasks % max_workers
|
42
|
-
|
43
|
-
# Execute the Fisher's exact test using multiprocessing
|
44
|
-
with ctx.Pool(max_workers) as pool:
|
45
|
-
with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
|
46
|
-
params_list = []
|
47
|
-
start_idx = 0
|
48
|
-
for i in range(max_workers):
|
49
|
-
end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
|
50
|
-
params_list.append(
|
51
|
-
(neighborhoods, annotations, start_idx, end_idx, progress_counter)
|
52
|
-
)
|
53
|
-
start_idx = end_idx
|
54
|
-
|
55
|
-
# Start the Fisher's exact test process in parallel
|
56
|
-
results = pool.starmap_async(_fisher_exact_process_subset, params_list, chunksize=1)
|
57
|
-
|
58
|
-
# Update progress bar based on progress_counter
|
59
|
-
while not results.ready():
|
60
|
-
progress.update(progress_counter.value - progress.n)
|
61
|
-
results.wait(0.05) # Wait for 50ms
|
62
|
-
# Ensure progress bar reaches 100%
|
63
|
-
progress.update(total_tasks - progress.n)
|
64
|
-
|
65
|
-
# Accumulate results from each worker
|
66
|
-
depletion_pvals, enrichment_pvals = [], []
|
67
|
-
for dp, ep in results.get():
|
68
|
-
depletion_pvals.extend(dp)
|
69
|
-
enrichment_pvals.extend(ep)
|
70
|
-
|
71
|
-
# Reshape the results back into arrays with the appropriate dimensions
|
72
|
-
depletion_pvals = np.array(depletion_pvals).reshape(
|
73
|
-
neighborhoods.shape[1], annotations.shape[1]
|
74
|
-
)
|
75
|
-
enrichment_pvals = np.array(enrichment_pvals).reshape(
|
76
|
-
neighborhoods.shape[1], annotations.shape[1]
|
77
|
-
)
|
78
|
-
|
79
|
-
return {
|
80
|
-
"depletion_pvals": depletion_pvals,
|
81
|
-
"enrichment_pvals": enrichment_pvals,
|
82
|
-
}
|
83
|
-
|
84
|
-
|
85
|
-
def _fisher_exact_process_subset(
|
86
|
-
neighborhoods: np.ndarray,
|
87
|
-
annotations: np.ndarray,
|
88
|
-
start_idx: int,
|
89
|
-
end_idx: int,
|
90
|
-
progress_counter,
|
91
|
-
) -> tuple:
|
92
|
-
"""Process a subset of neighborhoods using Fisher's exact test.
|
93
|
-
|
94
|
-
Args:
|
95
|
-
neighborhoods (np.ndarray): The full neighborhood matrix.
|
96
|
-
annotations (np.ndarray): The annotation matrix.
|
97
|
-
start_idx (int): Starting index of the neighborhood-annotation pairs to process.
|
98
|
-
end_idx (int): Ending index of the neighborhood-annotation pairs to process.
|
99
|
-
progress_counter: Shared counter for tracking progress.
|
100
|
-
|
101
|
-
Returns:
|
102
|
-
tuple: Local p-values for depletion and enrichment.
|
103
|
-
"""
|
104
|
-
# Initialize lists to store p-values for depletion and enrichment
|
105
|
-
depletion_pvals = []
|
106
|
-
enrichment_pvals = []
|
107
|
-
# Process the subset of tasks assigned to this worker
|
108
|
-
for idx in range(start_idx, end_idx):
|
109
|
-
i = idx // annotations.shape[1] # Neighborhood index
|
110
|
-
j = idx % annotations.shape[1] # Annotation index
|
111
|
-
|
112
|
-
neighborhood = neighborhoods[:, i]
|
113
|
-
annotation = annotations[:, j]
|
114
|
-
|
115
|
-
# Calculate the contingency table values
|
116
|
-
TP = np.sum(neighborhood & annotation)
|
117
|
-
FP = np.sum(neighborhood & ~annotation)
|
118
|
-
FN = np.sum(~neighborhood & annotation)
|
119
|
-
TN = np.sum(~neighborhood & ~annotation)
|
120
|
-
table = np.array([[TP, FP], [FN, TN]])
|
121
|
-
|
122
|
-
# Perform Fisher's exact test for depletion (alternative='less')
|
123
|
-
_, p_value_depletion = fisher_exact(table, alternative="less")
|
124
|
-
depletion_pvals.append(p_value_depletion)
|
125
|
-
# Perform Fisher's exact test for enrichment (alternative='greater')
|
126
|
-
_, p_value_enrichment = fisher_exact(table, alternative="greater")
|
127
|
-
enrichment_pvals.append(p_value_enrichment)
|
128
|
-
|
129
|
-
# Update the shared progress counter
|
130
|
-
progress_counter.value += 1
|
131
|
-
|
132
|
-
return depletion_pvals, enrichment_pvals
|
File without changes
|
File without changes
|
File without changes
|