risk-network 0.0.8b18-py3-none-any.whl → 0.0.9b26-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +2 -2
- risk/annotations/__init__.py +2 -2
- risk/annotations/annotations.py +133 -72
- risk/annotations/io.py +50 -34
- risk/log/__init__.py +4 -2
- risk/log/{config.py → console.py} +5 -3
- risk/log/{params.py → parameters.py} +21 -46
- risk/neighborhoods/__init__.py +3 -5
- risk/neighborhoods/api.py +446 -0
- risk/neighborhoods/community.py +281 -96
- risk/neighborhoods/domains.py +92 -38
- risk/neighborhoods/neighborhoods.py +210 -149
- risk/network/__init__.py +1 -3
- risk/network/geometry.py +69 -58
- risk/network/graph/__init__.py +6 -0
- risk/network/graph/api.py +194 -0
- risk/network/graph/network.py +269 -0
- risk/network/graph/summary.py +254 -0
- risk/network/io.py +58 -48
- risk/network/plotter/__init__.py +6 -0
- risk/network/plotter/api.py +54 -0
- risk/network/{plot → plotter}/canvas.py +80 -26
- risk/network/{plot → plotter}/contour.py +43 -34
- risk/network/{plot → plotter}/labels.py +123 -113
- risk/network/plotter/network.py +424 -0
- risk/network/plotter/utils/colors.py +416 -0
- risk/network/plotter/utils/layout.py +94 -0
- risk/risk.py +11 -469
- risk/stats/__init__.py +8 -4
- risk/stats/binom.py +51 -0
- risk/stats/chi2.py +69 -0
- risk/stats/hypergeom.py +28 -18
- risk/stats/permutation/__init__.py +1 -1
- risk/stats/permutation/permutation.py +45 -39
- risk/stats/permutation/test_functions.py +25 -17
- risk/stats/poisson.py +17 -11
- risk/stats/stats.py +20 -16
- risk/stats/zscore.py +68 -0
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
- risk_network-0.0.9b26.dist-info/RECORD +44 -0
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
- risk/network/graph.py +0 -159
- risk/network/plot/__init__.py +0 -6
- risk/network/plot/network.py +0 -282
- risk/network/plot/plotter.py +0 -137
- risk/network/plot/utils/color.py +0 -353
- risk/network/plot/utils/layout.py +0 -53
- risk_network-0.0.8b18.dist-info/RECORD +0 -37
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
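Note on the layout changes in the file list above: plotting moved from risk/network/plot/ to risk/network/plotter/, graph handling gained a dedicated risk/network/graph/ package, and the log modules were renamed (config.py to console.py, params.py to parameters.py). A rough sketch of how imports shift under the new layout; the module paths come from the file list, but the exported symbols are assumptions, not confirmed API:

# Hypothetical import changes implied by the file moves above;
# exported names are assumptions, not confirmed API.
# 0.0.8b18:
#   from risk.network.plot import ...       # package removed in 0.0.9b26
#   from risk.log.params import ...         # module renamed
# 0.0.9b26:
#   from risk.network.plotter import ...    # new package
#   from risk.log.parameters import ...     # renamed from params.py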
risk/stats/hypergeom.py
CHANGED
@@ -6,44 +6,54 @@ risk/stats/hypergeom
 from typing import Any, Dict
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from scipy.stats import hypergeom
 
 
 def compute_hypergeom_test(
-    neighborhoods: np.ndarray, annotations: np.ndarray, null_distribution: str = "network"
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
 ) -> Dict[str, Any]:
-    """Compute hypergeometric test for enrichment and depletion in neighborhoods.
+    """
+    Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 
     Returns:
-        dict: Dictionary containing depletion and enrichment p-values.
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Get the total number of nodes in the network
-    total_node_count = …
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums
+    neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Convert to dense array
+    annotation_sums = annotations.sum(axis=0).A.flatten()  # Convert to dense array
 
     if null_distribution == "network":
-        background_population = total_node_count
-        neighborhood_sums = np.sum(neighborhoods, axis=0, keepdims=True).T
-        annotation_sums = np.sum(annotations, axis=0, keepdims=True)
+        background_population = total_nodes
     elif null_distribution == "annotations":
-        annotated_nodes = np.sum(annotations, axis=1) > 0
-        background_population = np.sum(annotated_nodes)
-        neighborhood_sums = np.sum(neighborhoods[annotated_nodes], axis=0, keepdims=True).T
-        annotation_sums = np.sum(annotations[annotated_nodes], axis=0, keepdims=True)
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
-    # …
-    annotated_in_neighborhood = neighborhoods.T @ annotations
-
+    # Observed counts
+    annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse result
+    annotated_in_neighborhood = annotated_in_neighborhood.toarray()  # Convert to dense
+    # Align shapes for broadcasting
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)
+    annotation_sums = annotation_sums.reshape(1, -1)
+    background_population = np.array(background_population).reshape(1, 1)
+
+    # Compute hypergeometric p-values
     depletion_pvals = hypergeom.cdf(
         annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
     )
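Note: the hunk above swaps dense NumPy arrays for scipy.sparse csr_matrix inputs and reshapes the sums so that scipy.stats.hypergeom broadcasts over a (neighborhoods x annotation terms) grid. A minimal usage sketch with toy matrices; the inputs are illustrative, and the result keys are printed rather than assumed:

import numpy as np
from scipy.sparse import csr_matrix

from risk.stats.hypergeom import compute_hypergeom_test

# Toy inputs: 4 nodes, binary neighborhood membership (nodes x nodes)
# and binary annotations (nodes x 2 terms). Values are illustrative only.
neighborhoods = csr_matrix(np.array([
    [1, 1, 0, 0],
    [1, 1, 1, 0],
    [0, 1, 1, 1],
    [0, 0, 1, 1],
], dtype=float))
annotations = csr_matrix(np.array([
    [1, 0],
    [1, 0],
    [0, 1],
    [0, 1],
], dtype=float))

result = compute_hypergeom_test(neighborhoods, annotations, null_distribution="network")
# One p-value per (neighborhood, annotation term) pair:
print({key: vals.shape for key, vals in result.items()})  # each (4, 2)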
risk/stats/permutation/permutation.py
CHANGED
@@ -5,18 +5,19 @@ risk/stats/permutation/permutation
 """
 
 from multiprocessing import get_context, Manager
 from multiprocessing.managers import ValueProxy
-from tqdm import tqdm
-from typing import Any, Callable, Dict
+from typing import Any, Callable, Dict, List, Tuple, Union
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from threadpoolctl import threadpool_limits
+from tqdm import tqdm
 
 from risk.stats.permutation.test_functions import DISPATCH_TEST_FUNCTIONS
 
 
 def compute_permutation_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
     score_metric: str = "sum",
     null_distribution: str = "network",
     num_permutations: int = 1000,
@@ -26,18 +27,19 @@ def compute_permutation_test(
     """Compute permutation test for enrichment and depletion in neighborhoods.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
-        score_metric (str, optional): Metric to use for scoring ('sum' …
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        score_metric (str, optional): Metric to use for scoring ('sum' or 'stdev'). Defaults to "sum".
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
         random_seed (int, optional): Seed for random number generation. Defaults to 888.
         max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.
 
     Returns:
-        dict: Dictionary containing depletion and enrichment p-values.
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Ensure that the matrices are in the correct format and free of NaN values
+    # NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
     neighborhoods = neighborhoods.astype(np.float32)
     annotations = annotations.astype(np.float32)
     # Retrieve the appropriate neighborhood score function based on the metric
@@ -65,19 +67,19 @@ def compute_permutation_test(
 
 
 def _run_permutation_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
     neighborhood_score_func: Callable,
     null_distribution: str = "network",
     num_permutations: int = 1000,
     random_seed: int = 888,
     max_workers: int = 4,
 ) -> tuple:
-    """Run …
+    """Run the permutation test to calculate depletion and enrichment counts.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
         num_permutations (int, optional): Number of permutations. Defaults to 1000.
@@ -93,14 +95,14 @@ def _run_permutation_test(
     if null_distribution == "network":
         idxs = range(annotations.shape[0])
     elif null_distribution == "annotations":
-        idxs = np.nonzero(…
+        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
-    # Replace NaNs with zeros in the annotations matrix
-    annotations[np.isnan(annotations)] = 0
+    # Replace NaNs with zeros in the sparse annotations matrix
+    annotations.data[np.isnan(annotations.data)] = 0
     annotation_matrix_obsv = annotations[idxs]
     neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
     # Calculate observed neighborhood scores
@@ -121,28 +123,35 @@ def _run_permutation_test(
     manager = Manager()
     progress_counter = manager.Value("i", 0)
     total_progress = num_permutations
+
+    # Generate precomputed permutations
+    permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
+    # Divide permutations into batches for workers
+    batch_size = subset_size + (1 if remainder > 0 else 0)
+    permutation_batches = [
+        permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)
+    ]
+
     # Execute the permutation test using multiprocessing
     with ctx.Pool(max_workers) as pool:
         with tqdm(total=total_progress, desc="Total progress", position=0) as progress:
             # Prepare parameters for multiprocessing
             params_list = [
                 (
+                    permutation_batches[i],  # Pass the batch of precomputed permutations
                     annotations,
-                    np.array(idxs),
                     neighborhoods_matrix_obsv,
                     observed_neighborhood_scores,
                     neighborhood_score_func,
-                    subset_size + (1 if i < remainder else 0),
                     num_permutations,
                     progress_counter,
                     max_workers,
-                    rng,  # Pass the random number generator to each worker
                 )
                 for i in range(max_workers)
             ]
 
             # Start the permutation process in parallel
-            results = pool.starmap_async(_permutation_process_subset, params_list, chunksize=1)
+            results = pool.starmap_async(_permutation_process_batch, params_list, chunksize=1)
 
             # Update progress bar based on progress_counter
             while not results.ready():
@@ -159,31 +168,27 @@ def _run_permutation_test(
     return counts_depletion, counts_enrichment
 
 
-def _permutation_process_subset(
-    annotation_matrix: np.ndarray,
-    idxs: np.ndarray,
-    neighborhoods_matrix_obsv: np.ndarray,
+def _permutation_process_batch(
+    permutations: Union[List, Tuple, np.ndarray],
+    annotation_matrix: csr_matrix,
+    neighborhoods_matrix_obsv: csr_matrix,
     observed_neighborhood_scores: np.ndarray,
     neighborhood_score_func: Callable,
-    subset_size: int,
     num_permutations: int,
     progress_counter: ValueProxy,
     max_workers: int,
-    rng: np.random.Generator,
 ) -> tuple:
-    """Process a subset of permutations for the permutation test.
+    """Process a batch of permutations in a worker process.
 
     Args:
-        annotation_matrix (np.ndarray): Matrix representing annotation values.
-        idxs (np.ndarray): Indices of nodes to permute.
-        neighborhoods_matrix_obsv (np.ndarray): Matrix representing observed neighborhoods.
+        permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
+        annotation_matrix (csr_matrix): Sparse binary matrix representing annotations.
+        neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
         observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
-        subset_size (int): Number of permutations to run in this subset.
         num_permutations (int): Number of total permutations across all subsets.
         progress_counter (multiprocessing.managers.ValueProxy): Shared counter for tracking progress.
         max_workers (int): Number of workers for multiprocessing.
-        rng (np.random.Generator): Random number generator object.
 
     Returns:
         tuple: Local counts of depletion and enrichment.
@@ -192,7 +197,9 @@ def _permutation_process_subset(
     local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
     local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
 
-    # …
+    # Limit the number of threads used by NumPy's BLAS implementation to 1 when more than one worker is used
+    # NOTE: This does not work for Mac M chips due to a bug in the threadpoolctl package
+    # This is currently a known issue and is being addressed by the maintainers [https://github.com/joblib/threadpoolctl/issues/135]
     limits = None if max_workers == 1 else 1
     with threadpool_limits(limits=limits, user_api="blas"):
         # Initialize a local counter for batched progress updates
@@ -200,16 +207,16 @@ def _permutation_process_subset(
         # Calculate the modulo value based on total permutations for 1/100th frequency updates
         modulo_value = max(1, num_permutations // 100)
 
-        for _ in range(subset_size):
-            # Permute the annotation matrix
-            annotation_matrix_permut = annotation_matrix[rng.permutation(idxs)]
+        for permuted_idxs in permutations:
+            # Apply precomputed permutation
+            annotation_matrix_permut = annotation_matrix[permuted_idxs]
             # Calculate permuted neighborhood scores
             with np.errstate(invalid="ignore", divide="ignore"):
                 permuted_neighborhood_scores = neighborhood_score_func(
                     neighborhoods_matrix_obsv, annotation_matrix_permut
                 )
 
-            # Update local depletion and enrichment counts
+            # Update local depletion and enrichment counts
             local_counts_depletion = np.add(
                 local_counts_depletion, permuted_neighborhood_scores <= observed_neighborhood_scores
             )
@@ -218,9 +225,8 @@ def _permutation_process_subset(
                 permuted_neighborhood_scores >= observed_neighborhood_scores,
             )
 
-            # Update …
+            # Update progress
             local_progress += 1
-            # Update shared progress counter every 1/100th of total permutations
             if local_progress % modulo_value == 0:
                 progress_counter.value += modulo_value
 
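Note: the central change in this file is that permutations are now precomputed once from a single seeded generator and sliced into near-equal batches, one per worker, instead of each worker permuting on the fly with a shared generator. This keeps results reproducible for a given seed regardless of worker count. A self-contained sketch of the batching arithmetic; subset_size and remainder come from earlier, uncaptured lines of the file, so divmod stands in for them here:

import numpy as np

num_permutations, max_workers = 10, 3
idxs = np.arange(8)
rng = np.random.default_rng(888)

# subset_size/remainder are assumed to come from integer division upstream
subset_size, remainder = divmod(num_permutations, max_workers)
permutations = [rng.permutation(idxs) for _ in range(num_permutations)]
batch_size = subset_size + (1 if remainder > 0 else 0)
permutation_batches = [
    permutations[i * batch_size : (i + 1) * batch_size] for i in range(max_workers)
]
print([len(batch) for batch in permutation_batches])  # [4, 4, 2]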
risk/stats/permutation/test_functions.py
CHANGED
@@ -4,6 +4,7 @@ risk/stats/permutation/test_functions
 """
 
 import numpy as np
+from scipy.sparse import csr_matrix
 
 # Note: Cython optimizations provided minimal performance benefits.
 # The final version with Cython is archived in the `cython_permutation` branch.
@@ -11,46 +12,53 @@ import numpy as np
 
 
 def compute_neighborhood_score_by_sum(
-    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
+    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
 ) -> np.ndarray:
-    """Compute the sum of attribute values for each neighborhood.
+    """Compute the sum of attribute values for each neighborhood using sparse matrices.
 
     Args:
-        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
-        annotation_matrix (np.ndarray): Matrix representing annotation values.
+        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
 
     Returns:
-        np.ndarray: Sum of attribute values for each neighborhood.
+        np.ndarray: Dense array of summed attribute values for each neighborhood.
     """
     # Calculate the neighborhood score as the dot product of neighborhoods and annotations
-    neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
-    return neighborhood_score
+    neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
+    # Convert the result to a dense array for downstream calculations
+    neighborhood_score_dense = neighborhood_score.toarray()
+    return neighborhood_score_dense
 
 
 def compute_neighborhood_score_by_stdev(
-    neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
+    neighborhoods_matrix: csr_matrix, annotation_matrix: csr_matrix
 ) -> np.ndarray:
-    """Compute the standard deviation of neighborhood scores.
+    """Compute the standard deviation of neighborhood scores for sparse matrices.
 
     Args:
-        neighborhoods_matrix (np.ndarray): Binary matrix representing neighborhoods.
-        annotation_matrix (np.ndarray): Matrix representing annotation values.
+        neighborhoods_matrix (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotation_matrix (csr_matrix): Sparse matrix representing annotation values.
 
     Returns:
         np.ndarray: Standard deviation of the neighborhood scores.
     """
     # Calculate the neighborhood score as the dot product of neighborhoods and annotations
-    neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
-    # Calculate the number of elements in each neighborhood
-    N = np.sum(neighborhoods_matrix, axis=1)
+    neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
+    # Calculate the number of elements in each neighborhood (sum of rows)
+    N = neighborhoods_matrix.sum(axis=1).A.flatten()  # Convert to 1D array
+    # Avoid division by zero by replacing zeros in N with np.nan temporarily
+    N[N == 0] = np.nan
     # Compute the mean of the neighborhood scores
-    M = neighborhood_score / N[:, None]
+    M = neighborhood_score.multiply(1 / N[:, None]).toarray()  # Sparse element-wise division
     # Compute the mean of squares (EXX) directly using squared annotation matrix
-    EXX = np.dot(neighborhoods_matrix, annotation_matrix**2) / N[:, None]
+    annotation_squared = annotation_matrix.multiply(annotation_matrix)  # Element-wise squaring
+    EXX = (neighborhoods_matrix @ annotation_squared).multiply(1 / N[:, None]).toarray()
     # Calculate variance as EXX - M^2
-    variance = EXX - M**2
+    variance = EXX - np.power(M, 2)
     # Compute the standard deviation as the square root of the variance
     neighborhood_stdev = np.sqrt(variance)
+    # Replace np.nan back with zeros in case N was 0 (no elements in the neighborhood)
+    neighborhood_stdev[np.isnan(neighborhood_stdev)] = 0
     return neighborhood_stdev
 
 
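Note: both scoring functions now use sparse matrix multiplication and densify only the result. A quick illustrative check that the sparse 'sum' score matches a plain dense dot product on the same binary data:

import numpy as np
from scipy.sparse import csr_matrix

from risk.stats.permutation.test_functions import compute_neighborhood_score_by_sum

rng = np.random.default_rng(0)
nbhd_dense = (rng.random((6, 6)) < 0.4).astype(float)
annot_dense = (rng.random((6, 3)) < 0.4).astype(float)

sparse_score = compute_neighborhood_score_by_sum(
    csr_matrix(nbhd_dense), csr_matrix(annot_dense)
)
# Binary inputs yield exact integer sums, so the two paths agree exactly
print(np.allclose(sparse_score, nbhd_dense @ annot_dense))  # True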
risk/stats/poisson.py
CHANGED
@@ -3,42 +3,48 @@ risk/stats/poisson
 ~~~~~~~~~~~~~~~~~~
 """
 
-from typing import …
+from typing import Any, Dict
 
 import numpy as np
+from scipy.sparse import csr_matrix
 from scipy.stats import poisson
 
 
 def compute_poisson_test(
-    neighborhoods: np.ndarray, annotations: np.ndarray, null_distribution: str = "network"
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
 ) -> Dict[str, Any]:
-    """Compute Poisson test for enrichment and depletion in neighborhoods.
+    """
+    Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
 
     Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
         null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
 
     Returns:
-        dict: Dictionary containing depletion and enrichment p-values.
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
     """
     # Matrix multiplication to get the number of annotated nodes in each neighborhood
-    annotated_in_neighborhood = neighborhoods @ annotations
+    annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
+    # Convert annotated counts to dense for downstream calculations
+    annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
 
     # Compute lambda_expected based on the chosen null distribution
     if null_distribution == "network":
         # Use the mean across neighborhoods (axis=1)
-        lambda_expected = np.mean(annotated_in_neighborhood, axis=1, keepdims=True)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
     elif null_distribution == "annotations":
         # Use the mean across annotations (axis=0)
-        lambda_expected = np.mean(annotated_in_neighborhood, axis=0, keepdims=True)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
         )
 
     # Compute p-values for enrichment and depletion using Poisson distribution
-    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood - 1, lambda_expected)
-    depletion_pvals = poisson.cdf(annotated_in_neighborhood, lambda_expected)
+    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
+    depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
 
     return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
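Note: usage mirrors the hypergeometric test. For enrichment, the CDF is evaluated at observed - 1, so the upper tail P(X >= observed) includes the observed count itself. A toy call with illustrative inputs:

import numpy as np
from scipy.sparse import csr_matrix

from risk.stats.poisson import compute_poisson_test

neighborhoods = csr_matrix(np.eye(4))  # toy: each node is its own neighborhood
annotations = csr_matrix(np.array([[1, 0], [1, 0], [0, 1], [0, 1]], dtype=float))

res = compute_poisson_test(neighborhoods, annotations, null_distribution="network")
print(res["enrichment_pvals"].shape)  # (4, 2)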
risk/stats/stats.py
CHANGED
@@ -3,7 +3,7 @@ risk/stats/stats
 ~~~~~~~~~~~~~~~~
 """
 
-from typing import Union
+from typing import Any, Dict, Union
 
 import numpy as np
 from statsmodels.stats.multitest import fdrcorrection
@@ -15,7 +15,7 @@ def calculate_significance_matrices(
     tail: str = "right",
     pval_cutoff: float = 0.05,
     fdr_cutoff: float = 0.05,
-) -> dict:
+) -> Dict[str, Any]:
     """Calculate significance matrices based on p-values and specified tail.
 
     Args:
@@ -26,8 +26,8 @@ def calculate_significance_matrices(
         fdr_cutoff (float, optional): Cutoff for FDR significance if applied. Defaults to 0.05.
 
     Returns:
-        dict: Dictionary containing the enrichment matrix, binary significance matrix,
-            and the matrix of significant enrichment values.
+        Dict[str, Any]: Dictionary containing the enrichment matrix, binary significance matrix,
+            and the matrix of significant enrichment values.
     """
     if fdr_cutoff < 1.0:
         # Apply FDR correction to depletion p-values
@@ -44,7 +44,7 @@ def calculate_significance_matrices(
             enrichment_pvals, enrichment_qvals, pval_cutoff=pval_cutoff, fdr_cutoff=fdr_cutoff
         )
         # Compute the enrichment matrix using both q-values and p-values
-        enrichment_matrix = (…
+        enrichment_matrix = (enrichment_pvals**0.5) * (enrichment_qvals**2)
     else:
         # Compute threshold matrices based on p-value cutoffs only
         depletion_alpha_threshold_matrix = _compute_threshold_matrix(
@@ -62,7 +62,7 @@ def calculate_significance_matrices(
     log_enrichment_matrix = -np.log10(enrichment_matrix)
 
     # Select the appropriate significance matrices based on the specified tail
-    enrichment_matrix, significant_binary_enrichment_matrix = _select_significance_matrices(
+    significance_matrix, significant_binary_significance_matrix = _select_significance_matrices(
         tail,
         log_depletion_matrix,
         depletion_alpha_threshold_matrix,
@@ -71,12 +71,14 @@ def calculate_significance_matrices(
     )
 
     # Filter the enrichment matrix using the binary significance matrix
-    significant_enrichment_matrix = np.where(significant_binary_enrichment_matrix == 1, enrichment_matrix, 0)
+    significant_significance_matrix = np.where(
+        significant_binary_significance_matrix == 1, significance_matrix, 0
+    )
 
     return {
-        "enrichment_matrix": enrichment_matrix,
-        "significant_enrichment_matrix": significant_enrichment_matrix,
-        "significant_binary_enrichment_matrix": significant_binary_enrichment_matrix,
+        "significance_matrix": significance_matrix,
+        "significant_significance_matrix": significant_significance_matrix,
+        "significant_binary_significance_matrix": significant_binary_significance_matrix,
     }
 
 
@@ -107,15 +109,15 @@ def _select_significance_matrices(
 
     if tail == "left":
         # Select depletion matrix and corresponding alpha threshold for left-tail analysis
-        enrichment_matrix = -log_depletion_matrix
+        significance_matrix = -log_depletion_matrix
         alpha_threshold_matrix = depletion_alpha_threshold_matrix
     elif tail == "right":
         # Select enrichment matrix and corresponding alpha threshold for right-tail analysis
-        enrichment_matrix = log_enrichment_matrix
+        significance_matrix = log_enrichment_matrix
         alpha_threshold_matrix = enrichment_alpha_threshold_matrix
     elif tail == "both":
         # Select the matrix with the highest absolute values while preserving the sign
-        enrichment_matrix = np.where(
+        significance_matrix = np.where(
             np.abs(log_depletion_matrix) >= np.abs(log_enrichment_matrix),
             -log_depletion_matrix,
             log_enrichment_matrix,
@@ -124,13 +126,15 @@ def _select_significance_matrices(
         alpha_threshold_matrix = np.logical_or(
             depletion_alpha_threshold_matrix, enrichment_alpha_threshold_matrix
         )
+    else:
+        raise ValueError("Invalid value for 'tail'. Must be 'left', 'right', or 'both'.")
 
     # Create a binary significance matrix where valid indices meet the alpha threshold
     valid_idxs = ~np.isnan(alpha_threshold_matrix)
-    significant_binary_enrichment_matrix = np.zeros(alpha_threshold_matrix.shape)
-    significant_binary_enrichment_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
+    significant_binary_significance_matrix = np.zeros(alpha_threshold_matrix.shape)
+    significant_binary_significance_matrix[valid_idxs] = alpha_threshold_matrix[valid_idxs]
 
-    return enrichment_matrix, significant_binary_enrichment_matrix
+    return significance_matrix, significant_binary_significance_matrix
 
 
 def _compute_threshold_matrix(
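Note: for tail='both', the hunk above picks whichever log-score has the larger magnitude, keeping depletion negative and enrichment positive. A toy NumPy illustration of that selection rule:

import numpy as np

log_depletion = np.array([[2.0, 0.5]])
log_enrichment = np.array([[1.0, 3.0]])
# Keep the stronger signal per cell; depletion carries a negative sign
significance = np.where(
    np.abs(log_depletion) >= np.abs(log_enrichment),
    -log_depletion,
    log_enrichment,
)
print(significance)  # [[-2.  3.]]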
risk/stats/zscore.py
ADDED
@@ -0,0 +1,68 @@
+"""
+risk/stats/zscore
+~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict
+
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.stats import norm
+
+
+def compute_zscore_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[1]
+
+    # Compute sums
+    if null_distribution == "network":
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
+        annotation_sums = annotations.sum(axis=0).A.flatten()  # Dense row sums
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed values
+    observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
+    # Expected values under the null
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
+    annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+
+    # Standard deviation under the null
+    std_dev = np.sqrt(
+        expected
+        * (1 - annotation_sums / background_population)
+        * (1 - neighborhood_sums / background_population)
+    )
+    std_dev[std_dev == 0] = np.nan  # Avoid division by zero
+    # Compute Z-scores
+    z_scores = (observed - expected) / std_dev
+
+    # Convert Z-scores to depletion and enrichment p-values
+    enrichment_pvals = norm.sf(z_scores)  # Upper tail
+    depletion_pvals = norm.cdf(z_scores)  # Lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
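Note: the new Z-score test shares the csr_matrix interface of the other tests; norm.sf(z) is the numerically stable upper tail, equivalent to 1 - norm.cdf(z). A usage sketch with random binary toys:

import numpy as np
from scipy.sparse import csr_matrix

from risk.stats.zscore import compute_zscore_test

rng = np.random.default_rng(888)
neighborhoods = csr_matrix((rng.random((10, 10)) < 0.3).astype(float))
annotations = csr_matrix((rng.random((10, 4)) < 0.3).astype(float))

res = compute_zscore_test(neighborhoods, annotations, null_distribution="network")
print(res["depletion_pvals"].shape)  # (10, 4)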
{risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/METADATA
CHANGED
@@ -1,10 +1,10 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: risk-network
-Version: 0.0.8b18
+Version: 0.0.9b26
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
-License: …
+License: GNU GENERAL PUBLIC LICENSE
         Version 3, 29 June 2007
 
  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
@@ -695,19 +695,23 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: ipywidgets
-Requires-Dist: markov_clustering
+Requires-Dist: leidenalg
+Requires-Dist: markov_clustering
 Requires-Dist: matplotlib
 Requires-Dist: networkx
 Requires-Dist: nltk==3.8.1
 Requires-Dist: numpy
 Requires-Dist: openpyxl
 Requires-Dist: pandas
+Requires-Dist: python-igraph
 Requires-Dist: python-louvain
 Requires-Dist: scikit-learn
 Requires-Dist: scipy
 Requires-Dist: statsmodels
 Requires-Dist: threadpoolctl
 Requires-Dist: tqdm
+Dynamic: author
+Dynamic: requires-python
 
 # RISK Network
 
@@ -724,7 +728,7 @@ Requires-Dist: tqdm
 ![…](…)
 ![…](…)
 
-**RISK (…
+**RISK** (Regional Inference of Significant Kinships) is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
 
 ## Documentation and Tutorial
 
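Note: per the METADATA hunk, 0.0.9b26 adds leidenalg and python-igraph as requirements, consistent with the expanded community-detection code in risk/neighborhoods/community.py. A quick post-upgrade sanity check; the pip command is the standard upgrade path, not taken from this diff:

# After upgrading (e.g. `pip install --upgrade risk-network`), the new
# clustering dependencies from the hunk above should resolve and import:
import igraph      # installed via the python-igraph requirement
import leidenalg   # new requirement in 0.0.9b26

print(igraph.__version__)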