risk_network-0.0.9b25-py3-none-any.whl → risk_network-0.0.9b27-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- risk/__init__.py +1 -1
- risk/annotations/annotations.py +39 -38
- risk/neighborhoods/api.py +1 -5
- risk/neighborhoods/community.py +140 -95
- risk/neighborhoods/domains.py +11 -3
- risk/neighborhoods/neighborhoods.py +34 -18
- risk/network/geometry.py +24 -27
- risk/network/io.py +39 -15
- risk/stats/__init__.py +8 -6
- risk/stats/permutation/permutation.py +1 -1
- risk/stats/{stats.py → significance.py} +2 -2
- risk/stats/stat_tests.py +267 -0
- {risk_network-0.0.9b25.dist-info → risk_network-0.0.9b27.dist-info}/METADATA +1 -1
- {risk_network-0.0.9b25.dist-info → risk_network-0.0.9b27.dist-info}/RECORD +17 -21
- risk/stats/binom.py +0 -51
- risk/stats/chi2.py +0 -69
- risk/stats/hypergeom.py +0 -64
- risk/stats/poisson.py +0 -50
- risk/stats/zscore.py +0 -68
- {risk_network-0.0.9b25.dist-info → risk_network-0.0.9b27.dist-info}/LICENSE +0 -0
- {risk_network-0.0.9b25.dist-info → risk_network-0.0.9b27.dist-info}/WHEEL +0 -0
- {risk_network-0.0.9b25.dist-info → risk_network-0.0.9b27.dist-info}/top_level.txt +0 -0
risk/neighborhoods/neighborhoods.py CHANGED

```diff
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Tuple, Union
 
 import networkx as nx
 import numpy as np
+from scipy.sparse import csr_matrix
 from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics.pairwise import cosine_similarity
 
@@ -34,43 +35,43 @@ def get_network_neighborhoods(
     louvain_resolution: float = 0.1,
     leiden_resolution: float = 1.0,
     random_seed: int = 888,
-) ->
-    """Calculate the combined neighborhoods for each node
+) -> csr_matrix:
+    """Calculate the combined neighborhoods for each node using sparse matrices.
 
     Args:
         network (nx.Graph): The network graph.
         distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
-        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction
+        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction thresholds.
         louvain_resolution (float, optional): Resolution parameter for the Louvain method.
         leiden_resolution (float, optional): Resolution parameter for the Leiden method.
         random_seed (int, optional): Random seed for methods requiring random initialization.
 
     Returns:
-
+        csr_matrix: The combined neighborhood matrix.
     """
     # Set random seed for reproducibility
     random.seed(random_seed)
     np.random.seed(random_seed)
 
-    # Ensure distance_metric is a list
+    # Ensure distance_metric is a list for multi-algorithm handling
     if isinstance(distance_metric, (str, np.ndarray)):
         distance_metric = [distance_metric]
-    # Ensure fraction_shortest_edges is a list
+    # Ensure fraction_shortest_edges is a list for multi-threshold handling
     if isinstance(fraction_shortest_edges, (float, int)):
         fraction_shortest_edges = [fraction_shortest_edges] * len(distance_metric)
-    #
+    # Validate matching lengths of distance metrics and thresholds
     if len(distance_metric) != len(fraction_shortest_edges):
         raise ValueError(
             "The number of distance metrics must match the number of edge length thresholds."
         )
 
-    # Initialize
+    # Initialize a sparse LIL matrix for incremental updates
     num_nodes = network.number_of_nodes()
-
-
+    # Initialize a sparse matrix with the same shape as the network
+    combined_neighborhoods = csr_matrix((num_nodes, num_nodes), dtype=np.uint8)
     # Loop through each distance metric and corresponding edge rank fraction
     for metric, percentile in zip(distance_metric, fraction_shortest_edges):
-        #
+        # Compute neighborhoods for the specified metric
        if metric == "greedy_modularity":
            neighborhoods = calculate_greedy_modularity_neighborhoods(
                network, fraction_shortest_edges=percentile
@@ -107,22 +108,37 @@ def get_network_neighborhoods(
             )
         else:
             raise ValueError(
-                "
+                "Invalid distance metric. Choose from: 'greedy_modularity', 'label_propagation',"
                 "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
             )
 
-        #
+        # Add the sparse neighborhood matrix
         combined_neighborhoods += neighborhoods
 
-    # Ensure
-
-    # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
-    # focusing on the most significant connection per row (or nodes).
-    combined_neighborhoods = _set_max_row_value_to_one(combined_neighborhoods)
+    # Ensure maximum value in each row is set to 1
+    combined_neighborhoods = _set_max_row_value_to_one_sparse(combined_neighborhoods)
 
     return combined_neighborhoods
 
 
+def _set_max_row_value_to_one_sparse(matrix: csr_matrix) -> csr_matrix:
+    """Set the maximum value in each row of a sparse matrix to 1.
+
+    Args:
+        matrix (csr_matrix): The input sparse matrix.
+
+    Returns:
+        csr_matrix: The modified sparse matrix where only the maximum value in each row is set to 1.
+    """
+    # Iterate over each row and set the maximum value to 1
+    for i in range(matrix.shape[0]):
+        row_data = matrix[i].data
+        if len(row_data) > 0:
+            row_data[:] = (row_data == max(row_data)).astype(int)
+
+    return matrix
+
+
 def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
     """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
     useful for neighborhood matrices that have undergone multiple neighborhood detection algorithms, where the
```
risk/network/geometry.py CHANGED

```diff
@@ -3,8 +3,6 @@ risk/network/geometry
 ~~~~~~~~~~~~~~~~~~~~~
 """
 
-import copy
-
 import networkx as nx
 import numpy as np
 
@@ -31,44 +29,43 @@ def assign_edge_lengths(
         """Compute distances between pairs of coordinates."""
         u_coords, v_coords = coords[:, 0, :], coords[:, 1, :]
         if is_sphere:
-
-
-            u_coords /= u_norm
-            v_coords /= v_norm
+            u_coords /= np.linalg.norm(u_coords, axis=1, keepdims=True)
+            v_coords /= np.linalg.norm(v_coords, axis=1, keepdims=True)
             dot_products = np.einsum("ij,ij->i", u_coords, v_coords)
             return np.arccos(np.clip(dot_products, -1.0, 1.0))
-
         return np.linalg.norm(u_coords - v_coords, axis=1)
 
     # Normalize graph coordinates and weights
     _normalize_graph_coordinates(G)
     _normalize_weights(G)
+
     # Map nodes to sphere and adjust depth if required
     if compute_sphere:
         _map_to_sphere(G)
-        G_depth = _create_depth(
+        G_depth = _create_depth(G, surface_depth=surface_depth)
     else:
-        G_depth =
-
-    # Precompute edge coordinate arrays
-    edge_data =
-
-
-
-
-
-
-
-
-
-
-
-
-
+        G_depth = G
+
+    # Precompute edge coordinate arrays and compute distances in bulk
+    edge_data = np.array(
+        [
+            [
+                np.array(
+                    [G_depth.nodes[u]["x"], G_depth.nodes[u]["y"], G_depth.nodes[u].get("z", 0)]
+                ),
+                np.array(
+                    [G_depth.nodes[v]["x"], G_depth.nodes[v]["y"], G_depth.nodes[v].get("z", 0)]
+                ),
+            ]
+            for u, v in G_depth.edges
+        ]
+    )
+    # Compute distances
+    distances = compute_distance_vectorized(edge_data, compute_sphere)
     # Assign distances back to the graph
-    for (u, v), distance in zip(
+    for (u, v), distance in zip(G_depth.edges, distances):
         if include_edge_weight:
-            weight = G.edges[u, v].get("normalized_weight",
+            weight = G.edges[u, v].get("normalized_weight", 1e-6)  # Avoid divide-by-zero
             G.edges[u, v]["length"] = distance / np.sqrt(weight)
         else:
             G.edges[u, v]["length"] = distance
```
risk/network/io.py CHANGED

```diff
@@ -217,6 +217,9 @@ class NetworkIO:
 
         Returns:
             nx.Graph: Loaded and processed network.
+
+        Raises:
+            ValueError: If no matching attribute metadata file is found.
         """
         filetype = "Cytoscape"
         # Log the loading of the Cytoscape file
@@ -258,13 +261,29 @@ class NetworkIO:
 
             # Read the node attributes (from /tables/)
             attribute_metadata_keywords = ["/tables/", "SHARED_ATTRS", "edge.cytable"]
-
-
-
-
-
-
-
+            # Use a generator to find the first matching file
+            attribute_metadata = next(
+                (
+                    os.path.join(tmp_dir, cf)
+                    for cf in cys_files
+                    if all(keyword in cf for keyword in attribute_metadata_keywords)
+                ),
+                None,  # Default if no file matches
+            )
+            if attribute_metadata:
+                # Optimize `read_csv` by leveraging proper options
+                attribute_table = pd.read_csv(
+                    attribute_metadata,
+                    sep=",",
+                    header=None,
+                    skiprows=1,
+                    dtype=str,  # Use specific dtypes to reduce memory usage
+                    engine="c",  # Use the C engine for parsing if compatible
+                    low_memory=False,  # Optimize memory handling for large files
+                )
+            else:
+                raise ValueError("No matching attribute metadata file found.")
+
             # Set columns
             attribute_table.columns = attribute_table.iloc[0]
             # Skip first four rows
@@ -464,14 +483,19 @@ class NetworkIO:
         Args:
             G (nx.Graph): A NetworkX graph object.
         """
-
-
-        nx.set_edge_attributes(G,
-
-
-
-
-
+        # Set default weight for all edges in bulk
+        default_weight = 1.0
+        nx.set_edge_attributes(G, default_weight, "weight")
+        # Check and assign user-defined edge weights if available
+        weight_attributes = nx.get_edge_attributes(G, self.weight_label)
+        if weight_attributes:
+            nx.set_edge_attributes(G, weight_attributes, "weight")
+
+        # Log missing weights if include_edge_weight is enabled
+        if self.include_edge_weight:
+            missing_weights = len(G.edges) - len(weight_attributes)
+            if missing_weights > 0:
+                logger.debug(f"Total edges missing weights: {missing_weights}")
 
     def _validate_nodes(self, G: nx.Graph) -> None:
         """Validate the graph structure and attributes with attribute fallback for positions and labels.
```
risk/stats/__init__.py CHANGED

```diff
@@ -3,11 +3,13 @@ risk/stats
 ~~~~~~~~~~
 """
 
-from risk.stats.binom import compute_binom_test
-from risk.stats.chi2 import compute_chi2_test
-from risk.stats.hypergeom import compute_hypergeom_test
 from risk.stats.permutation import compute_permutation_test
-from risk.stats.
-
+from risk.stats.stat_tests import (
+    compute_binom_test,
+    compute_chi2_test,
+    compute_hypergeom_test,
+    compute_poisson_test,
+    compute_zscore_test,
+)
 
-from risk.stats.
+from risk.stats.significance import calculate_significance_matrices
```
risk/stats/permutation/permutation.py CHANGED

```diff
@@ -95,7 +95,7 @@ def _run_permutation_test(
     if null_distribution == "network":
         idxs = range(annotations.shape[0])
     elif null_distribution == "annotations":
-        idxs = np.nonzero(
+        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
```
risk/stats/stat_tests.py ADDED

```diff
@@ -0,0 +1,267 @@
+"""
+risk/stats/stat_tests
+~~~~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict
+
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.stats import binom
+from scipy.stats import chi2
+from scipy.stats import hypergeom
+from scipy.stats import poisson
+from scipy.stats import norm
+
+
+def compute_binom_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Get the total number of nodes in the network
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums (remain sparse here)
+    neighborhood_sizes = neighborhoods.sum(axis=1)  # Row sums
+    annotation_totals = annotations.sum(axis=0)  # Column sums
+    # Compute probabilities (convert to dense)
+    if null_distribution == "network":
+        p_values = (annotation_totals / total_nodes).A.flatten()  # Dense 1D array
+    elif null_distribution == "annotations":
+        p_values = (annotation_totals / annotations.sum()).A.flatten()  # Dense 1D array
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed counts (sparse matrix multiplication)
+    annotated_counts = neighborhoods @ annotations  # Sparse result
+    annotated_counts_dense = annotated_counts.toarray()  # Convert for dense operations
+
+    # Compute enrichment and depletion p-values
+    enrichment_pvals = 1 - binom.cdf(annotated_counts_dense - 1, neighborhood_sizes.A, p_values)
+    depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes.A, p_values)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+def compute_chi2_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[0]
+
+    if null_distribution == "network":
+        # Case 1: Use all nodes as the background
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0)  # Column sums of neighborhoods
+        annotation_sums = annotations.sum(axis=0)  # Column sums of annotations
+    elif null_distribution == "annotations":
+        # Case 2: Only consider nodes with at least one annotation
+        annotated_nodes = (
+            np.ravel(annotations.sum(axis=1)) > 0
+        )  # Row-wise sum to filter nodes with annotations
+        background_population = annotated_nodes.sum()  # Total number of annotated nodes
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(
+            axis=0
+        )  # Neighborhood sums for annotated nodes
+        annotation_sums = annotations[annotated_nodes].sum(
+            axis=0
+        )  # Annotation sums for annotated nodes
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Convert to dense arrays for downstream computations
+    neighborhood_sums = np.asarray(neighborhood_sums).reshape(-1, 1)  # Ensure column vector shape
+    annotation_sums = np.asarray(annotation_sums).reshape(1, -1)  # Ensure row vector shape
+
+    # Observed values: number of annotated nodes in each neighborhood
+    observed = neighborhoods.T @ annotations  # Shape: (neighborhoods, annotations)
+    # Expected values under the null
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+    # Chi-squared statistic: sum((observed - expected)^2 / expected)
+    with np.errstate(divide="ignore", invalid="ignore"):  # Handle divide-by-zero
+        chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)
+
+    # Compute p-values for enrichment (upper tail) and depletion (lower tail)
+    enrichment_pvals = chi2.sf(chi2_stat, df=1)  # Survival function for upper tail
+    depletion_pvals = chi2.cdf(chi2_stat, df=1)  # Cumulative distribution for lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+def compute_hypergeom_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Get the total number of nodes in the network
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums
+    neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Convert to dense array
+    annotation_sums = annotations.sum(axis=0).A.flatten()  # Convert to dense array
+
+    if null_distribution == "network":
+        background_population = total_nodes
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed counts
+    annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse result
+    annotated_in_neighborhood = annotated_in_neighborhood.toarray()  # Convert to dense
+    # Align shapes for broadcasting
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)
+    annotation_sums = annotation_sums.reshape(1, -1)
+    background_population = np.array(background_population).reshape(1, 1)
+
+    # Compute hypergeometric p-values
+    depletion_pvals = hypergeom.cdf(
+        annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
+    )
+    enrichment_pvals = hypergeom.sf(
+        annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
+    )
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+def compute_poisson_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Matrix multiplication to get the number of annotated nodes in each neighborhood
+    annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
+    # Convert annotated counts to dense for downstream calculations
+    annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
+
+    # Compute lambda_expected based on the chosen null distribution
+    if null_distribution == "network":
+        # Use the mean across neighborhoods (axis=1)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
+    elif null_distribution == "annotations":
+        # Use the mean across annotations (axis=0)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Compute p-values for enrichment and depletion using Poisson distribution
+    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
+    depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+def compute_zscore_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[1]
+
+    # Compute sums
+    if null_distribution == "network":
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
+        annotation_sums = annotations.sum(axis=0).A.flatten()  # Dense row sums
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed values
+    observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
+    # Expected values under the null
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
+    annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+
+    # Standard deviation under the null
+    std_dev = np.sqrt(
+        expected
+        * (1 - annotation_sums / background_population)
+        * (1 - neighborhood_sums / background_population)
+    )
+    std_dev[std_dev == 0] = np.nan  # Avoid division by zero
+    # Compute Z-scores
+    z_scores = (observed - expected) / std_dev
+
+    # Convert Z-scores to depletion and enrichment p-values
+    enrichment_pvals = norm.sf(z_scores)  # Upper tail
+    depletion_pvals = norm.cdf(z_scores)  # Lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
```
{risk_network-0.0.9b25.dist-info → risk_network-0.0.9b27.dist-info}/RECORD CHANGED

```diff
@@ -1,20 +1,20 @@
-risk/__init__.py,sha256=
+risk/__init__.py,sha256=SltIM8IfW_qsGbSLdMIbWJ-5vP7CfTfjXj5rinVcdJI,127
 risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
 risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
 risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
-risk/annotations/annotations.py,sha256=
+risk/annotations/annotations.py,sha256=g8ca9H49dZIqHv6Od3Dem4BIo_euy8alL3PDauT6ZJI,14088
 risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
 risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
 risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
 risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
 risk/neighborhoods/__init__.py,sha256=Q74HwTH7okI-vaskJPy2bYwb5sNjGASTzJ6m8V8arCU,234
-risk/neighborhoods/api.py,sha256=
-risk/neighborhoods/community.py,sha256=
-risk/neighborhoods/domains.py,sha256=
-risk/neighborhoods/neighborhoods.py,sha256=
+risk/neighborhoods/api.py,sha256=TjIMVnSPC702zMlwyaz2i0ofNx-d9L9g3P-TTSBMx90,23341
+risk/neighborhoods/community.py,sha256=5Q_-VAJC-5SY5EUsB8gIlemeDoAL85uLjyl16pItHiQ,16699
+risk/neighborhoods/domains.py,sha256=jMJ4-Qzwgmo6Hya8h0E2_IcMaLpbuH_FWlmSjJl2ikc,12832
+risk/neighborhoods/neighborhoods.py,sha256=l9FhADB1C-OxM8E9QXOcA4osUDgA1vs4ud-OCGKKybc,21457
 risk/network/__init__.py,sha256=oVi3FA1XXKD84014Cykq-9bpX4_s0F3aAUfNOU-07Qw,73
-risk/network/geometry.py,sha256=
-risk/network/io.py,sha256=
+risk/network/geometry.py,sha256=dU1hMq4j9gG0nkDqGRl_NiZ2Z-xvT_HF11FwEQ7oOR4,6570
+risk/network/io.py,sha256=PqsRw1g7nfJJ3xs4aYcim3JWWLMFS1irgtg5hIyht5I,24376
 risk/network/graph/__init__.py,sha256=ziGJew3yhtqvrb9LUuneDu_LwW2Wa9vd4UuhoL5l1CA,91
 risk/network/graph/api.py,sha256=Ag4PjFTX6BUvmW7ZdfIgwdsr8URigX9jD9yEFRXUxrU,8220
 risk/network/graph/network.py,sha256=KdIBM_-flHMWcBK4RUjU_QRfOZIf_yv9fv4L7AOLkqU,12199
@@ -27,18 +27,14 @@ risk/network/plotter/labels.py,sha256=QesD1ybseA6ldLmWMqVaAqSPR34yVEgEzXzg1AKQD6
 risk/network/plotter/network.py,sha256=wcBf1GaM1wPzW-iXTrLzOmlG2_9wwfll_hJUzUO2u2Y,19917
 risk/network/plotter/utils/colors.py,sha256=EFlIUZ3MGSKoHeZi9cgR6uLKK5GGJ4QzE6lmnrHViLw,18967
 risk/network/plotter/utils/layout.py,sha256=2P4Bqi1dGiX9KsriLYqiq1KlHpsMdZemAUza4WcYoNA,3634
-risk/stats/__init__.py,sha256=
-risk/stats/
-risk/stats/
-risk/stats/hypergeom.py,sha256=VfQBtpgSGG826uBP1WyBMavP3ylZnhponUZ2rHFdGAE,2502
-risk/stats/poisson.py,sha256=_KHe9g8XNRD4-Q486zx2UgHCO2QyvBOiHuX3hRZLEqc,2050
-risk/stats/stats.py,sha256=y2DMJF3uKRIWRyYiCd2Kwxa-EqOzX5HsMBms_Vw6wK8,7322
-risk/stats/zscore.py,sha256=Jx9cLKAHiDnrgW_Su9KZYYQiTVsuyJMC7vXBusnEI-c,2648
+risk/stats/__init__.py,sha256=2zdLv3tUHKyAjwAo7LprVXRaak1cHgrpYMVMSik6JM4,324
+risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,7336
+risk/stats/stat_tests.py,sha256=ImCC0Ao7KfLxuIEt_9JzfH92uVRPNOUzEXbV7Y-HTDo,11776
 risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
-risk/stats/permutation/permutation.py,sha256=
+risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
 risk/stats/permutation/test_functions.py,sha256=D3XMPM8CasUNytWSRce22TI6KK6XulYn5uGG4lWxaHs,3120
-risk_network-0.0.
-risk_network-0.0.
-risk_network-0.0.
-risk_network-0.0.
-risk_network-0.0.
+risk_network-0.0.9b27.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.9b27.dist-info/METADATA,sha256=7cc6HEXAc7nDYfRkuNxlP-vMOnnxsGNSkEXMIZJ8sgo,47627
+risk_network-0.0.9b27.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+risk_network-0.0.9b27.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.9b27.dist-info/RECORD,,
```
|