risk-network 0.0.9b25-py3-none-any.whl → 0.0.9b27-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Tuple, Union
 
 import networkx as nx
 import numpy as np
+from scipy.sparse import csr_matrix
 from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics.pairwise import cosine_similarity
 
@@ -34,43 +35,43 @@ def get_network_neighborhoods(
     louvain_resolution: float = 0.1,
     leiden_resolution: float = 1.0,
     random_seed: int = 888,
-) -> np.ndarray:
-    """Calculate the combined neighborhoods for each node based on the specified community detection algorithm(s).
+) -> csr_matrix:
+    """Calculate the combined neighborhoods for each node using sparse matrices.
 
     Args:
         network (nx.Graph): The network graph.
         distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use.
-        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
+        fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction thresholds.
         louvain_resolution (float, optional): Resolution parameter for the Louvain method.
         leiden_resolution (float, optional): Resolution parameter for the Leiden method.
         random_seed (int, optional): Random seed for methods requiring random initialization.
 
     Returns:
-        np.ndarray: Summed neighborhood matrix from all selected algorithms.
+        csr_matrix: The combined neighborhood matrix.
     """
     # Set random seed for reproducibility
     random.seed(random_seed)
     np.random.seed(random_seed)
 
-    # Ensure distance_metric is a list/tuple for multi-algorithm handling
+    # Ensure distance_metric is a list for multi-algorithm handling
     if isinstance(distance_metric, (str, np.ndarray)):
         distance_metric = [distance_metric]
-    # Ensure fraction_shortest_edges is a list/tuple for multi-threshold handling
+    # Ensure fraction_shortest_edges is a list for multi-threshold handling
     if isinstance(fraction_shortest_edges, (float, int)):
         fraction_shortest_edges = [fraction_shortest_edges] * len(distance_metric)
-    # Check that the number of distance metrics matches the number of edge length thresholds
+    # Validate matching lengths of distance metrics and thresholds
     if len(distance_metric) != len(fraction_shortest_edges):
         raise ValueError(
             "The number of distance metrics must match the number of edge length thresholds."
         )
 
-    # Initialize combined neighborhood matrix
+    # Initialize a sparse CSR matrix for incremental updates
     num_nodes = network.number_of_nodes()
-    combined_neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-
+    # Initialize a sparse matrix with the same shape as the network
+    combined_neighborhoods = csr_matrix((num_nodes, num_nodes), dtype=np.uint8)
     # Loop through each distance metric and corresponding edge rank fraction
     for metric, percentile in zip(distance_metric, fraction_shortest_edges):
-        # Call the appropriate neighborhood function based on the metric
+        # Compute neighborhoods for the specified metric
         if metric == "greedy_modularity":
             neighborhoods = calculate_greedy_modularity_neighborhoods(
                 network, fraction_shortest_edges=percentile
@@ -107,22 +108,37 @@ def get_network_neighborhoods(
             )
         else:
             raise ValueError(
-                "Incorrect distance metric specified. Please choose from 'greedy_modularity', 'label_propagation',"
+                "Invalid distance metric. Choose from: 'greedy_modularity', 'label_propagation',"
                 "'leiden', 'louvain', 'markov_clustering', 'spinglass', 'walktrap'."
             )
 
-        # Sum the neighborhood matrices
+        # Add the sparse neighborhood matrix
        combined_neighborhoods += neighborhoods
 
-    # Ensure that the maximum value in each row is set to 1
-    # This ensures that for each row, only the strongest relationship (the maximum value) is retained,
-    # while all other values are reset to 0. This transformation simplifies the neighborhood matrix by
-    # focusing on the most significant connection per row (or nodes).
-    combined_neighborhoods = _set_max_row_value_to_one(combined_neighborhoods)
+    # Ensure maximum value in each row is set to 1
+    combined_neighborhoods = _set_max_row_value_to_one_sparse(combined_neighborhoods)
 
     return combined_neighborhoods
 
 
+def _set_max_row_value_to_one_sparse(matrix: csr_matrix) -> csr_matrix:
+    """Set the maximum value in each row of a sparse matrix to 1.
+
+    Args:
+        matrix (csr_matrix): The input sparse matrix.
+
+    Returns:
+        csr_matrix: The modified sparse matrix where only the maximum value in each row is set to 1.
+    """
+    # Operate on the CSR buffers in place; row slicing with matrix[i] would return a copy
+    for i in range(matrix.shape[0]):
+        start, end = matrix.indptr[i], matrix.indptr[i + 1]
+        if end > start:
+            row_data = matrix.data[start:end]
+            row_data[:] = (row_data == row_data.max()).astype(matrix.dtype)
+
+    return matrix
+
+
 def _set_max_row_value_to_one(matrix: np.ndarray) -> np.ndarray:
     """For each row in the input matrix, set the maximum value(s) to 1 and all other values to 0. This is particularly
     useful for neighborhood matrices that have undergone multiple neighborhood detection algorithms, where the
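
The sparse row-max reduction above mutates the CSR buffers through indptr offsets, since row slicing with matrix[i] returns a copy whose data array is detached from the original. A minimal sketch of the same technique on a toy matrix (values are illustrative, not from the package):

    import numpy as np
    from scipy.sparse import csr_matrix

    # Toy combined-neighborhood matrix; values count algorithm "votes"
    m = csr_matrix(np.array([[0, 2, 1], [3, 0, 3], [0, 0, 0]], dtype=np.uint8))

    # Keep only each row's maximum as 1; ties keep multiple 1s
    for i in range(m.shape[0]):
        start, end = m.indptr[i], m.indptr[i + 1]
        if end > start:
            row = m.data[start:end]
            row[:] = (row == row.max()).astype(m.dtype)
    m.eliminate_zeros()  # drop explicit zeros left behind by the reset

    print(m.toarray())  # [[0 1 0] [1 0 1] [0 0 0]]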
risk/network/geometry.py CHANGED
@@ -3,8 +3,6 @@ risk/network/geometry
 ~~~~~~~~~~~~~~~~~~~~~
 """
 
-import copy
-
 import networkx as nx
 import numpy as np
 
@@ -31,44 +29,43 @@ def assign_edge_lengths(
         """Compute distances between pairs of coordinates."""
         u_coords, v_coords = coords[:, 0, :], coords[:, 1, :]
         if is_sphere:
-            u_norm = np.linalg.norm(u_coords, axis=1, keepdims=True)
-            v_norm = np.linalg.norm(v_coords, axis=1, keepdims=True)
-            u_coords /= u_norm
-            v_coords /= v_norm
+            u_coords /= np.linalg.norm(u_coords, axis=1, keepdims=True)
+            v_coords /= np.linalg.norm(v_coords, axis=1, keepdims=True)
             dot_products = np.einsum("ij,ij->i", u_coords, v_coords)
             return np.arccos(np.clip(dot_products, -1.0, 1.0))
-
         return np.linalg.norm(u_coords - v_coords, axis=1)
 
     # Normalize graph coordinates and weights
     _normalize_graph_coordinates(G)
     _normalize_weights(G)
+
     # Map nodes to sphere and adjust depth if required
     if compute_sphere:
         _map_to_sphere(G)
-        G_depth = _create_depth(copy.deepcopy(G), surface_depth=surface_depth)
+        G_depth = _create_depth(G, surface_depth=surface_depth)
     else:
-        G_depth = copy.deepcopy(G)
-
-    # Precompute edge coordinate arrays for vectorized computation
-    edge_data = []
-    for u, v in G_depth.edges:
-        u_coords = np.array([G_depth.nodes[u]["x"], G_depth.nodes[u]["y"]])
-        v_coords = np.array([G_depth.nodes[v]["x"], G_depth.nodes[v]["y"]])
-        if compute_sphere:
-            u_coords = np.append(u_coords, G_depth.nodes[u].get("z", 0))
-            v_coords = np.append(v_coords, G_depth.nodes[v].get("z", 0))
-        edge_data.append([u_coords, v_coords, (u, v)])
-
-    # Convert to numpy for faster processing
-    edge_coords = np.array([(e[0], e[1]) for e in edge_data])
-    edge_indices = [e[2] for e in edge_data]
-    # Compute distances in bulk
-    distances = compute_distance_vectorized(edge_coords, compute_sphere)
+        G_depth = G
+
+    # Precompute edge coordinate arrays and compute distances in bulk
+    edge_data = np.array(
+        [
+            [
+                np.array(
+                    [G_depth.nodes[u]["x"], G_depth.nodes[u]["y"], G_depth.nodes[u].get("z", 0)]
+                ),
+                np.array(
+                    [G_depth.nodes[v]["x"], G_depth.nodes[v]["y"], G_depth.nodes[v].get("z", 0)]
+                ),
+            ]
+            for u, v in G_depth.edges
+        ]
+    )
+    # Compute distances
+    distances = compute_distance_vectorized(edge_data, compute_sphere)
     # Assign distances back to the graph
-    for (u, v), distance in zip(edge_indices, distances):
+    for (u, v), distance in zip(G_depth.edges, distances):
         if include_edge_weight:
-            weight = G.edges[u, v].get("normalized_weight", 0) + 1e-6
+            weight = G.edges[u, v].get("normalized_weight", 1e-6)  # Avoid divide-by-zero
             G.edges[u, v]["length"] = distance / np.sqrt(weight)
         else:
             G.edges[u, v]["length"] = distance
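
The nested compute_distance_vectorized helper computes arc lengths on the unit sphere: normalize both endpoints, take row-wise dot products with np.einsum, then apply arccos, with np.clip guarding against floating-point round-off pushing a dot product just outside [-1, 1]. A standalone sketch of that math (coordinates are illustrative):

    import numpy as np

    # Endpoint pairs, shape (n_edges, 2, 3)
    coords = np.array([
        [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],   # orthogonal points -> pi/2
        [[1.0, 0.0, 0.0], [-1.0, 0.0, 0.0]],  # antipodal points -> pi
    ])
    u, v = coords[:, 0, :], coords[:, 1, :]
    u = u / np.linalg.norm(u, axis=1, keepdims=True)  # project onto unit sphere
    v = v / np.linalg.norm(v, axis=1, keepdims=True)
    dots = np.einsum("ij,ij->i", u, v)  # row-wise dot products
    print(np.arccos(np.clip(dots, -1.0, 1.0)))  # [1.5708 3.1416]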
risk/network/io.py CHANGED
@@ -217,6 +217,9 @@ class NetworkIO:
 
         Returns:
             nx.Graph: Loaded and processed network.
+
+        Raises:
+            ValueError: If no matching attribute metadata file is found.
         """
         filetype = "Cytoscape"
         # Log the loading of the Cytoscape file
@@ -258,13 +261,29 @@ class NetworkIO:
 
         # Read the node attributes (from /tables/)
         attribute_metadata_keywords = ["/tables/", "SHARED_ATTRS", "edge.cytable"]
-        attribute_metadata = [
-            os.path.join(tmp_dir, cf)
-            for cf in cys_files
-            if all(keyword in cf for keyword in attribute_metadata_keywords)
-        ][0]
-        # Load attributes file from Cytoscape as pandas data frame
-        attribute_table = pd.read_csv(attribute_metadata, sep=",", header=None, skiprows=1)
+        # Use a generator to find the first matching file
+        attribute_metadata = next(
+            (
+                os.path.join(tmp_dir, cf)
+                for cf in cys_files
+                if all(keyword in cf for keyword in attribute_metadata_keywords)
+            ),
+            None,  # Default if no file matches
+        )
+        if attribute_metadata:
+            # Load the attribute table with explicit parsing options
+            attribute_table = pd.read_csv(
+                attribute_metadata,
+                sep=",",
+                header=None,
+                skiprows=1,
+                dtype=str,  # Read all columns as strings to avoid mixed-type inference
+                engine="c",  # Use the fast C parsing engine
+                low_memory=False,  # Parse in a single pass for consistent dtypes
+            )
+        else:
+            raise ValueError("No matching attribute metadata file found.")
+
         # Set columns
         attribute_table.columns = attribute_table.iloc[0]
         # Skip first four rows
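
Swapping the [ ... ][0] list indexing for next() stops scanning at the first hit and turns a potential IndexError into an explicit None, which the new code converts into a descriptive ValueError. A minimal sketch of the pattern (file names are hypothetical):

    import os

    cys_files = ["views/network.xml", "tables/SHARED_ATTRS-edge.cytable"]
    keywords = ["tables/", "SHARED_ATTRS", "edge.cytable"]

    # First matching path, or None if nothing matches
    match = next(
        (os.path.join("/tmp/cys", f) for f in cys_files if all(k in f for k in keywords)),
        None,
    )
    print(match)  # /tmp/cys/tables/SHARED_ATTRS-edge.cytable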
@@ -464,14 +483,19 @@ class NetworkIO:
         Args:
             G (nx.Graph): A NetworkX graph object.
         """
-        missing_weights = 0
-        # Assign user-defined edge weights to the "weight" attribute
-        nx.set_edge_attributes(G, 1.0, "weight")  # Set default weight
-        if self.weight_label in nx.get_edge_attributes(G, self.weight_label):
-            nx.set_edge_attributes(G, nx.get_edge_attributes(G, self.weight_label), "weight")
-
-        if self.include_edge_weight and missing_weights:
-            logger.debug(f"Total edges missing weights: {missing_weights}")
+        # Set default weight for all edges in bulk
+        default_weight = 1.0
+        nx.set_edge_attributes(G, default_weight, "weight")
+        # Check and assign user-defined edge weights if available
+        weight_attributes = nx.get_edge_attributes(G, self.weight_label)
+        if weight_attributes:
+            nx.set_edge_attributes(G, weight_attributes, "weight")
+
+        # Log missing weights if include_edge_weight is enabled
+        if self.include_edge_weight:
+            missing_weights = len(G.edges) - len(weight_attributes)
+            if missing_weights > 0:
+                logger.debug(f"Total edges missing weights: {missing_weights}")
 
     def _validate_nodes(self, G: nx.Graph) -> None:
         """Validate the graph structure and attributes with attribute fallback for positions and labels.
risk/stats/__init__.py CHANGED
@@ -3,11 +3,13 @@ risk/stats
 ~~~~~~~~~~
 """
 
-from risk.stats.binom import compute_binom_test
-from risk.stats.chi2 import compute_chi2_test
-from risk.stats.hypergeom import compute_hypergeom_test
 from risk.stats.permutation import compute_permutation_test
-from risk.stats.poisson import compute_poisson_test
-from risk.stats.zscore import compute_zscore_test
+from risk.stats.stat_tests import (
+    compute_binom_test,
+    compute_chi2_test,
+    compute_hypergeom_test,
+    compute_poisson_test,
+    compute_zscore_test,
+)
 
-from risk.stats.stats import calculate_significance_matrices
+from risk.stats.significance import calculate_significance_matrices
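
Because the package-level __init__.py re-exports every test under its old name, imports that go through risk.stats rather than the removed submodules should keep working; a sketch of the import path that stays stable across both versions, assuming callers avoid the deleted risk.stats.binom-style modules:

    # Names resolve via risk/stats/__init__.py on both 0.0.9b25 and 0.0.9b27
    from risk.stats import (
        compute_binom_test,
        compute_zscore_test,
        calculate_significance_matrices,
    )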
@@ -95,7 +95,7 @@ def _run_permutation_test(
     if null_distribution == "network":
         idxs = range(annotations.shape[0])
     elif null_distribution == "annotations":
-        idxs = np.nonzero(np.sum(~np.isnan(annotations), axis=1))[0]
+        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
     else:
         raise ValueError(
             "Invalid null_distribution value. Choose either 'network' or 'annotations'."
@@ -1,6 +1,6 @@
 """
-risk/stats/stats
-~~~~~~~~~~~~~~~~
+risk/stats/significance
+~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 from typing import Any, Dict, Union
@@ -0,0 +1,267 @@
+"""
+risk/stats/stat_tests
+~~~~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict
+
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.stats import binom
+from scipy.stats import chi2
+from scipy.stats import hypergeom
+from scipy.stats import poisson
+from scipy.stats import norm
+
+
+def compute_binom_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Get the total number of nodes in the network
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums (remain sparse here)
+    neighborhood_sizes = neighborhoods.sum(axis=1)  # Row sums
+    annotation_totals = annotations.sum(axis=0)  # Column sums
+    # Compute probabilities (convert to dense)
+    if null_distribution == "network":
+        p_values = (annotation_totals / total_nodes).A.flatten()  # Dense 1D array
+    elif null_distribution == "annotations":
+        p_values = (annotation_totals / annotations.sum()).A.flatten()  # Dense 1D array
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed counts (sparse matrix multiplication)
+    annotated_counts = neighborhoods @ annotations  # Sparse result
+    annotated_counts_dense = annotated_counts.toarray()  # Convert for dense operations
+
+    # Compute enrichment and depletion p-values
+    enrichment_pvals = 1 - binom.cdf(annotated_counts_dense - 1, neighborhood_sizes.A, p_values)
+    depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes.A, p_values)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+def compute_chi2_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[0]
+
+    if null_distribution == "network":
+        # Case 1: Use all nodes as the background
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0)  # Column sums of neighborhoods
+        annotation_sums = annotations.sum(axis=0)  # Column sums of annotations
+    elif null_distribution == "annotations":
+        # Case 2: Only consider nodes with at least one annotation
+        annotated_nodes = (
+            np.ravel(annotations.sum(axis=1)) > 0
+        )  # Row-wise sum to filter nodes with annotations
+        background_population = annotated_nodes.sum()  # Total number of annotated nodes
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(
+            axis=0
+        )  # Neighborhood sums for annotated nodes
+        annotation_sums = annotations[annotated_nodes].sum(
+            axis=0
+        )  # Annotation sums for annotated nodes
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Convert to dense arrays for downstream computations
+    neighborhood_sums = np.asarray(neighborhood_sums).reshape(-1, 1)  # Ensure column vector shape
+    annotation_sums = np.asarray(annotation_sums).reshape(1, -1)  # Ensure row vector shape
+
+    # Observed values: number of annotated nodes in each neighborhood
+    observed = neighborhoods.T @ annotations  # Shape: (neighborhoods, annotations)
+    # Expected values under the null
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+    # Chi-squared statistic: sum((observed - expected)^2 / expected)
+    with np.errstate(divide="ignore", invalid="ignore"):  # Handle divide-by-zero
+        chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0)
+
+    # Compute p-values for enrichment (upper tail) and depletion (lower tail)
+    enrichment_pvals = chi2.sf(chi2_stat, df=1)  # Survival function for upper tail
+    depletion_pvals = chi2.cdf(chi2_stat, df=1)  # Cumulative distribution for lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+def compute_hypergeom_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute hypergeometric test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Get the total number of nodes in the network
+    total_nodes = neighborhoods.shape[1]
+
+    # Compute sums
+    neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Convert to dense array
+    annotation_sums = annotations.sum(axis=0).A.flatten()  # Convert to dense array
+
+    if null_distribution == "network":
+        background_population = total_nodes
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed counts
+    annotated_in_neighborhood = neighborhoods.T @ annotations  # Sparse result
+    annotated_in_neighborhood = annotated_in_neighborhood.toarray()  # Convert to dense
+    # Align shapes for broadcasting
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)
+    annotation_sums = annotation_sums.reshape(1, -1)
+    background_population = np.array(background_population).reshape(1, 1)
+
+    # Compute hypergeometric p-values
+    depletion_pvals = hypergeom.cdf(
+        annotated_in_neighborhood, background_population, annotation_sums, neighborhood_sums
+    )
+    enrichment_pvals = hypergeom.sf(
+        annotated_in_neighborhood - 1, background_population, annotation_sums, neighborhood_sums
+    )
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
+
+
+def compute_poisson_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Matrix multiplication to get the number of annotated nodes in each neighborhood
+    annotated_in_neighborhood = neighborhoods @ annotations  # Sparse result
+    # Convert annotated counts to dense for downstream calculations
+    annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
+
+    # Compute lambda_expected based on the chosen null distribution
+    if null_distribution == "network":
+        # Use the mean across neighborhoods (axis=1)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
+    elif null_distribution == "annotations":
+        # Use the mean across annotations (axis=0)
+        lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Compute p-values for enrichment and depletion using Poisson distribution
+    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
+    depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
+
+
+def compute_zscore_test(
+    neighborhoods: csr_matrix,
+    annotations: csr_matrix,
+    null_distribution: str = "network",
+) -> Dict[str, Any]:
+    """
+    Compute Z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
+
+    Args:
+        neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
+        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+
+    Returns:
+        Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
+    """
+    # Total number of nodes in the network
+    total_node_count = neighborhoods.shape[1]
+
+    # Compute sums
+    if null_distribution == "network":
+        background_population = total_node_count
+        neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
+        annotation_sums = annotations.sum(axis=0).A.flatten()  # Dense column sums
+    elif null_distribution == "annotations":
+        annotated_nodes = annotations.sum(axis=1).A.flatten() > 0  # Dense boolean mask
+        background_population = annotated_nodes.sum()
+        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
+        annotation_sums = annotations[annotated_nodes].sum(axis=0).A.flatten()
+    else:
+        raise ValueError(
+            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+        )
+
+    # Observed values
+    observed = (neighborhoods.T @ annotations).toarray()  # Convert sparse result to dense
+    # Expected values under the null
+    neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
+    annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
+    expected = (neighborhood_sums @ annotation_sums) / background_population
+
+    # Standard deviation under the null
+    std_dev = np.sqrt(
+        expected
+        * (1 - annotation_sums / background_population)
+        * (1 - neighborhood_sums / background_population)
+    )
+    std_dev[std_dev == 0] = np.nan  # Avoid division by zero
+    # Compute Z-scores
+    z_scores = (observed - expected) / std_dev
+
+    # Convert Z-scores to depletion and enrichment p-values
+    enrichment_pvals = norm.sf(z_scores)  # Upper tail
+    depletion_pvals = norm.cdf(z_scores)  # Lower tail
+
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
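
All five tests share one contract: two sparse binary csr_matrix inputs in, a dense p-value matrix per tail out. A minimal usage sketch on toy data (the matrices are illustrative, not from the package):

    import numpy as np
    from scipy.sparse import csr_matrix
    from risk.stats import compute_hypergeom_test

    # 3 nodes, 2 annotation terms; rows of `neighborhoods` index neighborhoods
    neighborhoods = csr_matrix(np.array([[1, 1, 0], [0, 1, 1], [1, 0, 1]]))
    annotations = csr_matrix(np.array([[1, 0], [1, 1], [0, 0]]))

    result = compute_hypergeom_test(neighborhoods, annotations, null_distribution="network")
    print(result["enrichment_pvals"].shape)  # (3, 2) -> neighborhoods x terms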
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: risk-network
-Version: 0.0.9b25
+Version: 0.0.9b27
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
@@ -1,20 +1,20 @@
-risk/__init__.py,sha256=Rgsnt64VMhbyTleA_DXr5VcFJG6748i86zd0VZHy9r4,127
+risk/__init__.py,sha256=SltIM8IfW_qsGbSLdMIbWJ-5vP7CfTfjXj5rinVcdJI,127
 risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
 risk/risk.py,sha256=s827_lRknFseOP9O4zW8sP-IcCd2EzrpV_tnVY_tz5s,1104
 risk/annotations/__init__.py,sha256=parsbcux1U4urpUqh9AdzbDWuLj9HlMidycMPkpSQFo,179
-risk/annotations/annotations.py,sha256=XmVuLL5NFAj6F30fZY22N8nb4LK6sig7fE0NXL1iZp8,14497
+risk/annotations/annotations.py,sha256=g8ca9H49dZIqHv6Od3Dem4BIo_euy8alL3PDauT6ZJI,14088
 risk/annotations/io.py,sha256=z1AJySsU-KL_IYuHa7j3nvuczmOHgK3WfaQ4TRunvrA,10499
 risk/log/__init__.py,sha256=7LxDysQu7doi0LAvlY2YbjN6iJH0fNknqy8lSLgeljo,217
 risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
 risk/log/parameters.py,sha256=VtwfMzLU1xI4yji3-Ch5vHjH-KdwTfwaEMmi7hFQTs0,5716
 risk/neighborhoods/__init__.py,sha256=Q74HwTH7okI-vaskJPy2bYwb5sNjGASTzJ6m8V8arCU,234
-risk/neighborhoods/api.py,sha256=KdUouMHJPwvePJGdz7Ck1GWYhN96QDb_SuPyTt3KwAc,23515
-risk/neighborhoods/community.py,sha256=VIDvB-SsMDDvWkUaYXf_E-gcg0HELMVv2MKshPwJAFQ,15480
-risk/neighborhoods/domains.py,sha256=MufM4cbvP3HrJyESOuGT0wYD_cz3rjT0SGqEnbytkh8,12523
-risk/neighborhoods/neighborhoods.py,sha256=bBUY7hXqcsOoAEkPdRoRNuj36WsllXicmz_LxZfEuyw,21186
+risk/neighborhoods/api.py,sha256=TjIMVnSPC702zMlwyaz2i0ofNx-d9L9g3P-TTSBMx90,23341
+risk/neighborhoods/community.py,sha256=5Q_-VAJC-5SY5EUsB8gIlemeDoAL85uLjyl16pItHiQ,16699
+risk/neighborhoods/domains.py,sha256=jMJ4-Qzwgmo6Hya8h0E2_IcMaLpbuH_FWlmSjJl2ikc,12832
+risk/neighborhoods/neighborhoods.py,sha256=l9FhADB1C-OxM8E9QXOcA4osUDgA1vs4ud-OCGKKybc,21457
 risk/network/__init__.py,sha256=oVi3FA1XXKD84014Cykq-9bpX4_s0F3aAUfNOU-07Qw,73
-risk/network/geometry.py,sha256=omyb9afSKMUtQ-RKVHUoRyxJifOW0ASenHjyCjg43kg,6836
-risk/network/io.py,sha256=JV5hqf1oIwWUVw07BjhD0qACQGbtIeA8NSMDcFql88k,23465
+risk/network/geometry.py,sha256=dU1hMq4j9gG0nkDqGRl_NiZ2Z-xvT_HF11FwEQ7oOR4,6570
+risk/network/io.py,sha256=PqsRw1g7nfJJ3xs4aYcim3JWWLMFS1irgtg5hIyht5I,24376
 risk/network/graph/__init__.py,sha256=ziGJew3yhtqvrb9LUuneDu_LwW2Wa9vd4UuhoL5l1CA,91
 risk/network/graph/api.py,sha256=Ag4PjFTX6BUvmW7ZdfIgwdsr8URigX9jD9yEFRXUxrU,8220
 risk/network/graph/network.py,sha256=KdIBM_-flHMWcBK4RUjU_QRfOZIf_yv9fv4L7AOLkqU,12199
@@ -27,18 +27,14 @@ risk/network/plotter/labels.py,sha256=QesD1ybseA6ldLmWMqVaAqSPR34yVEgEzXzg1AKQD6
 risk/network/plotter/network.py,sha256=wcBf1GaM1wPzW-iXTrLzOmlG2_9wwfll_hJUzUO2u2Y,19917
 risk/network/plotter/utils/colors.py,sha256=EFlIUZ3MGSKoHeZi9cgR6uLKK5GGJ4QzE6lmnrHViLw,18967
 risk/network/plotter/utils/layout.py,sha256=2P4Bqi1dGiX9KsriLYqiq1KlHpsMdZemAUza4WcYoNA,3634
-risk/stats/__init__.py,sha256=1CPRtT1LDwudrvFgkVtSom8cp4cM7b4X6b4fHPaNHw0,405
-risk/stats/binom.py,sha256=8Qwcxnq1u-AycwQs_sQxwuxgkgDpES-A-kIcj4fRc3g,2032
-risk/stats/chi2.py,sha256=MGFNrWP40i9TxnMsZYbDgqdMrN_Fe0xFsnWU8xNsVSs,3046
-risk/stats/hypergeom.py,sha256=VfQBtpgSGG826uBP1WyBMavP3ylZnhponUZ2rHFdGAE,2502
-risk/stats/poisson.py,sha256=_KHe9g8XNRD4-Q486zx2UgHCO2QyvBOiHuX3hRZLEqc,2050
-risk/stats/stats.py,sha256=y2DMJF3uKRIWRyYiCd2Kwxa-EqOzX5HsMBms_Vw6wK8,7322
-risk/stats/zscore.py,sha256=Jx9cLKAHiDnrgW_Su9KZYYQiTVsuyJMC7vXBusnEI-c,2648
+risk/stats/__init__.py,sha256=2zdLv3tUHKyAjwAo7LprVXRaak1cHgrpYMVMSik6JM4,324
+risk/stats/significance.py,sha256=6cKv2xBQXWTHZ6HpNWIqlNfKKS5pG_BcCUdMM3r_zw4,7336
+risk/stats/stat_tests.py,sha256=ImCC0Ao7KfLxuIEt_9JzfH92uVRPNOUzEXbV7Y-HTDo,11776
 risk/stats/permutation/__init__.py,sha256=OLmYLm2uj96hPsSaUs0vUqFYw6Thwch_aHtpL7L0ZFw,127
-risk/stats/permutation/permutation.py,sha256=693DyWPNz6L_wCL06F7gj2u1df0qVc4F3Na36jCLYMI,10577
+risk/stats/permutation/permutation.py,sha256=BWjgdBpLVcHvmwHy0bmD4aJFccxifNBSrrCBPppyKf4,10569
 risk/stats/permutation/test_functions.py,sha256=D3XMPM8CasUNytWSRce22TI6KK6XulYn5uGG4lWxaHs,3120
-risk_network-0.0.9b25.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.9b25.dist-info/METADATA,sha256=XJSNAooxsGNwoMnp-6Nx0YCnp1zBWVm9ej2yjtUUPDg,47627
-risk_network-0.0.9b25.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-risk_network-0.0.9b25.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.9b25.dist-info/RECORD,,
+risk_network-0.0.9b27.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.9b27.dist-info/METADATA,sha256=7cc6HEXAc7nDYfRkuNxlP-vMOnnxsGNSkEXMIZJ8sgo,47627
+risk_network-0.0.9b27.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+risk_network-0.0.9b27.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.9b27.dist-info/RECORD,,