risk-network 0.0.7b3.tar.gz → 0.0.7b5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/PKG-INFO +1 -1
  2. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/__init__.py +1 -1
  3. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/neighborhoods/neighborhoods.py +0 -1
  4. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/graph.py +16 -27
  5. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/risk.py +37 -51
  6. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/__init__.py +2 -2
  7. risk_network-0.0.7b5/risk/stats/hypergeom.py +55 -0
  8. risk_network-0.0.7b5/risk/stats/poisson.py +40 -0
  9. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/PKG-INFO +1 -1
  10. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/SOURCES.txt +1 -1
  11. risk_network-0.0.7b3/risk/stats/fisher_exact.py +0 -132
  12. risk_network-0.0.7b3/risk/stats/hypergeom.py +0 -131
  13. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/LICENSE +0 -0
  14. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/MANIFEST.in +0 -0
  15. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/README.md +0 -0
  16. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/pyproject.toml +0 -0
  17. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/annotations/__init__.py +0 -0
  18. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/annotations/annotations.py +0 -0
  19. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/annotations/io.py +0 -0
  20. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/constants.py +0 -0
  21. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/log/__init__.py +0 -0
  22. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/log/console.py +0 -0
  23. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/log/params.py +0 -0
  24. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/neighborhoods/__init__.py +0 -0
  25. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/neighborhoods/community.py +0 -0
  26. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/neighborhoods/domains.py +0 -0
  27. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/__init__.py +0 -0
  28. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/geometry.py +0 -0
  29. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/io.py +0 -0
  30. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/network/plot.py +0 -0
  31. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/permutation/__init__.py +0 -0
  32. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/permutation/permutation.py +0 -0
  33. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/permutation/test_functions.py +0 -0
  34. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk/stats/stats.py +0 -0
  35. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/dependency_links.txt +0 -0
  36. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/requires.txt +0 -0
  37. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/risk_network.egg-info/top_level.txt +0 -0
  38. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/setup.cfg +0 -0
  39. {risk_network-0.0.7b3 → risk_network-0.0.7b5}/setup.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.7b3
+Version: 0.0.7b5
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
risk/__init__.py
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.7-beta.3"
+__version__ = "0.0.7-beta.5"
risk/neighborhoods/neighborhoods.py
@@ -200,7 +200,6 @@ def _impute_neighbors_with_similarity(
     depth = 1
     rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
-        next_rows_to_impute = []
         # Iterate over all enriched nodes
         for row_index in range(binary_enrichment_matrix.shape[0]):
             if binary_enrichment_matrix[row_index].sum() != 0:
risk/network/graph.py
@@ -3,7 +3,6 @@ risk/network/graph
 ~~~~~~~~~~~~~~~~~~
 """
 
-import random
 from collections import defaultdict
 from typing import Any, Dict, List, Tuple, Union
 
@@ -307,7 +306,7 @@ def _get_colors(
         List[Tuple]: List of RGBA colors.
     """
     # Set random seed for reproducibility
-    random.seed(random_seed)
+    np.random.seed(random_seed)
     # Determine the number of colors to generate based on the number of domains
     num_colors_to_generate = len(domain_id_to_node_ids_map)
     if color:
@@ -322,23 +321,15 @@ def _get_colors(
     # Step 2: Calculate pairwise distances between centroids
     centroid_array = np.array(centroids)
     dist_matrix = np.linalg.norm(centroid_array[:, None] - centroid_array, axis=-1)
-
-    # Step 3: Generate positions in the colormap, with a focus on centroids that are close
-    remaining_indices = set(range(num_colors_to_generate))
-    # Assign distant colors to close centroids
-    color_positions = _assign_distant_colors(
-        remaining_indices, dist_matrix, colormap, num_colors_to_generate
-    )
-
-    # Step 4: Randomly shuffle color positions to generate a new color palette
-    # while maintaining the dissimilarity between neighboring colors. This shuffling
-    # preserves the relative distances between centroids, ensuring that close centroids
-    # remain visually distinct while introducing randomness into the overall color arrangement.
-    random.shuffle(color_positions)
-    # Ensure that all positions remain between 0 and 1
+    # Step 3: Assign distant colors to close centroids
+    color_positions = _assign_distant_colors(dist_matrix, num_colors_to_generate)
+    # Step 4: Randomly shift the entire color palette while maintaining relative distances
+    global_shift = np.random.uniform(-0.1, 0.1)  # Small global shift to change the overall palette
+    color_positions = (color_positions + global_shift) % 1  # Wrap around to keep within [0, 1]
+    # Step 5: Ensure that all positions remain between 0 and 1
     color_positions = np.clip(color_positions, 0, 1)
 
-    # Step 5: Generate colors based on positions
+    # Step 6: Generate RGBA colors based on positions
    return [colormap(pos) for pos in color_positions]
 
 
@@ -365,28 +356,26 @@ def _calculate_centroids(network, domain_id_to_node_ids_map):
     return centroids
 
 
-def _assign_distant_colors(remaining_indices, dist_matrix, colormap, num_colors_to_generate):
+def _assign_distant_colors(dist_matrix, num_colors_to_generate):
     """Assign colors to centroids that are close in space, ensuring stark color differences.
 
     Args:
-        remaining_indices (set): Indices of centroids left to color.
         dist_matrix (ndarray): Matrix of pairwise centroid distances.
-        colormap (Colormap): The colormap used to assign colors.
         num_colors_to_generate (int): Number of colors to generate.
 
     Returns:
-        np.array: Array of color positions in the colormap.
+        np.array: Array of color positions in the range [0, 1].
     """
     color_positions = np.zeros(num_colors_to_generate)
-    # Convert the set to a list to index over it
-    remaining_indices = list(remaining_indices)
-    # Sort remaining indices by centroid proximity (based on sum of distances to others)
-    proximity_order = sorted(remaining_indices, key=lambda idx: np.sum(dist_matrix[idx]))
-    # Assign colors starting with the most distant points in proximity order
+    # Step 1: Sort indices by centroid proximity (based on sum of distances to others)
+    proximity_order = sorted(
+        range(num_colors_to_generate), key=lambda idx: np.sum(dist_matrix[idx])
+    )
+    # Step 2: Assign colors starting with the most distant points in proximity order
     for i, idx in enumerate(proximity_order):
         color_positions[idx] = i / num_colors_to_generate
 
-    # Adjust colors so that centroids close to one another are maximally distant on the color spectrum
+    # Step 3: Adjust colors so that centroids close to one another are maximally distant on the color spectrum
     half_spectrum = int(num_colors_to_generate / 2)
     for i in range(half_spectrum):
         # Split the spectrum so that close centroids are assigned distant colors
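To make the new color-assignment flow concrete, here is a small self-contained sketch (numpy only, on a toy distance matrix; it covers only Steps 1-2 visible in the hunk above, since the half-spectrum swap of Step 3 is truncated in this diff, and then applies the shift-and-wrap from _get_colors):

import numpy as np

np.random.seed(888)  # mirrors np.random.seed(random_seed) in _get_colors
# Toy pairwise distances for four centroids: 0 and 1 are close, 2 and 3 are far out
dist_matrix = np.array(
    [
        [0.0, 1.0, 5.0, 6.0],
        [1.0, 0.0, 5.0, 6.0],
        [5.0, 5.0, 0.0, 8.0],
        [6.0, 6.0, 8.0, 0.0],
    ]
)
num_colors_to_generate = dist_matrix.shape[0]
color_positions = np.zeros(num_colors_to_generate)
# Step 1: order centroids by their total distance to all others (smallest sum first)
proximity_order = sorted(range(num_colors_to_generate), key=lambda idx: np.sum(dist_matrix[idx]))
# Step 2: spread the ordered centroids evenly over the colormap
for i, idx in enumerate(proximity_order):
    color_positions[idx] = i / num_colors_to_generate
# (Step 3, truncated in the hunk, then swaps spectrum halves so near centroids diverge)
# Step 4: one small global shift re-randomizes the palette without changing relative gaps
global_shift = np.random.uniform(-0.1, 0.1)
color_positions = (color_positions + global_shift) % 1  # wrap around the colormap
print(np.round(color_positions, 3))

Because the shift is applied to all positions and wrapped modulo 1, pairwise gaps on the color wheel are preserved, which is why nearby centroids stay visually distinct while the palette still varies with the seed.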
risk/risk.py
@@ -20,9 +20,9 @@ from risk.neighborhoods (
 from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
 from risk.stats import (
     calculate_significance_matrices,
-    compute_fisher_exact_test,
     compute_hypergeom_test,
     compute_permutation_test,
+    compute_poisson_test,
 )
 
 
@@ -45,20 +45,16 @@ class RISK(NetworkIO, AnnotationsIO):
         """Access the logged parameters."""
         return params
 
-    def load_neighborhoods_by_permutation(
+    def load_neighborhoods_by_hypergeom(
         self,
         network: nx.Graph,
         annotations: Dict[str, Any],
         distance_metric: str = "dijkstra",
         louvain_resolution: float = 0.1,
         edge_length_threshold: float = 0.5,
-        score_metric: str = "sum",
-        null_distribution: str = "network",
-        num_permutations: int = 1000,
         random_seed: int = 888,
-        max_workers: int = 1,
     ) -> Dict[str, Any]:
-        """Load significant neighborhoods for the network using the permutation test.
+        """Load significant neighborhoods for the network using the hypergeometric test.
 
         Args:
             network (nx.Graph): The network graph.
@@ -66,27 +62,19 @@ class RISK(NetworkIO, AnnotationsIO):
             distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
             edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
-            score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
-            null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
-            num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
-            max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
 
         Returns:
             dict: Computed significance of neighborhoods.
         """
-        print_header("Running permutation test")
+        print_header("Running hypergeometric test")
         # Log neighborhood analysis parameters
         params.log_neighborhoods(
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             edge_length_threshold=edge_length_threshold,
-            statistical_test_function="permutation",
-            score_metric=score_metric,
-            null_distribution=null_distribution,
-            num_permutations=num_permutations,
+            statistical_test_function="hypergeom",
             random_seed=random_seed,
-            max_workers=max_workers,
         )
 
         # Load neighborhoods based on the network and distance metric
@@ -97,27 +85,16 @@ class RISK(NetworkIO, AnnotationsIO):
             edge_length_threshold=edge_length_threshold,
             random_seed=random_seed,
         )
-
-        # Log and display permutation test settings
-        print(f"Neighborhood scoring metric: '{score_metric}'")
-        print(f"Null distribution: '{null_distribution}'")
-        print(f"Number of permutations: {num_permutations}")
-        print(f"Maximum workers: {max_workers}")
-        # Run permutation test to compute neighborhood significance
-        neighborhood_significance = compute_permutation_test(
+        # Run hypergeometric test to compute neighborhood significance
+        neighborhood_significance = compute_hypergeom_test(
             neighborhoods=neighborhoods,
             annotations=annotations["matrix"],
-            score_metric=score_metric,
-            null_distribution=null_distribution,
-            num_permutations=num_permutations,
-            random_seed=random_seed,
-            max_workers=max_workers,
         )
 
         # Return the computed neighborhood significance
         return neighborhood_significance
 
-    def load_neighborhoods_by_fisher_exact(
+    def load_neighborhoods_by_poisson(
         self,
         network: nx.Graph,
         annotations: Dict[str, Any],
@@ -125,9 +102,8 @@ class RISK(NetworkIO, AnnotationsIO):
         louvain_resolution: float = 0.1,
         edge_length_threshold: float = 0.5,
         random_seed: int = 888,
-        max_workers: int = 1,
     ) -> Dict[str, Any]:
-        """Load significant neighborhoods for the network using the Fisher's exact test.
+        """Load significant neighborhoods for the network using the Poisson test.
 
         Args:
             network (nx.Graph): The network graph.
@@ -136,20 +112,18 @@ class RISK(NetworkIO, AnnotationsIO):
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
             edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
-            max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
 
         Returns:
             dict: Computed significance of neighborhoods.
         """
-        print_header("Running Fisher's exact test")
+        print_header("Running Poisson test")
         # Log neighborhood analysis parameters
         params.log_neighborhoods(
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             edge_length_threshold=edge_length_threshold,
-            statistical_test_function="fisher_exact",
+            statistical_test_function="poisson",
             random_seed=random_seed,
-            max_workers=max_workers,
         )
 
         # Load neighborhoods based on the network and distance metric
@@ -160,30 +134,29 @@ class RISK(NetworkIO, AnnotationsIO):
             edge_length_threshold=edge_length_threshold,
             random_seed=random_seed,
         )
-
-        # Log and display Fisher's exact test settings
-        print(f"Maximum workers: {max_workers}")
-        # Run Fisher's exact test to compute neighborhood significance
-        neighborhood_significance = compute_fisher_exact_test(
+        # Run Poisson test to compute neighborhood significance
+        neighborhood_significance = compute_poisson_test(
             neighborhoods=neighborhoods,
             annotations=annotations["matrix"],
-            max_workers=max_workers,
         )
 
         # Return the computed neighborhood significance
         return neighborhood_significance
 
-    def load_neighborhoods_by_hypergeom(
+    def load_neighborhoods_by_permutation(
         self,
         network: nx.Graph,
         annotations: Dict[str, Any],
         distance_metric: str = "dijkstra",
         louvain_resolution: float = 0.1,
         edge_length_threshold: float = 0.5,
+        score_metric: str = "sum",
+        null_distribution: str = "network",
+        num_permutations: int = 1000,
         random_seed: int = 888,
         max_workers: int = 1,
     ) -> Dict[str, Any]:
-        """Load significant neighborhoods for the network using the hypergeometric test.
+        """Load significant neighborhoods for the network using the permutation test.
 
         Args:
             network (nx.Graph): The network graph.
@@ -191,19 +164,25 @@ class RISK(NetworkIO, AnnotationsIO):
             distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
             edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
+            score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
+            null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
+            num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
             max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
 
         Returns:
             dict: Computed significance of neighborhoods.
         """
-        print_header("Running hypergeometric test")
+        print_header("Running permutation test")
         # Log neighborhood analysis parameters
         params.log_neighborhoods(
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             edge_length_threshold=edge_length_threshold,
-            statistical_test_function="hypergeom",
+            statistical_test_function="permutation",
+            score_metric=score_metric,
+            null_distribution=null_distribution,
+            num_permutations=num_permutations,
             random_seed=random_seed,
             max_workers=max_workers,
         )
@@ -217,12 +196,19 @@ class RISK(NetworkIO, AnnotationsIO):
             random_seed=random_seed,
         )
 
-        # Log and display hypergeometric test settings
+        # Log and display permutation test settings
+        print(f"Neighborhood scoring metric: '{score_metric}'")
+        print(f"Null distribution: '{null_distribution}'")
+        print(f"Number of permutations: {num_permutations}")
         print(f"Maximum workers: {max_workers}")
-        # Run hypergeometric test to compute neighborhood significance
-        neighborhood_significance = compute_hypergeom_test(
+        # Run permutation test to compute neighborhood significance
+        neighborhood_significance = compute_permutation_test(
             neighborhoods=neighborhoods,
             annotations=annotations["matrix"],
+            score_metric=score_metric,
+            null_distribution=null_distribution,
+            num_permutations=num_permutations,
+            random_seed=random_seed,
             max_workers=max_workers,
         )
 
@@ -315,7 +301,7 @@ class RISK(NetworkIO, AnnotationsIO):
             max_cluster_size=max_cluster_size,
         )
 
-        print_header(f"Optimizing distance threshold for domains")
+        print_header("Optimizing distance threshold for domains")
         # Define domains in the network using the specified clustering settings
         domains = self._define_domains(
             neighborhoods=processed_neighborhoods,
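Taken together, the risk.py hunks reorder the three loaders and strip max_workers plus the permutation-only arguments from the analytic tests. A hedged sketch of the resulting call surface (it assumes RISK() takes no constructor arguments, and the network/annotations loading steps are elided because this diff does not show them):

import networkx as nx
from risk import RISK

risk = RISK()  # assumption: no-argument constructor
network: nx.Graph = ...  # produced by the package's own loaders (not in this diff)
annotations: dict = ...  # carries a binary "matrix" entry, per the calls above
# Analytic tests now take only neighborhood parameters and a seed:
hyper = risk.load_neighborhoods_by_hypergeom(network, annotations, random_seed=888)
pois = risk.load_neighborhoods_by_poisson(network, annotations, random_seed=888)
# The permutation test keeps its scoring, null-distribution, and worker options:
perm = risk.load_neighborhoods_by_permutation(
    network,
    annotations,
    score_metric="sum",
    null_distribution="network",
    num_permutations=1000,
    max_workers=4,
)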
risk/stats/__init__.py
@@ -3,7 +3,7 @@ risk/stats
 ~~~~~~~~~~
 """
 
-from .stats import calculate_significance_matrices
-from .fisher_exact import compute_fisher_exact_test
 from .hypergeom import compute_hypergeom_test
 from .permutation import compute_permutation_test
+from .poisson import compute_poisson_test
+from .stats import calculate_significance_matrices
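A quick smoke test of the reshuffled namespace (assumes risk-network 0.0.7b5 is installed):

# All four public entry points resolve from risk.stats after this change
from risk.stats import (
    calculate_significance_matrices,
    compute_hypergeom_test,
    compute_permutation_test,
    compute_poisson_test,
)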
risk_network-0.0.7b5/risk/stats/hypergeom.py (new file)
@@ -0,0 +1,55 @@
+"""
+risk/stats/hypergeom
+~~~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict
+
+import numpy as np
+from scipy.stats import hypergeom
+
+
+def compute_hypergeom_test(
+    neighborhoods: np.ndarray,
+    annotations: np.ndarray,
+) -> Dict[str, Any]:
+    """Compute hypergeometric test for enrichment and depletion in neighborhoods.
+
+    Args:
+        neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
+            and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
+            in a neighborhood.
+        annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
+            and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
+            being annotated.
+
+    Returns:
+        Dict[str, Any]: A dictionary with two keys:
+            - "enrichment_pvals" (np.ndarray): P-values for enrichment, indicating the probability
+              of observing more annotations in a neighborhood than expected under the hypergeometric test.
+            - "depletion_pvals" (np.ndarray): P-values for depletion, indicating the probability
+              of observing fewer annotations in a neighborhood than expected under the hypergeometric test.
+    """
+    # Ensure both matrices are binary (presence/absence)
+    neighborhoods = (neighborhoods > 0).astype(int)
+    annotations = (annotations > 0).astype(int)
+    total_node_count = annotations.shape[0]
+    # Sum of values in each neighborhood
+    neighborhood_sums = np.sum(neighborhoods, axis=0)[:, np.newaxis]
+    # Repeating neighborhood sums for each annotation
+    neighborhood_size_matrix = np.tile(neighborhood_sums, (1, annotations.shape[1]))
+    # Total number of nodes annotated to each attribute
+    annotated_node_counts = np.tile(np.sum(annotations, axis=0), (neighborhoods.shape[1], 1))
+    # Nodes in each neighborhood annotated to each attribute
+    annotated_in_neighborhood = np.dot(neighborhoods, annotations)
+    # Calculate p-values using the hypergeometric distribution
+    depletion_pvals = hypergeom.cdf(
+        annotated_in_neighborhood, total_node_count, annotated_node_counts, neighborhood_size_matrix
+    )
+    enrichment_pvals = hypergeom.sf(
+        annotated_in_neighborhood - 1,
+        total_node_count,
+        annotated_node_counts,
+        neighborhood_size_matrix,
+    )
+    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
risk_network-0.0.7b5/risk/stats/poisson.py (new file)
@@ -0,0 +1,40 @@
+"""
+risk/stats/poisson
+~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Dict, Any
+
+import numpy as np
+from scipy.stats import poisson
+
+
+def compute_poisson_test(neighborhoods: np.ndarray, annotations: np.ndarray) -> Dict[str, Any]:
+    """Compute Poisson test for enrichment and depletion in neighborhoods.
+
+    Args:
+        neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
+            and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
+            in a neighborhood.
+        annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
+            and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
+            being annotated.
+
+    Returns:
+        Dict[str, Any]: A dictionary with two keys:
+            - "enrichment_pvals" (np.ndarray): P-values for enrichment, indicating the probability
+              of observing more annotations in a neighborhood than expected under the Poisson distribution.
+            - "depletion_pvals" (np.ndarray): P-values for depletion, indicating the probability of
+              observing fewer annotations in a neighborhood than expected under the Poisson distribution.
+    """
+    neighborhoods = (neighborhoods > 0).astype(int)
+    annotations = (annotations > 0).astype(int)
+    annotated_in_neighborhood = np.dot(neighborhoods, annotations)
+    lambda_expected = np.mean(annotated_in_neighborhood, axis=0)
+    # Enrichment (observing more than expected)
+    enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood - 1, lambda_expected)
+
+    # Depletion (observing fewer than expected)
+    depletion_pvals = poisson.cdf(annotated_in_neighborhood, lambda_expected)
+
+    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
risk_network.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.7b3
+Version: 0.0.7b5
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
risk_network.egg-info/SOURCES.txt
@@ -22,8 +22,8 @@ risk/network/graph.py
 risk/network/io.py
 risk/network/plot.py
 risk/stats/__init__.py
-risk/stats/fisher_exact.py
 risk/stats/hypergeom.py
+risk/stats/poisson.py
 risk/stats/stats.py
 risk/stats/permutation/__init__.py
 risk/stats/permutation/permutation.py
risk_network-0.0.7b3/risk/stats/fisher_exact.py (deleted)
@@ -1,132 +0,0 @@
-"""
-risk/stats/fisher_exact
-~~~~~~~~~~~~~~~~~~~~~~~
-"""
-
-from multiprocessing import get_context, Manager
-from tqdm import tqdm
-from typing import Any, Dict
-
-import numpy as np
-from scipy.stats import fisher_exact
-
-
-def compute_fisher_exact_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
-    max_workers: int = 4,
-) -> Dict[str, Any]:
-    """Compute Fisher's exact test for enrichment and depletion in neighborhoods.
-
-    Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
-        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
-
-    Returns:
-        dict: Dictionary containing depletion and enrichment p-values.
-    """
-    # Ensure that the matrices are binary (boolean) and free of NaN values
-    neighborhoods = neighborhoods.astype(bool)  # Convert to boolean
-    annotations = annotations.astype(bool)  # Convert to boolean
-
-    # Initialize the process of calculating p-values using multiprocessing
-    ctx = get_context("spawn")
-    manager = Manager()
-    progress_counter = manager.Value("i", 0)
-    total_tasks = neighborhoods.shape[1] * annotations.shape[1]
-
-    # Calculate the workload per worker
-    chunk_size = total_tasks // max_workers
-    remainder = total_tasks % max_workers
-
-    # Execute the Fisher's exact test using multiprocessing
-    with ctx.Pool(max_workers) as pool:
-        with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
-            params_list = []
-            start_idx = 0
-            for i in range(max_workers):
-                end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
-                params_list.append(
-                    (neighborhoods, annotations, start_idx, end_idx, progress_counter)
-                )
-                start_idx = end_idx
-
-            # Start the Fisher's exact test process in parallel
-            results = pool.starmap_async(_fisher_exact_process_subset, params_list, chunksize=1)
-
-            # Update progress bar based on progress_counter
-            while not results.ready():
-                progress.update(progress_counter.value - progress.n)
-                results.wait(0.05)  # Wait for 50ms
-            # Ensure progress bar reaches 100%
-            progress.update(total_tasks - progress.n)
-
-    # Accumulate results from each worker
-    depletion_pvals, enrichment_pvals = [], []
-    for dp, ep in results.get():
-        depletion_pvals.extend(dp)
-        enrichment_pvals.extend(ep)
-
-    # Reshape the results back into arrays with the appropriate dimensions
-    depletion_pvals = np.array(depletion_pvals).reshape(
-        neighborhoods.shape[1], annotations.shape[1]
-    )
-    enrichment_pvals = np.array(enrichment_pvals).reshape(
-        neighborhoods.shape[1], annotations.shape[1]
-    )
-
-    return {
-        "depletion_pvals": depletion_pvals,
-        "enrichment_pvals": enrichment_pvals,
-    }
-
-
-def _fisher_exact_process_subset(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
-    start_idx: int,
-    end_idx: int,
-    progress_counter,
-) -> tuple:
-    """Process a subset of neighborhoods using Fisher's exact test.
-
-    Args:
-        neighborhoods (np.ndarray): The full neighborhood matrix.
-        annotations (np.ndarray): The annotation matrix.
-        start_idx (int): Starting index of the neighborhood-annotation pairs to process.
-        end_idx (int): Ending index of the neighborhood-annotation pairs to process.
-        progress_counter: Shared counter for tracking progress.
-
-    Returns:
-        tuple: Local p-values for depletion and enrichment.
-    """
-    # Initialize lists to store p-values for depletion and enrichment
-    depletion_pvals = []
-    enrichment_pvals = []
-    # Process the subset of tasks assigned to this worker
-    for idx in range(start_idx, end_idx):
-        i = idx // annotations.shape[1]  # Neighborhood index
-        j = idx % annotations.shape[1]  # Annotation index
-
-        neighborhood = neighborhoods[:, i]
-        annotation = annotations[:, j]
-
-        # Calculate the contingency table values
-        TP = np.sum(neighborhood & annotation)
-        FP = np.sum(neighborhood & ~annotation)
-        FN = np.sum(~neighborhood & annotation)
-        TN = np.sum(~neighborhood & ~annotation)
-        table = np.array([[TP, FP], [FN, TN]])
-
-        # Perform Fisher's exact test for depletion (alternative='less')
-        _, p_value_depletion = fisher_exact(table, alternative="less")
-        depletion_pvals.append(p_value_depletion)
-        # Perform Fisher's exact test for enrichment (alternative='greater')
-        _, p_value_enrichment = fisher_exact(table, alternative="greater")
-        enrichment_pvals.append(p_value_enrichment)
-
-        # Update the shared progress counter
-        progress_counter.value += 1
-
-    return depletion_pvals, enrichment_pvals
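For reference, the per-pair computation this deleted file performed can be reproduced in a few lines; a minimal sketch for one neighborhood/annotation pair (toy boolean node masks):

import numpy as np
from scipy.stats import fisher_exact

# One neighborhood column and one annotation column, as boolean node masks
neighborhood = np.array([True, True, False, False])
annotation = np.array([True, False, False, True])
# 2x2 contingency table, exactly as the removed worker built it
TP = np.sum(neighborhood & annotation)
FP = np.sum(neighborhood & ~annotation)
FN = np.sum(~neighborhood & annotation)
TN = np.sum(~neighborhood & ~annotation)
table = np.array([[TP, FP], [FN, TN]])
_, p_depletion = fisher_exact(table, alternative="less")
_, p_enrichment = fisher_exact(table, alternative="greater")
print(p_depletion, p_enrichment)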
risk_network-0.0.7b3/risk/stats/hypergeom.py (deleted)
@@ -1,131 +0,0 @@
-"""
-risk/stats/hypergeom
-~~~~~~~~~~~~~~~~~~~~
-"""
-
-from multiprocessing import get_context, Manager
-from tqdm import tqdm
-from typing import Any, Dict
-
-import numpy as np
-from scipy.stats import hypergeom
-
-
-def compute_hypergeom_test(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
-    max_workers: int = 4,
-) -> Dict[str, Any]:
-    """Compute hypergeometric test for enrichment and depletion in neighborhoods.
-
-    Args:
-        neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
-        annotations (np.ndarray): Binary matrix representing annotations.
-        max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
-
-    Returns:
-        dict: Dictionary containing depletion and enrichment p-values.
-    """
-    # Ensure that the matrices are binary (boolean) and free of NaN values
-    neighborhoods = neighborhoods.astype(bool)  # Convert to boolean
-    annotations = annotations.astype(bool)  # Convert to boolean
-
-    # Initialize the process of calculating p-values using multiprocessing
-    ctx = get_context("spawn")
-    manager = Manager()
-    progress_counter = manager.Value("i", 0)
-    total_tasks = neighborhoods.shape[1] * annotations.shape[1]
-
-    # Calculate the workload per worker
-    chunk_size = total_tasks // max_workers
-    remainder = total_tasks % max_workers
-
-    # Execute the hypergeometric test using multiprocessing
-    with ctx.Pool(max_workers) as pool:
-        with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
-            params_list = []
-            start_idx = 0
-            for i in range(max_workers):
-                end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
-                params_list.append(
-                    (neighborhoods, annotations, start_idx, end_idx, progress_counter)
-                )
-                start_idx = end_idx
-
-            # Start the hypergeometric test process in parallel
-            results = pool.starmap_async(_hypergeom_process_subset, params_list, chunksize=1)
-
-            # Update progress bar based on progress_counter
-            while not results.ready():
-                progress.update(progress_counter.value - progress.n)
-                results.wait(0.05)  # Wait for 50ms
-            # Ensure progress bar reaches 100%
-            progress.update(total_tasks - progress.n)
-
-    # Accumulate results from each worker
-    depletion_pvals, enrichment_pvals = [], []
-    for dp, ep in results.get():
-        depletion_pvals.extend(dp)
-        enrichment_pvals.extend(ep)
-
-    # Reshape the results back into arrays with the appropriate dimensions
-    depletion_pvals = np.array(depletion_pvals).reshape(
-        neighborhoods.shape[1], annotations.shape[1]
-    )
-    enrichment_pvals = np.array(enrichment_pvals).reshape(
-        neighborhoods.shape[1], annotations.shape[1]
-    )
-
-    return {
-        "depletion_pvals": depletion_pvals,
-        "enrichment_pvals": enrichment_pvals,
-    }
-
-
-def _hypergeom_process_subset(
-    neighborhoods: np.ndarray,
-    annotations: np.ndarray,
-    start_idx: int,
-    end_idx: int,
-    progress_counter,
-) -> tuple:
-    """Process a subset of neighborhoods using the hypergeometric test.
-
-    Args:
-        neighborhoods (np.ndarray): The full neighborhood matrix.
-        annotations (np.ndarray): The annotation matrix.
-        start_idx (int): Starting index of the neighborhood-annotation pairs to process.
-        end_idx (int): Ending index of the neighborhood-annotation pairs to process.
-        progress_counter: Shared counter for tracking progress.
-
-    Returns:
-        tuple: Local p-values for depletion and enrichment.
-    """
-    # Initialize lists to store p-values for depletion and enrichment
-    depletion_pvals = []
-    enrichment_pvals = []
-    # Process the subset of tasks assigned to this worker
-    for idx in range(start_idx, end_idx):
-        i = idx // annotations.shape[1]  # Neighborhood index
-        j = idx % annotations.shape[1]  # Annotation index
-
-        neighborhood = neighborhoods[:, i]
-        annotation = annotations[:, j]
-
-        # Calculate the required values for the hypergeometric test
-        M = annotations.shape[0]  # Total number of items (population size)
-        n = np.sum(annotation)  # Total number of successes in population
-        N = np.sum(neighborhood)  # Total number of draws (sample size)
-        k = np.sum(neighborhood & annotation)  # Number of successes in sample
-
-        # Perform hypergeometric test for depletion
-        p_value_depletion = hypergeom.cdf(k, M, n, N)
-        depletion_pvals.append(p_value_depletion)
-        # Perform hypergeometric test for enrichment
-        p_value_enrichment = hypergeom.sf(k - 1, M, n, N)
-        enrichment_pvals.append(p_value_enrichment)
-
-        # Update the shared progress counter
-        progress_counter.value += 1
-
-    return depletion_pvals, enrichment_pvals
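The replacement module computes the same quantities without multiprocessing. A quick numpy/scipy check that the vectorized formulas reproduce this per-pair loop (the sketch assumes a square, symmetric neighborhood matrix, as RISK's node-by-node neighborhoods are, so row and column views agree):

import numpy as np
from scipy.stats import hypergeom

neighborhoods = np.array([[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 1, 1], [0, 0, 1, 1]], dtype=bool)
annotations = np.array([[1, 0], [1, 0], [0, 1], [1, 1]], dtype=bool)
M = annotations.shape[0]
# Old per-pair loop, exactly as in the deleted worker above
loop_enrich = np.array(
    [
        [
            hypergeom.sf(
                np.sum(neighborhoods[:, i] & annotations[:, j]) - 1,
                M,
                np.sum(annotations[:, j]),
                np.sum(neighborhoods[:, i]),
            )
            for j in range(annotations.shape[1])
        ]
        for i in range(neighborhoods.shape[1])
    ]
)
# New vectorized form, as in the added risk/stats/hypergeom.py
nb = neighborhoods.astype(int)
an = annotations.astype(int)
sizes = np.tile(nb.sum(axis=0)[:, None], (1, an.shape[1]))
annotated = np.tile(an.sum(axis=0), (nb.shape[1], 1))
k = nb @ an
vec_enrich = hypergeom.sf(k - 1, M, annotated, sizes)
assert np.allclose(loop_enrich, vec_enrich)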