risk-network 0.0.7b4__tar.gz → 0.0.7b5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/PKG-INFO +1 -1
  2. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/__init__.py +1 -1
  3. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/risk.py +36 -50
  4. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/stats/__init__.py +2 -2
  5. risk_network-0.0.7b5/risk/stats/hypergeom.py +55 -0
  6. risk_network-0.0.7b5/risk/stats/poisson.py +40 -0
  7. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk_network.egg-info/PKG-INFO +1 -1
  8. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk_network.egg-info/SOURCES.txt +1 -1
  9. risk_network-0.0.7b4/risk/stats/fisher_exact.py +0 -132
  10. risk_network-0.0.7b4/risk/stats/hypergeom.py +0 -131
  11. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/LICENSE +0 -0
  12. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/MANIFEST.in +0 -0
  13. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/README.md +0 -0
  14. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/pyproject.toml +0 -0
  15. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/annotations/__init__.py +0 -0
  16. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/annotations/annotations.py +0 -0
  17. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/annotations/io.py +0 -0
  18. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/constants.py +0 -0
  19. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/log/__init__.py +0 -0
  20. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/log/console.py +0 -0
  21. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/log/params.py +0 -0
  22. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/neighborhoods/__init__.py +0 -0
  23. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/neighborhoods/community.py +0 -0
  24. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/neighborhoods/domains.py +0 -0
  25. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/neighborhoods/neighborhoods.py +0 -0
  26. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/network/__init__.py +0 -0
  27. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/network/geometry.py +0 -0
  28. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/network/graph.py +0 -0
  29. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/network/io.py +0 -0
  30. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/network/plot.py +0 -0
  31. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/stats/permutation/__init__.py +0 -0
  32. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/stats/permutation/permutation.py +0 -0
  33. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/stats/permutation/test_functions.py +0 -0
  34. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk/stats/stats.py +0 -0
  35. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk_network.egg-info/dependency_links.txt +0 -0
  36. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk_network.egg-info/requires.txt +0 -0
  37. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/risk_network.egg-info/top_level.txt +0 -0
  38. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/setup.cfg +0 -0
  39. {risk_network-0.0.7b4 → risk_network-0.0.7b5}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.7b4
3
+ Version: 0.0.7b5
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.7-beta.4"
10
+ __version__ = "0.0.7-beta.5"
@@ -20,9 +20,9 @@ from risk.neighborhoods import (
20
20
  from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
21
21
  from risk.stats import (
22
22
  calculate_significance_matrices,
23
- compute_fisher_exact_test,
24
23
  compute_hypergeom_test,
25
24
  compute_permutation_test,
25
+ compute_poisson_test,
26
26
  )
27
27
 
28
28
 
@@ -45,20 +45,16 @@ class RISK(NetworkIO, AnnotationsIO):
45
45
  """Access the logged parameters."""
46
46
  return params
47
47
 
48
- def load_neighborhoods_by_permutation(
48
+ def load_neighborhoods_by_hypergeom(
49
49
  self,
50
50
  network: nx.Graph,
51
51
  annotations: Dict[str, Any],
52
52
  distance_metric: str = "dijkstra",
53
53
  louvain_resolution: float = 0.1,
54
54
  edge_length_threshold: float = 0.5,
55
- score_metric: str = "sum",
56
- null_distribution: str = "network",
57
- num_permutations: int = 1000,
58
55
  random_seed: int = 888,
59
- max_workers: int = 1,
60
56
  ) -> Dict[str, Any]:
61
- """Load significant neighborhoods for the network using the permutation test.
57
+ """Load significant neighborhoods for the network using the hypergeometric test.
62
58
 
63
59
  Args:
64
60
  network (nx.Graph): The network graph.
@@ -66,27 +62,19 @@ class RISK(NetworkIO, AnnotationsIO):
66
62
  distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
67
63
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
68
64
  edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
69
- score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
70
- null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
71
- num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
72
65
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
73
- max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
74
66
 
75
67
  Returns:
76
68
  dict: Computed significance of neighborhoods.
77
69
  """
78
- print_header("Running permutation test")
70
+ print_header("Running hypergeometric test")
79
71
  # Log neighborhood analysis parameters
80
72
  params.log_neighborhoods(
81
73
  distance_metric=distance_metric,
82
74
  louvain_resolution=louvain_resolution,
83
75
  edge_length_threshold=edge_length_threshold,
84
- statistical_test_function="permutation",
85
- score_metric=score_metric,
86
- null_distribution=null_distribution,
87
- num_permutations=num_permutations,
76
+ statistical_test_function="hypergeom",
88
77
  random_seed=random_seed,
89
- max_workers=max_workers,
90
78
  )
91
79
 
92
80
  # Load neighborhoods based on the network and distance metric
@@ -97,27 +85,16 @@ class RISK(NetworkIO, AnnotationsIO):
97
85
  edge_length_threshold=edge_length_threshold,
98
86
  random_seed=random_seed,
99
87
  )
100
-
101
- # Log and display permutation test settings
102
- print(f"Neighborhood scoring metric: '{score_metric}'")
103
- print(f"Null distribution: '{null_distribution}'")
104
- print(f"Number of permutations: {num_permutations}")
105
- print(f"Maximum workers: {max_workers}")
106
- # Run permutation test to compute neighborhood significance
107
- neighborhood_significance = compute_permutation_test(
88
+ # Run hypergeometric test to compute neighborhood significance
89
+ neighborhood_significance = compute_hypergeom_test(
108
90
  neighborhoods=neighborhoods,
109
91
  annotations=annotations["matrix"],
110
- score_metric=score_metric,
111
- null_distribution=null_distribution,
112
- num_permutations=num_permutations,
113
- random_seed=random_seed,
114
- max_workers=max_workers,
115
92
  )
116
93
 
117
94
  # Return the computed neighborhood significance
118
95
  return neighborhood_significance
119
96
 
120
- def load_neighborhoods_by_fisher_exact(
97
+ def load_neighborhoods_by_poisson(
121
98
  self,
122
99
  network: nx.Graph,
123
100
  annotations: Dict[str, Any],
@@ -125,9 +102,8 @@ class RISK(NetworkIO, AnnotationsIO):
125
102
  louvain_resolution: float = 0.1,
126
103
  edge_length_threshold: float = 0.5,
127
104
  random_seed: int = 888,
128
- max_workers: int = 1,
129
105
  ) -> Dict[str, Any]:
130
- """Load significant neighborhoods for the network using the Fisher's exact test.
106
+ """Load significant neighborhoods for the network using the Poisson test.
131
107
 
132
108
  Args:
133
109
  network (nx.Graph): The network graph.
@@ -136,20 +112,18 @@ class RISK(NetworkIO, AnnotationsIO):
136
112
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
137
113
  edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
138
114
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
139
- max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
140
115
 
141
116
  Returns:
142
117
  dict: Computed significance of neighborhoods.
143
118
  """
144
- print_header("Running Fisher's exact test")
119
+ print_header("Running Poisson test")
145
120
  # Log neighborhood analysis parameters
146
121
  params.log_neighborhoods(
147
122
  distance_metric=distance_metric,
148
123
  louvain_resolution=louvain_resolution,
149
124
  edge_length_threshold=edge_length_threshold,
150
- statistical_test_function="fisher_exact",
125
+ statistical_test_function="poisson",
151
126
  random_seed=random_seed,
152
- max_workers=max_workers,
153
127
  )
154
128
 
155
129
  # Load neighborhoods based on the network and distance metric
@@ -160,30 +134,29 @@ class RISK(NetworkIO, AnnotationsIO):
160
134
  edge_length_threshold=edge_length_threshold,
161
135
  random_seed=random_seed,
162
136
  )
163
-
164
- # Log and display Fisher's exact test settings
165
- print(f"Maximum workers: {max_workers}")
166
- # Run Fisher's exact test to compute neighborhood significance
167
- neighborhood_significance = compute_fisher_exact_test(
137
+ # Run Poisson test to compute neighborhood significance
138
+ neighborhood_significance = compute_poisson_test(
168
139
  neighborhoods=neighborhoods,
169
140
  annotations=annotations["matrix"],
170
- max_workers=max_workers,
171
141
  )
172
142
 
173
143
  # Return the computed neighborhood significance
174
144
  return neighborhood_significance
175
145
 
176
- def load_neighborhoods_by_hypergeom(
146
+ def load_neighborhoods_by_permutation(
177
147
  self,
178
148
  network: nx.Graph,
179
149
  annotations: Dict[str, Any],
180
150
  distance_metric: str = "dijkstra",
181
151
  louvain_resolution: float = 0.1,
182
152
  edge_length_threshold: float = 0.5,
153
+ score_metric: str = "sum",
154
+ null_distribution: str = "network",
155
+ num_permutations: int = 1000,
183
156
  random_seed: int = 888,
184
157
  max_workers: int = 1,
185
158
  ) -> Dict[str, Any]:
186
- """Load significant neighborhoods for the network using the hypergeometric test.
159
+ """Load significant neighborhoods for the network using the permutation test.
187
160
 
188
161
  Args:
189
162
  network (nx.Graph): The network graph.
@@ -191,19 +164,25 @@ class RISK(NetworkIO, AnnotationsIO):
191
164
  distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "dijkstra".
192
165
  louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
193
166
  edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
167
+ score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
168
+ null_distribution (str, optional): Distribution used for permutation tests. Defaults to "network".
169
+ num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
194
170
  random_seed (int, optional): Seed for random number generation. Defaults to 888.
195
171
  max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
196
172
 
197
173
  Returns:
198
174
  dict: Computed significance of neighborhoods.
199
175
  """
200
- print_header("Running hypergeometric test")
176
+ print_header("Running permutation test")
201
177
  # Log neighborhood analysis parameters
202
178
  params.log_neighborhoods(
203
179
  distance_metric=distance_metric,
204
180
  louvain_resolution=louvain_resolution,
205
181
  edge_length_threshold=edge_length_threshold,
206
- statistical_test_function="hypergeom",
182
+ statistical_test_function="permutation",
183
+ score_metric=score_metric,
184
+ null_distribution=null_distribution,
185
+ num_permutations=num_permutations,
207
186
  random_seed=random_seed,
208
187
  max_workers=max_workers,
209
188
  )
@@ -217,12 +196,19 @@ class RISK(NetworkIO, AnnotationsIO):
217
196
  random_seed=random_seed,
218
197
  )
219
198
 
220
- # Log and display hypergeometric test settings
199
+ # Log and display permutation test settings
200
+ print(f"Neighborhood scoring metric: '{score_metric}'")
201
+ print(f"Null distribution: '{null_distribution}'")
202
+ print(f"Number of permutations: {num_permutations}")
221
203
  print(f"Maximum workers: {max_workers}")
222
- # Run hypergeometric test to compute neighborhood significance
223
- neighborhood_significance = compute_hypergeom_test(
204
+ # Run permutation test to compute neighborhood significance
205
+ neighborhood_significance = compute_permutation_test(
224
206
  neighborhoods=neighborhoods,
225
207
  annotations=annotations["matrix"],
208
+ score_metric=score_metric,
209
+ null_distribution=null_distribution,
210
+ num_permutations=num_permutations,
211
+ random_seed=random_seed,
226
212
  max_workers=max_workers,
227
213
  )
228
214
 
@@ -3,7 +3,7 @@ risk/stats
3
3
  ~~~~~~~~~~
4
4
  """
5
5
 
6
- from .stats import calculate_significance_matrices
7
- from .fisher_exact import compute_fisher_exact_test
8
6
  from .hypergeom import compute_hypergeom_test
9
7
  from .permutation import compute_permutation_test
8
+ from .poisson import compute_poisson_test
9
+ from .stats import calculate_significance_matrices
@@ -0,0 +1,55 @@
1
+ """
2
+ risk/stats/hypergeom
3
+ ~~~~~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ from typing import Any, Dict
7
+
8
+ import numpy as np
9
+ from scipy.stats import hypergeom
10
+
11
+
12
def compute_hypergeom_test(
    neighborhoods: np.ndarray,
    annotations: np.ndarray,
) -> Dict[str, Any]:
    """Compute hypergeometric test for enrichment and depletion in neighborhoods.

    Args:
        neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
            and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
            in a neighborhood.
        annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
            and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
            being annotated.

    Returns:
        Dict[str, Any]: A dictionary with two keys:
            - "enrichment_pvals" (np.ndarray): P-values for enrichment, indicating the probability
              of observing at least as many annotations in a neighborhood as observed.
            - "depletion_pvals" (np.ndarray): P-values for depletion, indicating the probability
              of observing at most as many annotations in a neighborhood as observed.
    """
    # Ensure both matrices are strictly binary (presence/absence)
    neighborhoods = (neighborhoods > 0).astype(int)
    annotations = (annotations > 0).astype(int)
    # Population size M: total number of nodes
    total_node_count = annotations.shape[0]
    # Sample size N per neighborhood, shaped (num_neighborhoods, 1) so it
    # broadcasts across annotation columns — scipy.stats.hypergeom broadcasts
    # its arguments, so explicit np.tile replication is unnecessary
    neighborhood_sizes = np.sum(neighborhoods, axis=0)[:, np.newaxis]
    # Successes in population n per annotation, shaped (1, num_annotations)
    annotated_node_counts = np.sum(annotations, axis=0)[np.newaxis, :]
    # Successes in sample k: annotated nodes within each neighborhood
    annotated_in_neighborhood = np.dot(neighborhoods, annotations)
    # Depletion: P(X <= k) under the hypergeometric null
    depletion_pvals = hypergeom.cdf(
        annotated_in_neighborhood, total_node_count, annotated_node_counts, neighborhood_sizes
    )
    # Enrichment: P(X >= k); sf(k - 1) gives the inclusive upper tail
    enrichment_pvals = hypergeom.sf(
        annotated_in_neighborhood - 1,
        total_node_count,
        annotated_node_counts,
        neighborhood_sizes,
    )
    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
@@ -0,0 +1,40 @@
1
+ """
2
+ risk/stats/poisson
3
+ ~~~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ from typing import Dict, Any
7
+
8
+ import numpy as np
9
+ from scipy.stats import poisson
10
+
11
+
12
def compute_poisson_test(neighborhoods: np.ndarray, annotations: np.ndarray) -> Dict[str, Any]:
    """Compute Poisson test for enrichment and depletion in neighborhoods.

    Args:
        neighborhoods (np.ndarray): Binary matrix representing neighborhoods, where rows are nodes
            and columns are neighborhoods. Entries indicate the presence (1) or absence (0) of a node
            in a neighborhood.
        annotations (np.ndarray): Binary matrix representing annotations, where rows are nodes
            and columns are annotations. Entries indicate the presence (1) or absence (0) of a node
            being annotated.

    Returns:
        Dict[str, Any]: A dictionary with two keys:
            - "enrichment_pvals" (np.ndarray): P-values for enrichment, indicating the probability
              of observing at least as many annotations in a neighborhood as observed.
            - "depletion_pvals" (np.ndarray): P-values for depletion, indicating the probability of
              observing at most as many annotations in a neighborhood as observed.
    """
    # Binarize both matrices so entries are strict presence/absence indicators
    neighborhoods = (neighborhoods > 0).astype(int)
    annotations = (annotations > 0).astype(int)
    # Observed annotated-node count for each (neighborhood, annotation) pair
    annotated_in_neighborhood = np.dot(neighborhoods, annotations)
    # Null rate per annotation: mean observed count across neighborhoods
    lambda_expected = np.mean(annotated_in_neighborhood, axis=0)
    # Enrichment: P(X >= k). poisson.sf(k - 1) is the numerically stable
    # survival-function form of 1 - poisson.cdf(k - 1), avoiding catastrophic
    # cancellation for very small upper-tail p-values
    enrichment_pvals = poisson.sf(annotated_in_neighborhood - 1, lambda_expected)
    # Depletion: P(X <= k)
    depletion_pvals = poisson.cdf(annotated_in_neighborhood, lambda_expected)
    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.7b4
3
+ Version: 0.0.7b5
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -22,8 +22,8 @@ risk/network/graph.py
22
22
  risk/network/io.py
23
23
  risk/network/plot.py
24
24
  risk/stats/__init__.py
25
- risk/stats/fisher_exact.py
26
25
  risk/stats/hypergeom.py
26
+ risk/stats/poisson.py
27
27
  risk/stats/stats.py
28
28
  risk/stats/permutation/__init__.py
29
29
  risk/stats/permutation/permutation.py
@@ -1,132 +0,0 @@
1
- """
2
- risk/stats/fisher_exact
3
- ~~~~~~~~~~~~~~~~~~~~~~~
4
- """
5
-
6
- from multiprocessing import get_context, Manager
7
- from tqdm import tqdm
8
- from typing import Any, Dict
9
-
10
- import numpy as np
11
- from scipy.stats import fisher_exact
12
-
13
-
14
- def compute_fisher_exact_test(
15
- neighborhoods: np.ndarray,
16
- annotations: np.ndarray,
17
- max_workers: int = 4,
18
- ) -> Dict[str, Any]:
19
- """Compute Fisher's exact test for enrichment and depletion in neighborhoods.
20
-
21
- Args:
22
- neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
23
- annotations (np.ndarray): Binary matrix representing annotations.
24
- max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
25
-
26
- Returns:
27
- dict: Dictionary containing depletion and enrichment p-values.
28
- """
29
- # Ensure that the matrices are binary (boolean) and free of NaN values
30
- neighborhoods = neighborhoods.astype(bool) # Convert to boolean
31
- annotations = annotations.astype(bool) # Convert to boolean
32
-
33
- # Initialize the process of calculating p-values using multiprocessing
34
- ctx = get_context("spawn")
35
- manager = Manager()
36
- progress_counter = manager.Value("i", 0)
37
- total_tasks = neighborhoods.shape[1] * annotations.shape[1]
38
-
39
- # Calculate the workload per worker
40
- chunk_size = total_tasks // max_workers
41
- remainder = total_tasks % max_workers
42
-
43
- # Execute the Fisher's exact test using multiprocessing
44
- with ctx.Pool(max_workers) as pool:
45
- with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
46
- params_list = []
47
- start_idx = 0
48
- for i in range(max_workers):
49
- end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
50
- params_list.append(
51
- (neighborhoods, annotations, start_idx, end_idx, progress_counter)
52
- )
53
- start_idx = end_idx
54
-
55
- # Start the Fisher's exact test process in parallel
56
- results = pool.starmap_async(_fisher_exact_process_subset, params_list, chunksize=1)
57
-
58
- # Update progress bar based on progress_counter
59
- while not results.ready():
60
- progress.update(progress_counter.value - progress.n)
61
- results.wait(0.05) # Wait for 50ms
62
- # Ensure progress bar reaches 100%
63
- progress.update(total_tasks - progress.n)
64
-
65
- # Accumulate results from each worker
66
- depletion_pvals, enrichment_pvals = [], []
67
- for dp, ep in results.get():
68
- depletion_pvals.extend(dp)
69
- enrichment_pvals.extend(ep)
70
-
71
- # Reshape the results back into arrays with the appropriate dimensions
72
- depletion_pvals = np.array(depletion_pvals).reshape(
73
- neighborhoods.shape[1], annotations.shape[1]
74
- )
75
- enrichment_pvals = np.array(enrichment_pvals).reshape(
76
- neighborhoods.shape[1], annotations.shape[1]
77
- )
78
-
79
- return {
80
- "depletion_pvals": depletion_pvals,
81
- "enrichment_pvals": enrichment_pvals,
82
- }
83
-
84
-
85
- def _fisher_exact_process_subset(
86
- neighborhoods: np.ndarray,
87
- annotations: np.ndarray,
88
- start_idx: int,
89
- end_idx: int,
90
- progress_counter,
91
- ) -> tuple:
92
- """Process a subset of neighborhoods using Fisher's exact test.
93
-
94
- Args:
95
- neighborhoods (np.ndarray): The full neighborhood matrix.
96
- annotations (np.ndarray): The annotation matrix.
97
- start_idx (int): Starting index of the neighborhood-annotation pairs to process.
98
- end_idx (int): Ending index of the neighborhood-annotation pairs to process.
99
- progress_counter: Shared counter for tracking progress.
100
-
101
- Returns:
102
- tuple: Local p-values for depletion and enrichment.
103
- """
104
- # Initialize lists to store p-values for depletion and enrichment
105
- depletion_pvals = []
106
- enrichment_pvals = []
107
- # Process the subset of tasks assigned to this worker
108
- for idx in range(start_idx, end_idx):
109
- i = idx // annotations.shape[1] # Neighborhood index
110
- j = idx % annotations.shape[1] # Annotation index
111
-
112
- neighborhood = neighborhoods[:, i]
113
- annotation = annotations[:, j]
114
-
115
- # Calculate the contingency table values
116
- TP = np.sum(neighborhood & annotation)
117
- FP = np.sum(neighborhood & ~annotation)
118
- FN = np.sum(~neighborhood & annotation)
119
- TN = np.sum(~neighborhood & ~annotation)
120
- table = np.array([[TP, FP], [FN, TN]])
121
-
122
- # Perform Fisher's exact test for depletion (alternative='less')
123
- _, p_value_depletion = fisher_exact(table, alternative="less")
124
- depletion_pvals.append(p_value_depletion)
125
- # Perform Fisher's exact test for enrichment (alternative='greater')
126
- _, p_value_enrichment = fisher_exact(table, alternative="greater")
127
- enrichment_pvals.append(p_value_enrichment)
128
-
129
- # Update the shared progress counter
130
- progress_counter.value += 1
131
-
132
- return depletion_pvals, enrichment_pvals
@@ -1,131 +0,0 @@
1
- """
2
- risk/stats/hypergeom
3
- ~~~~~~~~~~~~~~~~~~~~
4
- """
5
-
6
- from multiprocessing import get_context, Manager
7
- from tqdm import tqdm
8
- from typing import Any, Dict
9
-
10
- import numpy as np
11
- from scipy.stats import hypergeom
12
-
13
-
14
- def compute_hypergeom_test(
15
- neighborhoods: np.ndarray,
16
- annotations: np.ndarray,
17
- max_workers: int = 4,
18
- ) -> Dict[str, Any]:
19
- """Compute hypergeometric test for enrichment and depletion in neighborhoods.
20
-
21
- Args:
22
- neighborhoods (np.ndarray): Binary matrix representing neighborhoods.
23
- annotations (np.ndarray): Binary matrix representing annotations.
24
- max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
25
-
26
- Returns:
27
- dict: Dictionary containing depletion and enrichment p-values.
28
- """
29
- # Ensure that the matrices are binary (boolean) and free of NaN values
30
- neighborhoods = neighborhoods.astype(bool) # Convert to boolean
31
- annotations = annotations.astype(bool) # Convert to boolean
32
-
33
- # Initialize the process of calculating p-values using multiprocessing
34
- ctx = get_context("spawn")
35
- manager = Manager()
36
- progress_counter = manager.Value("i", 0)
37
- total_tasks = neighborhoods.shape[1] * annotations.shape[1]
38
-
39
- # Calculate the workload per worker
40
- chunk_size = total_tasks // max_workers
41
- remainder = total_tasks % max_workers
42
-
43
- # Execute the hypergeometric test using multiprocessing
44
- with ctx.Pool(max_workers) as pool:
45
- with tqdm(total=total_tasks, desc="Total progress", position=0) as progress:
46
- params_list = []
47
- start_idx = 0
48
- for i in range(max_workers):
49
- end_idx = start_idx + chunk_size + (1 if i < remainder else 0)
50
- params_list.append(
51
- (neighborhoods, annotations, start_idx, end_idx, progress_counter)
52
- )
53
- start_idx = end_idx
54
-
55
- # Start the hypergeometric test process in parallel
56
- results = pool.starmap_async(_hypergeom_process_subset, params_list, chunksize=1)
57
-
58
- # Update progress bar based on progress_counter
59
- while not results.ready():
60
- progress.update(progress_counter.value - progress.n)
61
- results.wait(0.05) # Wait for 50ms
62
- # Ensure progress bar reaches 100%
63
- progress.update(total_tasks - progress.n)
64
-
65
- # Accumulate results from each worker
66
- depletion_pvals, enrichment_pvals = [], []
67
- for dp, ep in results.get():
68
- depletion_pvals.extend(dp)
69
- enrichment_pvals.extend(ep)
70
-
71
- # Reshape the results back into arrays with the appropriate dimensions
72
- depletion_pvals = np.array(depletion_pvals).reshape(
73
- neighborhoods.shape[1], annotations.shape[1]
74
- )
75
- enrichment_pvals = np.array(enrichment_pvals).reshape(
76
- neighborhoods.shape[1], annotations.shape[1]
77
- )
78
-
79
- return {
80
- "depletion_pvals": depletion_pvals,
81
- "enrichment_pvals": enrichment_pvals,
82
- }
83
-
84
-
85
- def _hypergeom_process_subset(
86
- neighborhoods: np.ndarray,
87
- annotations: np.ndarray,
88
- start_idx: int,
89
- end_idx: int,
90
- progress_counter,
91
- ) -> tuple:
92
- """Process a subset of neighborhoods using the hypergeometric test.
93
-
94
- Args:
95
- neighborhoods (np.ndarray): The full neighborhood matrix.
96
- annotations (np.ndarray): The annotation matrix.
97
- start_idx (int): Starting index of the neighborhood-annotation pairs to process.
98
- end_idx (int): Ending index of the neighborhood-annotation pairs to process.
99
- progress_counter: Shared counter for tracking progress.
100
-
101
- Returns:
102
- tuple: Local p-values for depletion and enrichment.
103
- """
104
- # Initialize lists to store p-values for depletion and enrichment
105
- depletion_pvals = []
106
- enrichment_pvals = []
107
- # Process the subset of tasks assigned to this worker
108
- for idx in range(start_idx, end_idx):
109
- i = idx // annotations.shape[1] # Neighborhood index
110
- j = idx % annotations.shape[1] # Annotation index
111
-
112
- neighborhood = neighborhoods[:, i]
113
- annotation = annotations[:, j]
114
-
115
- # Calculate the required values for the hypergeometric test
116
- M = annotations.shape[0] # Total number of items (population size)
117
- n = np.sum(annotation) # Total number of successes in population
118
- N = np.sum(neighborhood) # Total number of draws (sample size)
119
- k = np.sum(neighborhood & annotation) # Number of successes in sample
120
-
121
- # Perform hypergeometric test for depletion
122
- p_value_depletion = hypergeom.cdf(k, M, n, N)
123
- depletion_pvals.append(p_value_depletion)
124
- # Perform hypergeometric test for enrichment
125
- p_value_enrichment = hypergeom.sf(k - 1, M, n, N)
126
- enrichment_pvals.append(p_value_enrichment)
127
-
128
- # Update the shared progress counter
129
- progress_counter.value += 1
130
-
131
- return depletion_pvals, enrichment_pvals
File without changes
File without changes
File without changes
File without changes