risk-network 0.0.8b18__py3-none-any.whl → 0.0.9b26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +133 -72
  4. risk/annotations/io.py +50 -34
  5. risk/log/__init__.py +4 -2
  6. risk/log/{config.py → console.py} +5 -3
  7. risk/log/{params.py → parameters.py} +21 -46
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +446 -0
  10. risk/neighborhoods/community.py +281 -96
  11. risk/neighborhoods/domains.py +92 -38
  12. risk/neighborhoods/neighborhoods.py +210 -149
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +69 -58
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +194 -0
  17. risk/network/graph/network.py +269 -0
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +58 -48
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +80 -26
  23. risk/network/{plot → plotter}/contour.py +43 -34
  24. risk/network/{plot → plotter}/labels.py +123 -113
  25. risk/network/plotter/network.py +424 -0
  26. risk/network/plotter/utils/colors.py +416 -0
  27. risk/network/plotter/utils/layout.py +94 -0
  28. risk/risk.py +11 -469
  29. risk/stats/__init__.py +8 -4
  30. risk/stats/binom.py +51 -0
  31. risk/stats/chi2.py +69 -0
  32. risk/stats/hypergeom.py +28 -18
  33. risk/stats/permutation/__init__.py +1 -1
  34. risk/stats/permutation/permutation.py +45 -39
  35. risk/stats/permutation/test_functions.py +25 -17
  36. risk/stats/poisson.py +17 -11
  37. risk/stats/stats.py +20 -16
  38. risk/stats/zscore.py +68 -0
  39. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
  40. risk_network-0.0.9b26.dist-info/RECORD +44 -0
  41. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
  42. risk/network/graph.py +0 -159
  43. risk/network/plot/__init__.py +0 -6
  44. risk/network/plot/network.py +0 -282
  45. risk/network/plot/plotter.py +0 -137
  46. risk/network/plot/utils/color.py +0 -353
  47. risk/network/plot/utils/layout.py +0 -53
  48. risk_network-0.0.8b18.dist-info/RECORD +0 -37
  49. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
  50. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/risk.py CHANGED
@@ -3,34 +3,21 @@ risk/risk
3
3
  ~~~~~~~~~
4
4
  """
5
5
 
6
- from typing import Any, Dict, Tuple, Union
6
+ from risk.network import NetworkIO
7
+ from risk.annotations import AnnotationsIO
8
+ from risk.neighborhoods import NeighborhoodsAPI
9
+ from risk.network.graph import GraphAPI
10
+ from risk.network.plotter import PlotterAPI
7
11
 
8
- import networkx as nx
9
- import numpy as np
10
- import pandas as pd
12
+ from risk.log import params, set_global_verbosity
11
13
 
12
- from risk.annotations import AnnotationsIO, define_top_annotations
13
- from risk.log import params, logger, log_header, set_global_verbosity
14
- from risk.neighborhoods import (
15
- define_domains,
16
- get_network_neighborhoods,
17
- process_neighborhoods,
18
- trim_domains_and_top_annotations,
19
- )
20
- from risk.network import NetworkIO, NetworkGraph, NetworkPlotter
21
- from risk.stats import (
22
- calculate_significance_matrices,
23
- compute_hypergeom_test,
24
- compute_permutation_test,
25
- compute_poisson_test,
26
- )
27
14
 
28
-
29
- class RISK(NetworkIO, AnnotationsIO):
15
+ class RISK(NetworkIO, AnnotationsIO, NeighborhoodsAPI, GraphAPI, PlotterAPI):
30
16
  """RISK: A class for network analysis and visualization.
31
17
 
32
18
  The RISK class integrates functionalities for loading networks, processing annotations,
33
- and performing network-based statistical analysis, such as neighborhood significance testing.
19
+ performing network-based statistical analysis to quantify neighborhood relationships,
20
+ and visualizing networks and their properties.
34
21
  """
35
22
 
36
23
  def __init__(self, verbose: bool = True):
@@ -41,451 +28,6 @@ class RISK(NetworkIO, AnnotationsIO):
41
28
  """
42
29
  # Set global verbosity for logging
43
30
  set_global_verbosity(verbose)
44
- # Initialize and log network parameters
45
- params.initialize()
31
+ # Provide public access to network parameters
32
+ self.params = params
46
33
  super().__init__()
47
-
48
- @property
49
- def params(self) -> params:
50
- """Access the logged network parameters.
51
-
52
- Returns:
53
- Params: An instance of the Params class with logged parameters and methods to access or update them.
54
- """
55
- return params
56
-
57
- def load_neighborhoods_by_hypergeom(
58
- self,
59
- network: nx.Graph,
60
- annotations: Dict[str, Any],
61
- distance_metric: str = "louvain",
62
- louvain_resolution: float = 0.1,
63
- edge_length_threshold: float = 0.5,
64
- null_distribution: str = "network",
65
- random_seed: int = 888,
66
- ) -> Dict[str, Any]:
67
- """Load significant neighborhoods for the network using the hypergeometric test.
68
-
69
- Args:
70
- network (nx.Graph): The network graph.
71
- annotations (dict): The annotations associated with the network.
72
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
73
- louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
74
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
75
- null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
76
- random_seed (int, optional): Seed for random number generation. Defaults to 888.
77
-
78
- Returns:
79
- dict: Computed significance of neighborhoods.
80
- """
81
- log_header("Running hypergeometric test")
82
- # Log neighborhood analysis parameters
83
- params.log_neighborhoods(
84
- distance_metric=distance_metric,
85
- louvain_resolution=louvain_resolution,
86
- edge_length_threshold=edge_length_threshold,
87
- statistical_test_function="hypergeom",
88
- null_distribution=null_distribution,
89
- random_seed=random_seed,
90
- )
91
-
92
- # Load neighborhoods based on the network and distance metric
93
- neighborhoods = self._load_neighborhoods(
94
- network,
95
- distance_metric,
96
- louvain_resolution=louvain_resolution,
97
- edge_length_threshold=edge_length_threshold,
98
- random_seed=random_seed,
99
- )
100
- # Run hypergeometric test to compute neighborhood significance
101
- neighborhood_significance = compute_hypergeom_test(
102
- neighborhoods=neighborhoods,
103
- annotations=annotations["matrix"],
104
- null_distribution=null_distribution,
105
- )
106
-
107
- # Return the computed neighborhood significance
108
- return neighborhood_significance
109
-
110
- def load_neighborhoods_by_poisson(
111
- self,
112
- network: nx.Graph,
113
- annotations: Dict[str, Any],
114
- distance_metric: str = "louvain",
115
- louvain_resolution: float = 0.1,
116
- edge_length_threshold: float = 0.5,
117
- null_distribution: str = "network",
118
- random_seed: int = 888,
119
- ) -> Dict[str, Any]:
120
- """Load significant neighborhoods for the network using the Poisson test.
121
-
122
- Args:
123
- network (nx.Graph): The network graph.
124
- annotations (dict): The annotations associated with the network.
125
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
126
- louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
127
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
128
- null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
129
- random_seed (int, optional): Seed for random number generation. Defaults to 888.
130
-
131
- Returns:
132
- dict: Computed significance of neighborhoods.
133
- """
134
- log_header("Running Poisson test")
135
- # Log neighborhood analysis parameters
136
- params.log_neighborhoods(
137
- distance_metric=distance_metric,
138
- louvain_resolution=louvain_resolution,
139
- edge_length_threshold=edge_length_threshold,
140
- statistical_test_function="poisson",
141
- null_distribution=null_distribution,
142
- random_seed=random_seed,
143
- )
144
-
145
- # Load neighborhoods based on the network and distance metric
146
- neighborhoods = self._load_neighborhoods(
147
- network,
148
- distance_metric,
149
- louvain_resolution=louvain_resolution,
150
- edge_length_threshold=edge_length_threshold,
151
- random_seed=random_seed,
152
- )
153
- # Run Poisson test to compute neighborhood significance
154
- neighborhood_significance = compute_poisson_test(
155
- neighborhoods=neighborhoods,
156
- annotations=annotations["matrix"],
157
- null_distribution=null_distribution,
158
- )
159
-
160
- # Return the computed neighborhood significance
161
- return neighborhood_significance
162
-
163
- def load_neighborhoods_by_permutation(
164
- self,
165
- network: nx.Graph,
166
- annotations: Dict[str, Any],
167
- distance_metric: str = "louvain",
168
- louvain_resolution: float = 0.1,
169
- edge_length_threshold: float = 0.5,
170
- score_metric: str = "sum",
171
- null_distribution: str = "network",
172
- num_permutations: int = 1000,
173
- random_seed: int = 888,
174
- max_workers: int = 1,
175
- ) -> Dict[str, Any]:
176
- """Load significant neighborhoods for the network using the permutation test.
177
-
178
- Args:
179
- network (nx.Graph): The network graph.
180
- annotations (dict): The annotations associated with the network.
181
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
182
- louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
183
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
184
- score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
185
- null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
186
- num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
187
- random_seed (int, optional): Seed for random number generation. Defaults to 888.
188
- max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
189
-
190
- Returns:
191
- dict: Computed significance of neighborhoods.
192
- """
193
- log_header("Running permutation test")
194
- # Log neighborhood analysis parameters
195
- params.log_neighborhoods(
196
- distance_metric=distance_metric,
197
- louvain_resolution=louvain_resolution,
198
- edge_length_threshold=edge_length_threshold,
199
- statistical_test_function="permutation",
200
- score_metric=score_metric,
201
- null_distribution=null_distribution,
202
- num_permutations=num_permutations,
203
- random_seed=random_seed,
204
- max_workers=max_workers,
205
- )
206
-
207
- # Load neighborhoods based on the network and distance metric
208
- neighborhoods = self._load_neighborhoods(
209
- network,
210
- distance_metric,
211
- louvain_resolution=louvain_resolution,
212
- edge_length_threshold=edge_length_threshold,
213
- random_seed=random_seed,
214
- )
215
-
216
- # Log and display permutation test settings
217
- logger.debug(f"Neighborhood scoring metric: '{score_metric}'")
218
- logger.debug(f"Null distribution: '{null_distribution}'")
219
- logger.debug(f"Number of permutations: {num_permutations}")
220
- logger.debug(f"Maximum workers: {max_workers}")
221
- # Run permutation test to compute neighborhood significance
222
- neighborhood_significance = compute_permutation_test(
223
- neighborhoods=neighborhoods,
224
- annotations=annotations["matrix"],
225
- score_metric=score_metric,
226
- null_distribution=null_distribution,
227
- num_permutations=num_permutations,
228
- random_seed=random_seed,
229
- max_workers=max_workers,
230
- )
231
-
232
- # Return the computed neighborhood significance
233
- return neighborhood_significance
234
-
235
- def load_graph(
236
- self,
237
- network: nx.Graph,
238
- annotations: Dict[str, Any],
239
- neighborhoods: Dict[str, Any],
240
- tail: str = "right", # OPTIONS: "right" (enrichment), "left" (depletion), "both"
241
- pval_cutoff: float = 0.01, # OPTIONS: Any value between 0 to 1
242
- fdr_cutoff: float = 0.9999, # OPTIONS: Any value between 0 to 1
243
- impute_depth: int = 0,
244
- prune_threshold: float = 0.0,
245
- linkage_criterion: str = "distance",
246
- linkage_method: str = "average",
247
- linkage_metric: str = "yule",
248
- min_cluster_size: int = 5,
249
- max_cluster_size: int = 1000,
250
- ) -> NetworkGraph:
251
- """Load and process the network graph, defining top annotations and domains.
252
-
253
- Args:
254
- network (nx.Graph): The network graph.
255
- annotations (pd.DataFrame): DataFrame containing annotation data for the network.
256
- neighborhoods (dict): Neighborhood enrichment data.
257
- tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
258
- pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
259
- fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
260
- impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
261
- prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
262
- linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
263
- linkage_method (str, optional): Clustering method to use. Defaults to "average".
264
- linkage_metric (str, optional): Metric to use for calculating distances. Defaults to "yule".
265
- min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
266
- max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
267
-
268
- Returns:
269
- NetworkGraph: A fully initialized and processed NetworkGraph object.
270
- """
271
- # Log the parameters and display headers
272
- log_header("Finding significant neighborhoods")
273
- params.log_graph(
274
- tail=tail,
275
- pval_cutoff=pval_cutoff,
276
- fdr_cutoff=fdr_cutoff,
277
- impute_depth=impute_depth,
278
- prune_threshold=prune_threshold,
279
- linkage_criterion=linkage_criterion,
280
- linkage_method=linkage_method,
281
- linkage_metric=linkage_metric,
282
- min_cluster_size=min_cluster_size,
283
- max_cluster_size=max_cluster_size,
284
- )
285
-
286
- logger.debug(f"p-value cutoff: {pval_cutoff}")
287
- logger.debug(f"FDR BH cutoff: {fdr_cutoff}")
288
- logger.debug(
289
- f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
290
- )
291
- # Calculate significant neighborhoods based on the provided parameters
292
- significant_neighborhoods = calculate_significance_matrices(
293
- neighborhoods["depletion_pvals"],
294
- neighborhoods["enrichment_pvals"],
295
- tail=tail,
296
- pval_cutoff=pval_cutoff,
297
- fdr_cutoff=fdr_cutoff,
298
- )
299
-
300
- log_header("Processing neighborhoods")
301
- # Process neighborhoods by imputing and pruning based on the given settings
302
- processed_neighborhoods = process_neighborhoods(
303
- network=network,
304
- neighborhoods=significant_neighborhoods,
305
- impute_depth=impute_depth,
306
- prune_threshold=prune_threshold,
307
- )
308
-
309
- log_header("Finding top annotations")
310
- logger.debug(f"Min cluster size: {min_cluster_size}")
311
- logger.debug(f"Max cluster size: {max_cluster_size}")
312
- # Define top annotations based on processed neighborhoods
313
- top_annotations = self._define_top_annotations(
314
- network=network,
315
- annotations=annotations,
316
- neighborhoods=processed_neighborhoods,
317
- min_cluster_size=min_cluster_size,
318
- max_cluster_size=max_cluster_size,
319
- )
320
-
321
- log_header("Optimizing distance threshold for domains")
322
- # Define domains in the network using the specified clustering settings
323
- domains = self._define_domains(
324
- neighborhoods=processed_neighborhoods,
325
- top_annotations=top_annotations,
326
- linkage_criterion=linkage_criterion,
327
- linkage_method=linkage_method,
328
- linkage_metric=linkage_metric,
329
- )
330
- # Trim domains and top annotations based on cluster size constraints
331
- top_annotations, domains, trimmed_domains = trim_domains_and_top_annotations(
332
- domains=domains,
333
- top_annotations=top_annotations,
334
- min_cluster_size=min_cluster_size,
335
- max_cluster_size=max_cluster_size,
336
- )
337
-
338
- # Prepare node mapping and enrichment sums for the final NetworkGraph object
339
- ordered_nodes = annotations["ordered_nodes"]
340
- node_label_to_id = dict(zip(ordered_nodes, range(len(ordered_nodes))))
341
- node_enrichment_sums = processed_neighborhoods["node_enrichment_sums"]
342
-
343
- # Return the fully initialized NetworkGraph object
344
- return NetworkGraph(
345
- network=network,
346
- top_annotations=top_annotations,
347
- domains=domains,
348
- trimmed_domains=trimmed_domains,
349
- node_label_to_node_id_map=node_label_to_id,
350
- node_enrichment_sums=node_enrichment_sums,
351
- )
352
-
353
- def load_plotter(
354
- self,
355
- graph: NetworkGraph,
356
- figsize: Tuple = (10, 10),
357
- background_color: str = "white",
358
- background_alpha: Union[float, None] = 1.0,
359
- pad: float = 0.3,
360
- ) -> NetworkPlotter:
361
- """Get a NetworkPlotter object for plotting.
362
-
363
- Args:
364
- graph (NetworkGraph): The graph to plot.
365
- figsize (tuple, optional): Size of the figure. Defaults to (10, 10).
366
- background_color (str, optional): Background color of the plot. Defaults to "white".
367
- background_alpha (float, None, optional): Transparency level of the background color. If provided, it overrides
368
- any existing alpha values found in background_color. Defaults to 1.0.
369
- pad (float, optional): Padding value to adjust the axis limits. Defaults to 0.3.
370
-
371
- Returns:
372
- NetworkPlotter: A NetworkPlotter object configured with the given parameters.
373
- """
374
- log_header("Loading plotter")
375
-
376
- # Initialize and return a NetworkPlotter object
377
- return NetworkPlotter(
378
- graph,
379
- figsize=figsize,
380
- background_color=background_color,
381
- background_alpha=background_alpha,
382
- pad=pad,
383
- )
384
-
385
- def _load_neighborhoods(
386
- self,
387
- network: nx.Graph,
388
- distance_metric: str = "louvain",
389
- louvain_resolution: float = 0.1,
390
- edge_length_threshold: float = 0.5,
391
- random_seed: int = 888,
392
- ) -> np.ndarray:
393
- """Load significant neighborhoods for the network.
394
-
395
- Args:
396
- network (nx.Graph): The network graph.
397
- annotations (pd.DataFrame): The matrix of annotations associated with the network.
398
- distance_metric (str, optional): Distance metric for neighborhood analysis. Defaults to "louvain".
399
- louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
400
- edge_length_threshold (float, optional): Edge length threshold for neighborhood analysis. Defaults to 0.5.
401
- random_seed (int, optional): Seed for random number generation. Defaults to 888.
402
-
403
- Returns:
404
- np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
405
- """
406
- # Display the chosen distance metric
407
- if distance_metric == "louvain":
408
- for_print_distance_metric = f"louvain (resolution={louvain_resolution})"
409
- else:
410
- for_print_distance_metric = distance_metric
411
- # Log and display neighborhood settings
412
- logger.debug(f"Distance metric: '{for_print_distance_metric}'")
413
- logger.debug(f"Edge length threshold: {edge_length_threshold}")
414
- logger.debug(f"Random seed: {random_seed}")
415
-
416
- # Compute neighborhoods based on the network and distance metric
417
- neighborhoods = get_network_neighborhoods(
418
- network,
419
- distance_metric,
420
- edge_length_threshold,
421
- louvain_resolution=louvain_resolution,
422
- random_seed=random_seed,
423
- )
424
-
425
- # Return the computed neighborhoods
426
- return neighborhoods
427
-
428
- def _define_top_annotations(
429
- self,
430
- network: nx.Graph,
431
- annotations: Dict[str, Any],
432
- neighborhoods: Dict[str, Any],
433
- min_cluster_size: int = 5,
434
- max_cluster_size: int = 1000,
435
- ) -> pd.DataFrame:
436
- """Define top annotations for the network.
437
-
438
- Args:
439
- network (nx.Graph): The network graph.
440
- annotations (dict): Annotations data for the network.
441
- neighborhoods (dict): Neighborhood enrichment data.
442
- min_cluster_size (int, optional): Minimum size for clusters. Defaults to 5.
443
- max_cluster_size (int, optional): Maximum size for clusters. Defaults to 1000.
444
-
445
- Returns:
446
- dict: Top annotations identified within the network.
447
- """
448
- # Extract necessary data from annotations and neighborhoods
449
- ordered_annotations = annotations["ordered_annotations"]
450
- neighborhood_enrichment_sums = neighborhoods["neighborhood_enrichment_counts"]
451
- neighborhoods_binary_enrichment_matrix = neighborhoods["binary_enrichment_matrix"]
452
- # Call external function to define top annotations
453
- return define_top_annotations(
454
- network=network,
455
- ordered_annotation_labels=ordered_annotations,
456
- neighborhood_enrichment_sums=neighborhood_enrichment_sums,
457
- binary_enrichment_matrix=neighborhoods_binary_enrichment_matrix,
458
- min_cluster_size=min_cluster_size,
459
- max_cluster_size=max_cluster_size,
460
- )
461
-
462
- def _define_domains(
463
- self,
464
- neighborhoods: Dict[str, Any],
465
- top_annotations: pd.DataFrame,
466
- linkage_criterion: str,
467
- linkage_method: str,
468
- linkage_metric: str,
469
- ) -> pd.DataFrame:
470
- """Define domains in the network based on enrichment data.
471
-
472
- Args:
473
- neighborhoods (dict): Enrichment data for neighborhoods.
474
- top_annotations (pd.DataFrame): Enrichment matrix for top annotations.
475
- linkage_criterion (str): Clustering criterion for defining domains.
476
- linkage_method (str): Clustering method to use.
477
- linkage_metric (str): Metric to use for calculating distances.
478
-
479
- Returns:
480
- pd.DataFrame: Matrix of defined domains.
481
- """
482
- # Extract the significant enrichment matrix from the neighborhoods data
483
- significant_neighborhoods_enrichment = neighborhoods["significant_enrichment_matrix"]
484
- # Call external function to define domains based on the extracted data
485
- return define_domains(
486
- top_annotations=top_annotations,
487
- significant_neighborhoods_enrichment=significant_neighborhoods_enrichment,
488
- linkage_criterion=linkage_criterion,
489
- linkage_method=linkage_method,
490
- linkage_metric=linkage_metric,
491
- )
risk/stats/__init__.py CHANGED
@@ -3,7 +3,11 @@ risk/stats
3
3
  ~~~~~~~~~~
4
4
  """
5
5
 
6
- from .hypergeom import compute_hypergeom_test
7
- from .permutation import compute_permutation_test
8
- from .poisson import compute_poisson_test
9
- from .stats import calculate_significance_matrices
6
+ from risk.stats.binom import compute_binom_test
7
+ from risk.stats.chi2 import compute_chi2_test
8
+ from risk.stats.hypergeom import compute_hypergeom_test
9
+ from risk.stats.permutation import compute_permutation_test
10
+ from risk.stats.poisson import compute_poisson_test
11
+ from risk.stats.zscore import compute_zscore_test
12
+
13
+ from risk.stats.stats import calculate_significance_matrices
risk/stats/binom.py ADDED
@@ -0,0 +1,51 @@
1
+ """
2
+ risk/stats/binom
3
+ ~~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ from typing import Any, Dict
7
+
8
+ from scipy.sparse import csr_matrix
9
+ from scipy.stats import binom
10
+
11
+
12
def compute_binom_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute Binomial test for enrichment and depletion in neighborhoods with selectable null distribution.

    For every (neighborhood, annotation) pair the observed count of annotated nodes inside the
    neighborhood is compared against a Binomial(n, p) model, where n is the neighborhood size and
    p is the background probability of the annotation under the chosen null distribution.

    Args:
        neighborhoods (csr_matrix): Binary matrix representing neighborhoods (one row per neighborhood,
            one column per node). Sparse matrices, sparse arrays and dense arrays are accepted.
        annotations (csr_matrix): Binary matrix representing annotations (one row per node, one column
            per annotation). Sparse matrices, sparse arrays and dense arrays are accepted.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values, each a dense array of
            shape (number of neighborhoods, number of annotations).

    Raises:
        ValueError: If null_distribution is neither 'network' nor 'annotations'.
    """
    # Local import: this module does not import numpy at file level
    import numpy as np

    # Reject an unknown null distribution before doing any work
    if null_distribution not in ("network", "annotations"):
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    # Get the total number of nodes in the network
    total_nodes = neighborhoods.shape[1]

    # np.asarray (rather than the np.matrix-only `.A` accessor) keeps this working whether `.sum`
    # hands back an np.matrix (sparse matrix input) or an ndarray (sparse array / dense input)
    neighborhood_sizes = np.asarray(neighborhoods.sum(axis=1)).reshape(-1, 1)  # Column vector
    annotation_totals = np.asarray(annotations.sum(axis=0), dtype=float).ravel()  # 1D array

    # Background probability of drawing each annotation under the selected null
    if null_distribution == "network":
        p_values = annotation_totals / total_nodes
    else:
        p_values = annotation_totals / annotations.sum()

    # Observed counts: number of annotated nodes in each neighborhood
    annotated_counts = neighborhoods @ annotations
    if hasattr(annotated_counts, "toarray"):
        # Sparse product; densify for the element-wise distribution calls below
        annotated_counts = annotated_counts.toarray()
    annotated_counts_dense = np.asarray(annotated_counts)

    # Enrichment is P(X >= k). The survival function is used instead of `1 - cdf` because the
    # subtraction cancels to exactly 0.0 for very small tail probabilities.
    enrichment_pvals = binom.sf(annotated_counts_dense - 1, neighborhood_sizes, p_values)
    # Depletion is P(X <= k)
    depletion_pvals = binom.cdf(annotated_counts_dense, neighborhood_sizes, p_values)

    return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
risk/stats/chi2.py ADDED
@@ -0,0 +1,69 @@
1
+ """
2
+ risk/stats/chi2
3
+ ~~~~~~~~~~~~~~~
4
+ """
5
+
6
+ from typing import Any, Dict
7
+
8
+ import numpy as np
9
+ from scipy.sparse import csr_matrix
10
+ from scipy.stats import chi2
11
+
12
+
13
def compute_chi2_test(
    neighborhoods: csr_matrix,
    annotations: csr_matrix,
    null_distribution: str = "network",
) -> Dict[str, Any]:
    """Compute chi-squared test for enrichment and depletion in neighborhoods with selectable null distribution.

    The per-cell statistic (observed - expected)^2 / expected carries no sign, so on its own it cannot
    tell over-representation from under-representation. The direction is therefore taken from the sign
    of (observed - expected): a cell gets its chi-squared tail probability as enrichment p-value only
    when observed > expected, and as depletion p-value only when observed < expected. The opposite
    tail (and any cell with no deviation or no expected count) is reported as 1.0.

    Args:
        neighborhoods (csr_matrix): Binary matrix representing neighborhoods (nodes x nodes). Sparse
            matrices, sparse arrays and dense arrays are accepted.
        annotations (csr_matrix): Binary matrix representing annotations (nodes x annotations). Sparse
            matrices, sparse arrays and dense arrays are accepted.
        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".

    Returns:
        Dict[str, Any]: Dictionary containing depletion and enrichment p-values, each a dense array of
            shape (number of neighborhoods, number of annotations).

    Raises:
        ValueError: If null_distribution is neither 'network' nor 'annotations'.
    """
    # Reject an unknown null distribution before doing any work
    if null_distribution not in ("network", "annotations"):
        raise ValueError(
            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
        )

    if null_distribution == "network":
        # Case 1: Use all nodes as the background
        background_population = neighborhoods.shape[0]
        neighborhood_sums = neighborhoods.sum(axis=0)  # Column sums of neighborhoods
        annotation_sums = annotations.sum(axis=0)  # Column sums of annotations
    else:
        # Case 2: Only consider nodes with at least one annotation
        annotated_nodes = np.ravel(annotations.sum(axis=1)) > 0
        background_population = annotated_nodes.sum()  # Total number of annotated nodes
        neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0)
        annotation_sums = annotations[annotated_nodes].sum(axis=0)

    # Convert to dense arrays for downstream computations
    neighborhood_sums = np.asarray(neighborhood_sums, dtype=float).reshape(-1, 1)  # Column vector
    annotation_sums = np.asarray(annotation_sums, dtype=float).reshape(1, -1)  # Row vector

    # Observed values: number of annotated nodes in each neighborhood
    observed = neighborhoods.T @ annotations  # Shape: (neighborhoods, annotations)
    if hasattr(observed, "toarray"):
        # Densify explicitly instead of relying on sparse-minus-dense arithmetic
        observed = observed.toarray()
    observed = np.asarray(observed, dtype=float)

    # The guard covers the expected-value division too: the background can be empty
    # (no annotated nodes under the 'annotations' null), which would divide by zero
    with np.errstate(divide="ignore", invalid="ignore"):
        # Expected values under the null
        expected = (neighborhood_sums @ annotation_sums) / background_population
        # Chi-squared statistic per cell; cells without a positive expectation contribute 0
        chi2_stat = np.where(expected > 0, np.power(observed - expected, 2) / expected, 0.0)
        # Direction of the deviation (comparisons against NaN evaluate to False)
        is_enriched = observed > expected
        is_depleted = observed < expected

    # Upper-tail probability of the (direction-agnostic) statistic
    tail_pvals = chi2.sf(chi2_stat, df=1)
    # Assign the tail probability only to the side the deviation actually points to
    enrichment_pvals = np.where(is_enriched, tail_pvals, 1.0)
    depletion_pvals = np.where(is_depleted, tail_pvals, 1.0)

    return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}