risk-network 0.0.14b2.tar.gz → 0.0.15.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. risk_network-0.0.15/PKG-INFO +109 -0
  2. risk_network-0.0.15/README.md +68 -0
  3. {risk_network-0.0.14b2 → risk_network-0.0.15}/pyproject.toml +2 -2
  4. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/__init__.py +1 -1
  5. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/_api.py +1 -95
  6. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/_domains.py +77 -26
  7. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/_neighborhoods.py +45 -23
  8. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/_stats/__init__.py +0 -2
  9. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/_stats/_tests.py +1 -105
  10. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_graph/_summary.py +20 -22
  11. risk_network-0.0.15/src/risk_network.egg-info/PKG-INFO +109 -0
  12. {risk_network-0.0.14b2 → risk_network-0.0.15}/tests/test_load_graph.py +88 -0
  13. {risk_network-0.0.14b2 → risk_network-0.0.15}/tests/test_load_neighborhoods.py +0 -52
  14. {risk_network-0.0.14b2 → risk_network-0.0.15}/tests/test_load_plotter.py +2 -1
  15. risk_network-0.0.14b2/PKG-INFO +0 -125
  16. risk_network-0.0.14b2/README.md +0 -84
  17. risk_network-0.0.14b2/src/risk_network.egg-info/PKG-INFO +0 -125
  18. {risk_network-0.0.14b2 → risk_network-0.0.15}/LICENSE +0 -0
  19. {risk_network-0.0.14b2 → risk_network-0.0.15}/setup.cfg +0 -0
  20. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_annotation/__init__.py +0 -0
  21. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_annotation/_annotation.py +0 -0
  22. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_annotation/_io.py +0 -0
  23. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_annotation/_nltk_setup.py +0 -0
  24. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_log/__init__.py +0 -0
  25. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_log/_console.py +0 -0
  26. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_log/_parameters.py +0 -0
  27. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/__init__.py +0 -0
  28. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/_community.py +0 -0
  29. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/_stats/_permutation/__init__.py +0 -0
  30. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/_stats/_permutation/_permutation.py +0 -0
  31. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_neighborhoods/_stats/_permutation/_test_functions.py +0 -0
  32. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/__init__.py +0 -0
  33. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_graph/__init__.py +0 -0
  34. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_graph/_api.py +0 -0
  35. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_graph/_graph.py +0 -0
  36. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_graph/_stats.py +0 -0
  37. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_io.py +0 -0
  38. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/__init__.py +0 -0
  39. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/_api.py +0 -0
  40. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/_canvas.py +0 -0
  41. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/_contour.py +0 -0
  42. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/_labels.py +0 -0
  43. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/_network.py +0 -0
  44. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/_plotter.py +0 -0
  45. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/_utils/__init__.py +0 -0
  46. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/_utils/_colors.py +0 -0
  47. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_network/_plotter/_utils/_layout.py +0 -0
  48. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk/_risk.py +0 -0
  49. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk_network.egg-info/SOURCES.txt +0 -0
  50. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk_network.egg-info/dependency_links.txt +0 -0
  51. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk_network.egg-info/requires.txt +0 -0
  52. {risk_network-0.0.14b2 → risk_network-0.0.15}/src/risk_network.egg-info/top_level.txt +0 -0
  53. {risk_network-0.0.14b2 → risk_network-0.0.15}/tests/test_load_annotation.py +0 -0
  54. {risk_network-0.0.14b2 → risk_network-0.0.15}/tests/test_load_io_combinations.py +0 -0
  55. {risk_network-0.0.14b2 → risk_network-0.0.15}/tests/test_load_network.py +0 -0
  56. {risk_network-0.0.14b2 → risk_network-0.0.15}/tests/test_log.py +0 -0
@@ -0,0 +1,109 @@
+ Metadata-Version: 2.4
+ Name: risk-network
+ Version: 0.0.15
+ Summary: A Python package for scalable network analysis and high-quality visualization.
+ Author-email: Ira Horecka <ira89@icloud.com>
+ License: GPL-3.0-or-later
+ Project-URL: Homepage, https://github.com/riskportal/risk
+ Project-URL: Issues, https://github.com/riskportal/risk/issues
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
+ Classifier: Topic :: Scientific/Engineering :: Visualization
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: ipywidgets
+ Requires-Dist: leidenalg
+ Requires-Dist: markov_clustering
+ Requires-Dist: matplotlib
+ Requires-Dist: networkx
+ Requires-Dist: nltk
+ Requires-Dist: numpy
+ Requires-Dist: openpyxl
+ Requires-Dist: pandas
+ Requires-Dist: python-igraph
+ Requires-Dist: python-louvain
+ Requires-Dist: scikit-learn
+ Requires-Dist: scipy
+ Requires-Dist: statsmodels
+ Requires-Dist: threadpoolctl
+ Requires-Dist: tqdm
+ Dynamic: license-file
+
+ # RISK
+
+ ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
+ [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
+ ![License](https://img.shields.io/badge/license-GPLv3-purple)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
+ ![Downloads](https://img.shields.io/pypi/dm/risk-network)
+ ![Tests](https://github.com/riskportal/risk/actions/workflows/ci.yml/badge.svg)
+
+ **RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. It integrates community detection algorithms, rigorous overrepresentation analysis, and a modular framework for diverse network types. RISK identifies biologically coherent relationships within networks and generates publication-ready visualizations, making it a useful tool for biological and interdisciplinary network analysis.
+
+ For a full description of RISK and its applications, see:
+ <br>
+ **Horecka and Röst (2025)**, _"RISK: a next-generation tool for biological network annotation and visualization"_.
+ <br>
+ DOI: [10.5281/zenodo.xxxxxxx](https://doi.org/10.5281/zenodo.xxxxxxx)
+
+ ## Documentation and Tutorial
+
+ Full documentation is available at:
+
+ - **Docs:** [https://riskportal.github.io/risk-docs](https://riskportal.github.io/risk-docs)
+ - **Tutorial Jupyter Notebook Repository:** [https://github.com/riskportal/risk-docs](https://github.com/riskportal/risk-docs)
+
+ ## Installation
+
+ RISK is compatible with Python 3.8 or later and runs on all major operating systems. To install the latest version of RISK, run:
+
+ ```bash
+ pip install risk-network --upgrade
+ ```
+
+ ## Key Features of RISK
+
+ - **Broad Data Compatibility**: Accepts multiple network formats (Cytoscape, Cytoscape JSON, GPickle, NetworkX) and user-provided annotations formatted as term–to–gene membership tables (JSON, CSV, TSV, Excel, Python dictionaries).
+ - **Flexible Clustering**: Offers Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap, with user-defined resolution parameters to detect both coarse and fine-grained modules.
+ - **Statistical Testing**: Provides permutation, hypergeometric, chi-squared, and binomial tests, balancing statistical rigor with speed.
+ - **High-Resolution Visualization**: Generates publication-ready figures with customizable node/edge properties, contour overlays, and export to SVG, PNG, or PDF.
+
+ ## Example Usage
+
+ We applied RISK to a _Saccharomyces cerevisiae_ protein–protein interaction (PPI) network (Michaelis _et al_., 2023; 3,839 proteins, 30,955 interactions). RISK identified compact, functional modules overrepresented in Gene Ontology Biological Process (GO BP) terms (Ashburner _et al_., 2000), revealing biological organization including ribosomal assembly, mitochondrial organization, and RNA polymerase activity (P < 0.0001).
+
+ [![RISK analysis of the yeast PPI network](https://i.imgur.com/fSNf5Ad.jpeg)](https://i.imgur.com/fSNf5Ad.jpeg)
+ **RISK workflow overview and analysis of the yeast PPI network**. GO BP terms are color-coded to represent key cellular processes—including ribosomal assembly, mitochondrial organization, and RNA polymerase activity (P < 0.0001).
+
+ ## Citation
+
+ If you use RISK in your research, please cite the following:
+
+ **Horecka and Röst (2025)**, _"RISK: a next-generation tool for biological network annotation and visualization"_.
+ <br>
+ DOI: [10.5281/zenodo.xxxxxxx](https://doi.org/10.5281/zenodo.xxxxxxx)
+
+ ## Contributing
+
+ We welcome contributions from the community:
+
+ - [Issues Tracker](https://github.com/riskportal/risk/issues)
+ - [Source Code](https://github.com/riskportal/risk/tree/main/risk)
+
+ ## Support
+
+ If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/riskportal/risk/issues) on GitHub.
+
+ ## License
+
+ RISK is open source under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
@@ -0,0 +1,68 @@
+ # RISK
+
+ ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
+ [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
+ ![License](https://img.shields.io/badge/license-GPLv3-purple)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
+ ![Downloads](https://img.shields.io/pypi/dm/risk-network)
+ ![Tests](https://github.com/riskportal/risk/actions/workflows/ci.yml/badge.svg)
+
+ **RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. It integrates community detection algorithms, rigorous overrepresentation analysis, and a modular framework for diverse network types. RISK identifies biologically coherent relationships within networks and generates publication-ready visualizations, making it a useful tool for biological and interdisciplinary network analysis.
+
+ For a full description of RISK and its applications, see:
+ <br>
+ **Horecka and Röst (2025)**, _"RISK: a next-generation tool for biological network annotation and visualization"_.
+ <br>
+ DOI: [10.5281/zenodo.xxxxxxx](https://doi.org/10.5281/zenodo.xxxxxxx)
+
+ ## Documentation and Tutorial
+
+ Full documentation is available at:
+
+ - **Docs:** [https://riskportal.github.io/risk-docs](https://riskportal.github.io/risk-docs)
+ - **Tutorial Jupyter Notebook Repository:** [https://github.com/riskportal/risk-docs](https://github.com/riskportal/risk-docs)
+
+ ## Installation
+
+ RISK is compatible with Python 3.8 or later and runs on all major operating systems. To install the latest version of RISK, run:
+
+ ```bash
+ pip install risk-network --upgrade
+ ```
+
+ ## Key Features of RISK
+
+ - **Broad Data Compatibility**: Accepts multiple network formats (Cytoscape, Cytoscape JSON, GPickle, NetworkX) and user-provided annotations formatted as term–to–gene membership tables (JSON, CSV, TSV, Excel, Python dictionaries).
+ - **Flexible Clustering**: Offers Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap, with user-defined resolution parameters to detect both coarse and fine-grained modules.
+ - **Statistical Testing**: Provides permutation, hypergeometric, chi-squared, and binomial tests, balancing statistical rigor with speed.
+ - **High-Resolution Visualization**: Generates publication-ready figures with customizable node/edge properties, contour overlays, and export to SVG, PNG, or PDF.
+
+ ## Example Usage
+
+ We applied RISK to a _Saccharomyces cerevisiae_ protein–protein interaction (PPI) network (Michaelis _et al_., 2023; 3,839 proteins, 30,955 interactions). RISK identified compact, functional modules overrepresented in Gene Ontology Biological Process (GO BP) terms (Ashburner _et al_., 2000), revealing biological organization including ribosomal assembly, mitochondrial organization, and RNA polymerase activity (P < 0.0001).
+
+ [![RISK analysis of the yeast PPI network](https://i.imgur.com/fSNf5Ad.jpeg)](https://i.imgur.com/fSNf5Ad.jpeg)
+ **RISK workflow overview and analysis of the yeast PPI network**. GO BP terms are color-coded to represent key cellular processes—including ribosomal assembly, mitochondrial organization, and RNA polymerase activity (P < 0.0001).
+
+ ## Citation
+
+ If you use RISK in your research, please cite the following:
+
+ **Horecka and Röst (2025)**, _"RISK: a next-generation tool for biological network annotation and visualization"_.
+ <br>
+ DOI: [10.5281/zenodo.xxxxxxx](https://doi.org/10.5281/zenodo.xxxxxxx)
+
+ ## Contributing
+
+ We welcome contributions from the community:
+
+ - [Issues Tracker](https://github.com/riskportal/risk/issues)
+ - [Source Code](https://github.com/riskportal/risk/tree/main/risk)
+
+ ## Support
+
+ If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/riskportal/risk/issues) on GitHub.
+
+ ## License
+
+ RISK is open source under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
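The README describes the workflow only in prose. The sketch below is a hedged illustration of how the pieces might fit together: this diff confirms only that `src/risk/__init__.py` exports a `RISK` class and that the neighborhoods API follows a `load_neighborhoods_<test>` naming pattern (see the removed `load_neighborhoods_poisson`/`load_neighborhoods_zscore` later in this diff), so every method name and input shape below is an assumption rather than the release's documented API.

```python
import networkx as nx

from risk import RISK  # exported by src/risk/__init__.py per this diff

risk = RISK()

# Toy inputs: any NetworkX graph plus a term -> members mapping
# (the README lists JSON, CSV, TSV, Excel, and Python dicts as annotation formats).
network = nx.karate_club_graph()
annotation = {"example_term": [0, 1, 2, 3]}  # hypothetical annotation shape

# Hypothetical hypergeometric sibling of the removed load_neighborhoods_* methods;
# the keyword arguments mirror the removed signatures shown later in this diff.
neighborhoods = risk.load_neighborhoods_hypergeom(
    network=network,
    annotation=annotation,
    distance_metric="louvain",  # one of the clustering options listed above
    louvain_resolution=0.1,
    null_distribution="network",
    random_seed=888,
)
```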
@@ -48,8 +48,8 @@ dependencies = [
  text = "GPL-3.0-or-later"

  [project.urls]
- Homepage = "https://github.com/riskportal/network"
- Issues = "https://github.com/riskportal/network/issues"
+ Homepage = "https://github.com/riskportal/risk"
+ Issues = "https://github.com/riskportal/risk/issues"

  [tool.setuptools]
  package-dir = {"" = "src"}
@@ -8,4 +8,4 @@ RISK: Regional Inference of Significant Kinships
  from ._risk import RISK

  __all__ = ["RISK"]
- __version__ = "0.0.14-beta.2"
+ __version__ = "0.0.15"
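Since this release also removes public methods (see the `_api.py` hunks below), downstream code that must span both versions can gate on the bumped version string. A minimal sketch, using the third-party `packaging` library because it handles the pre-release form `0.0.14-beta.2`:

```python
import risk
from packaging.version import Version  # pip install packaging

# __version__ comes from src/risk/__init__.py, as shown in the hunk above
if Version(risk.__version__) >= Version("0.0.15"):
    print("Poisson/z-score neighborhood loaders were removed in this release")
```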
@@ -17,8 +17,6 @@ from ._stats import (
      compute_chi2_test,
      compute_hypergeom_test,
      compute_permutation_test,
-     compute_poisson_test,
-     compute_zscore_test,
  )


@@ -226,98 +224,6 @@ class NeighborhoodsAPI:
              max_workers=max_workers,
          )

-     def load_neighborhoods_poisson(
-         self,
-         network: nx.Graph,
-         annotation: Dict[str, Any],
-         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
-         louvain_resolution: float = 0.1,
-         leiden_resolution: float = 1.0,
-         fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
-         null_distribution: str = "network",
-         random_seed: int = 888,
-     ) -> Dict[str, Any]:
-         """
-         Load significant neighborhoods for the network using the Poisson test.
-
-         Args:
-             network (nx.Graph): The network graph.
-             annotation (Dict[str, Any]): The annotation associated with the network.
-             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
-                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
-                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
-             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
-             leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
-             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
-                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
-                 Defaults to 0.5.
-             null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
-             random_seed (int, optional): Seed for random number generation. Defaults to 888.
-
-         Returns:
-             Dict[str, Any]: Computed significance of neighborhoods.
-         """
-         log_header("Running Poisson test")
-         # Compute neighborhood significance using the Poisson test
-         return self._load_neighborhoods_by_statistical_test(
-             network=network,
-             annotation=annotation,
-             distance_metric=distance_metric,
-             louvain_resolution=louvain_resolution,
-             leiden_resolution=leiden_resolution,
-             fraction_shortest_edges=fraction_shortest_edges,
-             null_distribution=null_distribution,
-             random_seed=random_seed,
-             statistical_test_key="poisson",
-             statistical_test_function=compute_poisson_test,
-         )
-
-     def load_neighborhoods_zscore(
-         self,
-         network: nx.Graph,
-         annotation: Dict[str, Any],
-         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
-         louvain_resolution: float = 0.1,
-         leiden_resolution: float = 1.0,
-         fraction_shortest_edges: Union[float, List, Tuple, np.ndarray] = 0.5,
-         null_distribution: str = "network",
-         random_seed: int = 888,
-     ) -> Dict[str, Any]:
-         """
-         Load significant neighborhoods for the network using the z-score test.
-
-         Args:
-             network (nx.Graph): The network graph.
-             annotation (Dict[str, Any]): The annotation associated with the network.
-             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
-                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
-                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
-             louvain_resolution (float, optional): Resolution parameter for Louvain clustering. Defaults to 0.1.
-             leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
-             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
-                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
-                 Defaults to 0.5.
-             null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
-             random_seed (int, optional): Seed for random number generation. Defaults to 888.
-
-         Returns:
-             Dict[str, Any]: Computed significance of neighborhoods.
-         """
-         log_header("Running z-score test")
-         # Compute neighborhood significance using the z-score test
-         return self._load_neighborhoods_by_statistical_test(
-             network=network,
-             annotation=annotation,
-             distance_metric=distance_metric,
-             louvain_resolution=louvain_resolution,
-             leiden_resolution=leiden_resolution,
-             fraction_shortest_edges=fraction_shortest_edges,
-             null_distribution=null_distribution,
-             random_seed=random_seed,
-             statistical_test_key="zscore",
-             statistical_test_function=compute_zscore_test,
-         )
-
      def _load_neighborhoods_by_statistical_test(
          self,
          network: nx.Graph,
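Callers upgrading from 0.0.14b2 lose `load_neighborhoods_poisson` and `load_neighborhoods_zscore` outright; calling either now raises `AttributeError`. The mechanical fix is to switch to a surviving test with the same keyword signature. A migration sketch, assuming a permutation sibling exists (suggested by the retained `compute_permutation_test` import and the `max_workers=max_workers` context line above, but not itself shown in this diff):

```python
# Before (0.0.14b2); removed in 0.0.15:
# result = risk.load_neighborhoods_zscore(network=network, annotation=annotation)

# After (0.0.15): same keyword arguments, surviving test (hypothetical method name)
result = risk.load_neighborhoods_permutation(
    network=network,        # nx.Graph, as in the removed signatures above
    annotation=annotation,  # annotation dict, as above
    distance_metric="louvain",
    louvain_resolution=0.1,
    leiden_resolution=1.0,
    fraction_shortest_edges=0.5,
    null_distribution="network",
    random_seed=888,
)
```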
@@ -348,7 +254,7 @@ class NeighborhoodsAPI:
              null_distribution (str, optional): The type of null distribution to use ('network' or 'annotation').
                  Defaults to "network".
              random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.
-             statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "poisson").
+             statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "binom").
                  Used for logging and debugging. Defaults to "hypergeom".
              statistical_test_function (Any, optional): The function implementing the statistical test.
                  It should accept neighborhoods, annotation, null distribution, and additional kwargs.
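The docstring spells out the whole dispatch contract: each public wrapper hands `_load_neighborhoods_by_statistical_test` a key used only for logging, plus a callable that accepts neighborhoods, annotation, and a null-distribution choice and returns depletion/enrichment p-values. A simplified standalone sketch of that pattern (illustrative names, not the package's actual internals):

```python
from typing import Any, Callable, Dict

def run_by_statistical_test(
    neighborhoods,
    annotation,
    statistical_test_key: str,
    statistical_test_function: Callable[..., Dict[str, Any]],
    null_distribution: str = "network",
) -> Dict[str, Any]:
    # The real method logs the key (via log_header) before dispatching
    print(f"Running {statistical_test_key} test")
    # The callable must return depletion/enrichment p-values, per the contract above
    return statistical_test_function(
        neighborhoods, annotation, null_distribution=null_distribution
    )

# The binomial wrapper would then dispatch roughly as:
# run_by_statistical_test(nbhd, ann, "binom", compute_binom_test)
```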
@@ -54,37 +54,48 @@ def define_domains(
      Raises:
          ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
      """
-     try:
-         if linkage_criterion == "off":
-             raise ValueError("Clustering is turned off.")
+     # Validate args first; let user mistakes raise immediately
+     clustering_off = _validate_clustering_args(
+         linkage_criterion, linkage_method, linkage_metric, linkage_threshold
+     )

+     # If clustering is turned off, assign unique domains and skip
+     if clustering_off:
+         n_rows = len(top_annotation)
+         logger.warning("Clustering is turned off. Skipping clustering.")
+         top_annotation["domain"] = range(1, n_rows + 1)
+     else:
          # Transpose the matrix to cluster annotations
          m = significant_neighborhoods_significance[:, top_annotation["significant_annotation"]].T
          # Safeguard the matrix by replacing NaN, Inf, and -Inf values
          m = _safeguard_matrix(m)
-         # Optimize silhouette score across different linkage methods and distance metrics
-         best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
-             m, linkage_criterion, linkage_method, linkage_metric, linkage_threshold
-         )
-         # Perform hierarchical clustering
-         Z = linkage(m, method=best_linkage, metric=best_metric)
-         logger.warning(
-             f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'\nLinkage threshold: {round(best_threshold, 3)}"
-         )
-         # Calculate the optimal threshold for clustering
-         max_d_optimal = np.max(Z[:, 2]) * best_threshold
-         # Assign domains to the annotation matrix
-         domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
-         top_annotation["domain"] = 0
-         top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
-     except (ValueError, LinAlgError):
-         # If a ValueError is encountered, handle it by assigning unique domains
-         n_rows = len(top_annotation)
-         if linkage_criterion == "off":
-             logger.warning("Clustering is turned off. Skipping clustering.")
-         else:
-             logger.error("Error encountered. Skipping clustering.")
-         top_annotation["domain"] = range(1, n_rows + 1)  # Assign unique domains
+         try:
+             # Optimize silhouette score across different linkage methods and distance metrics
+             (
+                 best_linkage,
+                 best_metric,
+                 best_threshold,
+             ) = _optimize_silhouette_across_linkage_and_metrics(
+                 m, linkage_criterion, linkage_method, linkage_metric, linkage_threshold
+             )
+             # Perform hierarchical clustering
+             Z = linkage(m, method=best_linkage, metric=best_metric)
+             logger.warning(
+                 f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'\nLinkage threshold: {round(best_threshold, 3)}"
+             )
+             # Calculate the optimal threshold for clustering
+             max_d_optimal = np.max(Z[:, 2]) * best_threshold
+             # Assign domains to the annotation matrix
+             domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
+             top_annotation["domain"] = 0
+             top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
+         except (LinAlgError, ValueError):
+             # Numerical errors or degenerate input are handled gracefully (not user error)
+             n_rows = len(top_annotation)
+             logger.error(
+                 "Clustering failed due to numerical or data degeneracy. Assigning unique domains."
+             )
+             top_annotation["domain"] = range(1, n_rows + 1)

      # Create DataFrames to store domain information
      node_to_significance = pd.DataFrame(
@@ -184,6 +195,46 @@ def trim_domains(
      return valid_domains, valid_trimmed_domains_matrix


+ def _validate_clustering_args(
+     linkage_criterion: str,
+     linkage_method: str,
+     linkage_metric: str,
+     linkage_threshold: Union[float, str],
+ ) -> bool:
+     """
+     Validate user-provided clustering arguments.
+
+     Returns:
+         bool: True if clustering is turned off (criterion == 'off'); False otherwise.
+
+     Raises:
+         ValueError: If any argument is invalid (user error).
+     """
+     # Allow opting out of clustering without raising
+     if linkage_criterion == "off":
+         return True
+     # Validate linkage method (allow "auto")
+     if linkage_method != "auto" and linkage_method not in LINKAGE_METHODS:
+         raise ValueError(
+             f"Invalid linkage_method '{linkage_method}'. Allowed values are 'auto' or one of: {sorted(LINKAGE_METHODS)}"
+         )
+     # Validate linkage metric (allow "auto")
+     if linkage_metric != "auto" and linkage_metric not in LINKAGE_METRICS:
+         raise ValueError(
+             f"Invalid linkage_metric '{linkage_metric}'. Allowed values are 'auto' or one of: {sorted(LINKAGE_METRICS)}"
+         )
+     # Validate linkage threshold (allow "auto"; otherwise must be float in (0, 1])
+     if linkage_threshold != "auto":
+         try:
+             lt = float(linkage_threshold)
+         except (TypeError, ValueError):
+             raise ValueError("linkage_threshold must be 'auto' or a float in the interval (0, 1].")
+         if not (0.0 < lt <= 1.0):
+             raise ValueError(f"linkage_threshold must be within (0, 1]. Received: {lt}")
+
+     return False
+
+
  def _safeguard_matrix(matrix: np.ndarray) -> np.ndarray:
      """
      Safeguard the matrix by replacing NaN, Inf, and -Inf values.
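The net effect of `_validate_clustering_args` is that bad user input now fails fast with `ValueError` instead of being swallowed by the old blanket `except (ValueError, LinAlgError)` and silently converted into unique-domain assignment. Its behavior, illustrated directly from the code above (this assumes `LINKAGE_METHODS` and `LINKAGE_METRICS` hold the usual SciPy linkage names, which this hunk does not show):

```python
# Clustering explicitly disabled: returns True, no exception
assert _validate_clustering_args("off", "auto", "auto", "auto") is True

# Valid settings: returns False, so clustering proceeds
assert _validate_clustering_args("distance", "average", "euclidean", 0.8) is False

# User errors now raise immediately, before any matrix work
for bad_args in [
    ("distance", "not_a_method", "euclidean", 0.8),  # unknown linkage method
    ("distance", "average", "euclidean", 1.5),       # threshold outside (0, 1]
    ("distance", "average", "euclidean", "high"),    # non-numeric threshold
]:
    try:
        _validate_clustering_args(*bad_args)
    except ValueError as exc:
        print("rejected:", exc)
```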
@@ -394,34 +394,33 @@ def _prune_neighbors(
      # Identify indices with non-zero rows in the binary significance matrix
      non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
      median_distances = []
+     distance_lookup = {}
      for node in non_zero_indices:
-         neighbors = [
-             n
-             for n in network.neighbors(node)
-             if significant_binary_significance_matrix[n].sum() != 0
-         ]
-         if neighbors:
-             median_distance = np.median(
-                 [_get_euclidean_distance(node, n, network) for n in neighbors]
-             )
-             median_distances.append(median_distance)
+         dist = _median_distance_to_significant_neighbors(
+             node, network, significant_binary_significance_matrix
+         )
+         if dist is not None:
+             median_distances.append(dist)
+             distance_lookup[node] = dist
+
+     if not median_distances:
+         logger.warning("No significant neighbors found for pruning.")
+         significant_significance_matrix = np.where(
+             significant_binary_significance_matrix == 1, significance_matrix, 0
+         )
+         return (
+             significance_matrix,
+             significant_binary_significance_matrix,
+             significant_significance_matrix,
+         )

      # Calculate the distance threshold value based on rank
      distance_threshold_value = _calculate_threshold(median_distances, 1 - distance_threshold)
      # Prune nodes that are outliers based on the distance threshold
-     for row_index in non_zero_indices:
-         neighbors = [
-             n
-             for n in network.neighbors(row_index)
-             if significant_binary_significance_matrix[n].sum() != 0
-         ]
-         if neighbors:
-             median_distance = np.median(
-                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
-             )
-             if median_distance >= distance_threshold_value:
-                 significance_matrix[row_index] = 0
-                 significant_binary_significance_matrix[row_index] = 0
+     for node, dist in distance_lookup.items():
+         if dist >= distance_threshold_value:
+             significance_matrix[node] = 0
+             significant_binary_significance_matrix[node] = 0

      # Create a matrix where non-significant entries are set to zero
      significant_significance_matrix = np.where(
@@ -435,6 +434,29 @@ def _prune_neighbors(
      )


+ def _median_distance_to_significant_neighbors(
+     node, network, significance_mask
+ ) -> Union[float, None]:
+     """
+     Calculate the median distance from a node to its significant neighbors.
+
+     Args:
+         node (Any): The node for which the median distance is being calculated.
+         network (nx.Graph): The network graph containing the nodes.
+         significance_mask (np.ndarray): Binary matrix indicating significant nodes.
+
+     Returns:
+         Union[float, None]: The median distance to significant neighbors, or None if no significant neighbors exist.
+     """
+     neighbors = [n for n in network.neighbors(node) if significance_mask[n].sum() != 0]
+     if not neighbors:
+         return None
+     # Calculate distances to significant neighbors
+     distances = [_get_euclidean_distance(node, n, network) for n in neighbors]
+
+     return np.median(distances)
+
+
  def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
      """
      Calculate the Euclidean distance between two nodes in the network.
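A toy check of the refactored pruning path: the new helper returns the median Euclidean distance from a node to its significant neighbors, or `None` when it has none. The sketch below re-implements it on a three-node path graph with explicit coordinates, since `_get_euclidean_distance`'s reliance on node positions is assumed here rather than shown in this hunk:

```python
import networkx as nx
import numpy as np

G = nx.path_graph(3)                                  # edges: 0-1, 1-2
pos = {0: (0.0, 0.0), 1: (1.0, 0.0), 2: (3.0, 0.0)}   # assumed node coordinates
mask = np.array([[1], [0], [1]])                      # nodes 0 and 2 are significant

def euclidean(n1, n2):
    return float(np.linalg.norm(np.subtract(pos[n1], pos[n2])))

def median_distance_to_significant_neighbors(node):
    # Mirrors _median_distance_to_significant_neighbors above
    neighbors = [n for n in G.neighbors(node) if mask[n].sum() != 0]
    if not neighbors:
        return None
    return float(np.median([euclidean(node, n) for n in neighbors]))

print(median_distance_to_significant_neighbors(1))  # 1.5 (distances 1.0 and 2.0)
print(median_distance_to_significant_neighbors(0))  # None (node 1 is not significant)
```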
@@ -8,6 +8,4 @@ from ._tests import (
      compute_binom_test,
      compute_chi2_test,
      compute_hypergeom_test,
-     compute_poisson_test,
-     compute_zscore_test,
  )
@@ -7,7 +7,7 @@ from typing import Any, Dict

  import numpy as np
  from scipy.sparse import csr_matrix
- from scipy.stats import binom, chi2, hypergeom, norm, poisson
+ from scipy.stats import binom, chi2, hypergeom, norm


  def compute_binom_test(
@@ -174,107 +174,3 @@ def compute_hypergeom_test(
      )

      return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
-
-
- def compute_poisson_test(
-     neighborhoods: csr_matrix,
-     annotation: csr_matrix,
-     null_distribution: str = "network",
- ) -> Dict[str, Any]:
-     """
-     Compute Poisson test for enrichment and depletion in neighborhoods with selectable null distribution.
-
-     Args:
-         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-         annotation (csr_matrix): Sparse binary matrix representing annotation.
-         null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
-
-     Returns:
-         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
-
-     Raises:
-         ValueError: If an invalid null_distribution value is provided.
-     """
-     # Matrix multiplication to get the number of annotated nodes in each neighborhood
-     annotated_in_neighborhood = neighborhoods @ annotation  # Sparse result
-     # Convert annotated counts to dense for downstream calculations
-     annotated_in_neighborhood_dense = annotated_in_neighborhood.toarray()
-
-     # Compute lambda_expected based on the chosen null distribution
-     if null_distribution == "network":
-         # Use the mean across neighborhoods (axis=1)
-         lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=1, keepdims=True)
-     elif null_distribution == "annotation":
-         # Use the mean across annotations (axis=0)
-         lambda_expected = np.mean(annotated_in_neighborhood_dense, axis=0, keepdims=True)
-     else:
-         raise ValueError(
-             "Invalid null_distribution value. Choose either 'network' or 'annotation'."
-         )
-
-     # Compute p-values for enrichment and depletion using Poisson distribution
-     enrichment_pvals = 1 - poisson.cdf(annotated_in_neighborhood_dense - 1, lambda_expected)
-     depletion_pvals = poisson.cdf(annotated_in_neighborhood_dense, lambda_expected)
-
-     return {"enrichment_pvals": enrichment_pvals, "depletion_pvals": depletion_pvals}
-
-
- def compute_zscore_test(
-     neighborhoods: csr_matrix,
-     annotation: csr_matrix,
-     null_distribution: str = "network",
- ) -> Dict[str, Any]:
-     """
-     Compute z-score test for enrichment and depletion in neighborhoods with selectable null distribution.
-
-     Args:
-         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-         annotation (csr_matrix): Sparse binary matrix representing annotation.
-         null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
-
-     Returns:
-         Dict[str, Any]: Dictionary containing depletion and enrichment p-values.
-
-     Raises:
-         ValueError: If an invalid null_distribution value is provided.
-     """
-     # Total number of nodes in the network
-     total_node_count = neighborhoods.shape[1]
-
-     # Compute sums
-     if null_distribution == "network":
-         background_population = total_node_count
-         neighborhood_sums = neighborhoods.sum(axis=0).A.flatten()  # Dense column sums
-         annotation_sums = annotation.sum(axis=0).A.flatten()  # Dense row sums
-     elif null_distribution == "annotation":
-         annotated_nodes = annotation.sum(axis=1).A.flatten() > 0  # Dense boolean mask
-         background_population = annotated_nodes.sum()
-         neighborhood_sums = neighborhoods[annotated_nodes].sum(axis=0).A.flatten()
-         annotation_sums = annotation[annotated_nodes].sum(axis=0).A.flatten()
-     else:
-         raise ValueError(
-             "Invalid null_distribution value. Choose either 'network' or 'annotation'."
-         )
-
-     # Observed values
-     observed = (neighborhoods.T @ annotation).toarray()  # Convert sparse result to dense
-     # Expected values under the null
-     neighborhood_sums = neighborhood_sums.reshape(-1, 1)  # Ensure correct shape
-     annotation_sums = annotation_sums.reshape(1, -1)  # Ensure correct shape
-     expected = (neighborhood_sums @ annotation_sums) / background_population
-
-     # Standard deviation under the null
-     std_dev = np.sqrt(
-         expected
-         * (1 - annotation_sums / background_population)
-         * (1 - neighborhood_sums / background_population)
-     )
-     std_dev[std_dev == 0] = np.nan  # Avoid division by zero
-     # Compute z-scores
-     z_scores = (observed - expected) / std_dev
-
-     # Convert z-scores to depletion and enrichment p-values
-     enrichment_pvals = norm.sf(z_scores)  # Upper tail
-     depletion_pvals = norm.cdf(z_scores)  # Lower tail
-
-     return {"depletion_pvals": depletion_pvals, "enrichment_pvals": enrichment_pvals}
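Because the removed bodies are preserved verbatim above, users who depended on the deleted tests can reproduce them outside the package. A self-contained sketch of the Poisson variant with toy inputs:

```python
import numpy as np
from scipy.sparse import csr_matrix
from scipy.stats import poisson

def compute_poisson_test(neighborhoods, annotation, null_distribution="network"):
    # Annotated-node counts per (neighborhood, term) pair, densified as above
    counts = (neighborhoods @ annotation).toarray()
    # Expected rate under the chosen null, mirroring the removed code
    if null_distribution == "network":
        lambda_expected = np.mean(counts, axis=1, keepdims=True)
    elif null_distribution == "annotation":
        lambda_expected = np.mean(counts, axis=0, keepdims=True)
    else:
        raise ValueError("Invalid null_distribution value. Choose either 'network' or 'annotation'.")
    return {
        "enrichment_pvals": 1 - poisson.cdf(counts - 1, lambda_expected),
        "depletion_pvals": poisson.cdf(counts, lambda_expected),
    }

# Toy example: 2 neighborhoods x 3 nodes, 3 nodes x 2 annotation terms
neighborhoods = csr_matrix(np.array([[1, 1, 0], [0, 1, 1]]))
annotation = csr_matrix(np.array([[1, 0], [1, 1], [0, 1]]))
print(compute_poisson_test(neighborhoods, annotation))
```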