risk-network 0.0.11__tar.gz → 0.0.12b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. risk_network-0.0.12b1/PKG-INFO +122 -0
  2. {risk_network-0.0.11 → risk_network-0.0.12b1}/README.md +7 -7
  3. {risk_network-0.0.11 → risk_network-0.0.12b1}/pyproject.toml +21 -5
  4. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/__init__.py +1 -1
  5. risk_network-0.0.12b1/src/risk/annotations/__init__.py +10 -0
  6. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/annotations/io.py +48 -47
  7. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/annotations/nltk_setup.py +2 -1
  8. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/log/__init__.py +1 -1
  9. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/log/parameters.py +21 -22
  10. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/__init__.py +0 -1
  11. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/api.py +2 -2
  12. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/community.py +33 -4
  13. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/domains.py +6 -4
  14. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/neighborhoods.py +7 -1
  15. risk_network-0.0.12b1/src/risk/neighborhoods/stats/__init__.py +13 -0
  16. risk_network-0.0.12b1/src/risk/neighborhoods/stats/permutation/__init__.py +6 -0
  17. {risk_network-0.0.11/risk → risk_network-0.0.12b1/src/risk/neighborhoods}/stats/permutation/permutation.py +7 -4
  18. {risk_network-0.0.11/risk → risk_network-0.0.12b1/src/risk/neighborhoods}/stats/permutation/test_functions.py +2 -2
  19. risk_network-0.0.11/risk/stats/stat_tests.py → risk_network-0.0.12b1/src/risk/neighborhoods/stats/tests.py +21 -13
  20. risk_network-0.0.12b1/src/risk/network/__init__.py +4 -0
  21. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/graph/__init__.py +0 -2
  22. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/graph/api.py +2 -2
  23. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/graph/graph.py +56 -57
  24. risk_network-0.0.11/risk/stats/significance.py → risk_network-0.0.12b1/src/risk/network/graph/stats.py +2 -2
  25. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/graph/summary.py +2 -3
  26. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/io.py +151 -8
  27. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/__init__.py +0 -2
  28. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/api.py +1 -1
  29. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/canvas.py +35 -35
  30. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/contour.py +11 -12
  31. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/labels.py +257 -246
  32. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/plotter.py +2 -4
  33. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/utils/colors.py +3 -0
  34. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/risk.py +5 -5
  35. risk_network-0.0.12b1/src/risk_network.egg-info/PKG-INFO +122 -0
  36. risk_network-0.0.12b1/src/risk_network.egg-info/SOURCES.txt +50 -0
  37. risk_network-0.0.12b1/tests/test_load_annotations.py +291 -0
  38. risk_network-0.0.12b1/tests/test_load_graph.py +426 -0
  39. risk_network-0.0.12b1/tests/test_load_io_combinations.py +95 -0
  40. risk_network-0.0.12b1/tests/test_load_neighborhoods.py +455 -0
  41. risk_network-0.0.12b1/tests/test_load_network.py +401 -0
  42. risk_network-0.0.12b1/tests/test_load_plotter.py +1483 -0
  43. risk_network-0.0.12b1/tests/test_log.py +72 -0
  44. risk_network-0.0.11/MANIFEST.in +0 -20
  45. risk_network-0.0.11/PKG-INFO +0 -798
  46. risk_network-0.0.11/risk/annotations/__init__.py +0 -7
  47. risk_network-0.0.11/risk/network/__init__.py +0 -6
  48. risk_network-0.0.11/risk/network/geometry.py +0 -150
  49. risk_network-0.0.11/risk/stats/__init__.py +0 -15
  50. risk_network-0.0.11/risk/stats/permutation/__init__.py +0 -6
  51. risk_network-0.0.11/risk_network.egg-info/PKG-INFO +0 -798
  52. risk_network-0.0.11/risk_network.egg-info/SOURCES.txt +0 -46
  53. risk_network-0.0.11/setup.py +0 -67
  54. {risk_network-0.0.11 → risk_network-0.0.12b1}/LICENSE +0 -0
  55. {risk_network-0.0.11 → risk_network-0.0.12b1}/setup.cfg +0 -0
  56. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/annotations/annotations.py +0 -0
  57. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/log/console.py +0 -0
  58. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/network.py +0 -0
  59. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/utils/layout.py +0 -0
  60. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk_network.egg-info/dependency_links.txt +0 -0
  61. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk_network.egg-info/requires.txt +0 -0
  62. {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk_network.egg-info/top_level.txt +0 -0
@@ -0,0 +1,122 @@
+ Metadata-Version: 2.4
+ Name: risk-network
+ Version: 0.0.12b1
+ Summary: A Python package for biological network analysis
+ Author-email: Ira Horecka <ira89@icloud.com>
+ License: GPL-3.0-or-later
+ Project-URL: Homepage, https://github.com/riskportal/network
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
+ Classifier: Topic :: Scientific/Engineering :: Visualization
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Development Status :: 4 - Beta
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: ipywidgets
+ Requires-Dist: leidenalg
+ Requires-Dist: markov_clustering
+ Requires-Dist: matplotlib
+ Requires-Dist: networkx
+ Requires-Dist: nltk
+ Requires-Dist: numpy
+ Requires-Dist: openpyxl
+ Requires-Dist: pandas
+ Requires-Dist: python-igraph
+ Requires-Dist: python-louvain
+ Requires-Dist: scikit-learn
+ Requires-Dist: scipy
+ Requires-Dist: statsmodels
+ Requires-Dist: threadpoolctl
+ Requires-Dist: tqdm
+ Dynamic: license-file
+
+ # RISK Network
+
+ <p align="center">
+ <img src="https://i.imgur.com/8TleEJs.png" width="50%" />
+ </p>
+
+ <br>
+
+ ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
+ [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
+ ![License](https://img.shields.io/badge/license-GPLv3-purple)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
+ ![Downloads](https://img.shields.io/pypi/dm/risk-network)
+ ![Tests](https://github.com/riskportal/network/actions/workflows/ci.yml/badge.svg)
+
+ **RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. RISK integrates community detection-based clustering, rigorous statistical enrichment analysis, and a modular framework to uncover biologically meaningful relationships and generate high-resolution visualizations. RISK supports diverse data formats and is optimized for large-scale network analysis, making it a valuable resource for researchers in systems biology and beyond.
+
+ ## Documentation and Tutorial
+
+ Full documentation is available at:
+
+ - **Docs:** [https://riskportal.github.io/network-tutorial](https://riskportal.github.io/network-tutorial)
+ - **Tutorial Jupyter Notebook Repository:** [https://github.com/riskportal/network-tutorial](https://github.com/riskportal/network-tutorial)
+
+ ## Installation
+
+ RISK is compatible with Python 3.8 or later and runs on all major operating systems. To install the latest version of RISK, run:
+
+ ```bash
+ pip install risk-network --upgrade
+ ```
+
+ ## Features
+
+ - **Comprehensive Network Analysis**: Analyze biological networks (e.g., protein–protein interaction and genetic interaction networks) as well as non-biological networks.
+ - **Advanced Clustering Algorithms**: Supports Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap for identifying structured network regions.
+ - **Flexible Visualization**: Produce customizable, high-resolution network visualizations with kernel density estimate overlays, adjustable node and edge attributes, and export options in SVG, PNG, and PDF formats.
+ - **Efficient Data Handling**: Supports multiple input/output formats, including JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
+ - **Statistical Analysis**: Assess functional enrichment using hypergeometric, permutation (network-aware), binomial, chi-squared, Poisson, and z-score tests, ensuring statistical adaptability across datasets.
+ - **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
+
+ ## Example Usage
+
+ We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network from Michaelis et al. (2023), filtering for proteins with six or more interactions to emphasize core functional relationships. RISK identified compact, statistically enriched clusters corresponding to biological processes such as ribosomal assembly and mitochondrial organization.
+
+ [![Figure 1](https://i.imgur.com/lJHJrJr.jpeg)](https://i.imgur.com/lJHJrJr.jpeg)
+
+ This figure highlights RISK’s capability to detect both established and novel functional modules within the yeast interactome.
+
+ ## Citation
+
+ If you use RISK in your research, please cite:
+
+ **Horecka et al.**, "RISK: a next-generation tool for biological network annotation and visualization", **Bioinformatics**, 2025. DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
+
+ ## Software Architecture and Implementation
+
+ RISK features a streamlined, modular architecture designed to meet diverse research needs. RISK’s modular design enables users to run individual components—such as clustering, statistical testing, or visualization—independently or in combination, depending on the analysis workflow. It includes dedicated modules for:
+
+ - **Data I/O**: Supports JSON, CSV, TSV, Excel, Cytoscape, and GPickle formats.
+ - **Clustering**: Supports multiple clustering methods, including Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap. Provides flexible distance metrics tailored to network structure.
+ - **Statistical Analysis**: Provides a suite of tests for overrepresentation analysis of annotations.
+ - **Visualization**: Offers customizable, high-resolution output in multiple formats, including SVG, PNG, and PDF.
+ - **Configuration Management**: Centralized parameters in risk.params ensure reproducibility and easy tuning for large-scale analyses.
+
+ ## Performance and Efficiency
+
+ Benchmarking results demonstrate that RISK efficiently scales to networks exceeding hundreds of thousands of edges, maintaining low execution times and optimal memory usage across statistical tests.
+
+ ## Contributing
+
+ We welcome contributions from the community:
+
+ - [Issues Tracker](https://github.com/riskportal/network/issues)
+ - [Source Code](https://github.com/riskportal/network/tree/main/risk)
+
+ ## Support
+
+ If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/riskportal/network/issues) on GitHub.
+
+ ## License
+
+ RISK is open source under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
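Editor's note: the entry points documented in the README above are visible elsewhere in this diff — `from risk.risk import RISK` appears in `risk/__init__.py`, and the `AnnotationsIO.load_json_annotation(self, network, filepath, min_nodes_per_term=2)` signature appears verbatim in the `annotations/io.py` hunks below. A minimal, hedged sketch of that annotation-loading flow; the toy network and file path are illustrative placeholders, not taken from the package's documentation:

```python
# Sketch only: load_json_annotation's signature is shown in the io.py diff
# below; the network and file path here are placeholder assumptions.
import networkx as nx

from risk.annotations.io import AnnotationsIO

network = nx.karate_club_graph()  # stand-in for a real biological network
annotations = AnnotationsIO().load_json_annotation(
    network, "annotations.json", min_nodes_per_term=2
)
```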
@@ -17,7 +17,10 @@
  
  ## Documentation and Tutorial
  
- An interactive Jupyter notebook tutorial can be found [here](https://github.com/riskportal/network-tutorial). We highly recommend new users to consult the documentation and tutorial early on to fully utilize RISK's capabilities.
+ Full documentation is available at:
+
+ - **Docs:** [https://riskportal.github.io/network-tutorial](https://riskportal.github.io/network-tutorial)
+ - **Tutorial Jupyter Notebook Repository:** [https://github.com/riskportal/network-tutorial](https://github.com/riskportal/network-tutorial)
  
  ## Installation
  
@@ -33,7 +36,7 @@ pip install risk-network --upgrade
  - **Advanced Clustering Algorithms**: Supports Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap for identifying structured network regions.
  - **Flexible Visualization**: Produce customizable, high-resolution network visualizations with kernel density estimate overlays, adjustable node and edge attributes, and export options in SVG, PNG, and PDF formats.
  - **Efficient Data Handling**: Supports multiple input/output formats, including JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
- - **Statistical Analysis**: Assess functional enrichment using hypergeometric, permutation, binomial, chi-squared, Poisson, and z-score tests, ensuring statistical adaptability across datasets.
+ - **Statistical Analysis**: Assess functional enrichment using hypergeometric, permutation (network-aware), binomial, chi-squared, Poisson, and z-score tests, ensuring statistical adaptability across datasets.
  - **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
  
  ## Example Usage
@@ -52,12 +55,13 @@ If you use RISK in your research, please cite:
  
  ## Software Architecture and Implementation
  
- RISK features a streamlined, modular architecture designed to meet diverse research needs. It includes dedicated modules for:
+ RISK features a streamlined, modular architecture designed to meet diverse research needs. RISK’s modular design enables users to run individual components—such as clustering, statistical testing, or visualization—independently or in combination, depending on the analysis workflow. It includes dedicated modules for:
  
  - **Data I/O**: Supports JSON, CSV, TSV, Excel, Cytoscape, and GPickle formats.
  - **Clustering**: Supports multiple clustering methods, including Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap. Provides flexible distance metrics tailored to network structure.
  - **Statistical Analysis**: Provides a suite of tests for overrepresentation analysis of annotations.
  - **Visualization**: Offers customizable, high-resolution output in multiple formats, including SVG, PNG, and PDF.
+ - **Configuration Management**: Centralized parameters in risk.params ensure reproducibility and easy tuning for large-scale analyses.
  
  ## Performance and Efficiency
  
@@ -77,7 +81,3 @@ If you encounter issues or have suggestions for new features, please use the [Is
  ## License
  
  RISK is open source under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
- 
- ---
- 
- **Note**: For detailed documentation and to access the interactive tutorial, please visit the links above.
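Editor's note: the updated Statistical Analysis bullet above lists the tests now housed in `src/risk/neighborhoods/stats/tests.py` (file 19 in the list). As a grounding illustration of the simplest of these — not RISK's own implementation — a hypergeometric overrepresentation p-value can be computed with SciPy; all counts below are invented:

```python
# Illustrative hypergeometric overrepresentation test; counts are made up.
from scipy.stats import hypergeom

M = 5000  # total nodes in the network (population size)
n = 120   # nodes carrying the annotation (successes in population)
N = 300   # nodes in the neighborhood (sample size)
k = 25    # annotated nodes observed in the neighborhood
p_value = hypergeom.sf(k - 1, M, n, N)  # P(X >= k)
print(f"p = {p_value:.3e}")
```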
@@ -1,20 +1,19 @@
  [build-system]
- requires = ["setuptools", "wheel", "numpy"]
+ requires = ["setuptools", "numpy"]
  build-backend = "setuptools.build_meta"
  
  [project]
  name = "risk-network"
- dynamic = ["version"] # Indicates that version is determined dynamically
+ dynamic = ["version"]
  description = "A Python package for biological network analysis"
  authors = [
  { name = "Ira Horecka", email = "ira89@icloud.com" },
  ]
  readme = "README.md"
- license = { file = "LICENSE" }
+ requires-python = ">=3.8"
  classifiers = [
  "Intended Audience :: Developers",
  "Intended Audience :: Science/Research",
- "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
  "Operating System :: OS Independent",
  "Programming Language :: Python :: 3",
  "Programming Language :: Python :: 3.8",
@@ -43,4 +42,21 @@ dependencies = [
  "threadpoolctl",
  "tqdm",
  ]
- requires-python = ">=3.8"
+
+ [project.license]
+ text = "GPL-3.0-or-later"
+
+ [project.urls]
+ "Homepage" = "https://github.com/riskportal/network"
+
+ [tool.setuptools]
+ package-dir = {"" = "src"}
+
+ [tool.setuptools.packages.find]
+ where = ["src"]
+
+ [tool.setuptools.dynamic]
+ version = { attr = "risk.__version__" }
+
+ [tool.pytest.ini_options]
+ pythonpath = ["src"]
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
  
  from risk.risk import RISK
  
- __version__ = "0.0.11"
+ __version__ = "0.0.12-beta.1"
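Editor's note: the source declares `__version__ = "0.0.12-beta.1"`, yet the sdist is named `0.0.12b1`. This is expected: setuptools reads the attribute via `version = { attr = "risk.__version__" }` in pyproject.toml and normalizes it under PEP 440, which maps the `beta` spelling to `b`. A quick check with the `packaging` library:

```python
from packaging.version import Version

print(Version("0.0.12-beta.1"))  # prints "0.0.12b1", the PEP 440 normal form
```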
@@ -0,0 +1,10 @@
+ """
+ risk/annotations
+ ~~~~~~~~~~~~~~~~
+ """
+
+ from risk.annotations.annotations import (
+ define_top_annotations,
+ get_weighted_description,
+ )
+ from risk.annotations.io import AnnotationsIO
@@ -10,7 +10,7 @@ import networkx as nx
  import pandas as pd
  
  from risk.annotations.annotations import load_annotations
- from risk.log import params, logger, log_header
+ from risk.log import log_header, logger, params
  
  
  class AnnotationsIO:
@@ -20,9 +20,6 @@ class AnnotationsIO:
  and to export parameter data to various formats like JSON, CSV, and text files.
  """
  
- def __init__(self):
- pass
-
  def load_json_annotation(
  self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
  ) -> Dict[str, Any]:
@@ -42,7 +39,7 @@
  params.log_annotations(
  filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
  )
- _log_loading(filetype, filepath=filepath)
+ self._log_loading(filetype, filepath=filepath)
  
  # Load the JSON file into a dictionary
  with open(filepath, "r", encoding="utf-8") as file:
@@ -81,7 +78,7 @@
  params.log_annotations(
  filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
  )
- _log_loading(filetype, filepath=filepath)
+ self._log_loading(filetype, filepath=filepath)
  
  # Load the specified sheet from the Excel file
  annotation = pd.read_excel(filepath, sheet_name=sheet_name)
@@ -123,10 +120,10 @@
  params.log_annotations(
  filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
  )
- _log_loading(filetype, filepath=filepath)
+ self._log_loading(filetype, filepath=filepath)
  
  # Load the CSV file into a dictionary
- annotations_input = _load_matrix_file(
+ annotations_input = self._load_matrix_file(
  filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
  )
  
@@ -161,10 +158,10 @@
  params.log_annotations(
  filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
  )
- _log_loading(filetype, filepath=filepath)
+ self._log_loading(filetype, filepath=filepath)
  
  # Load the TSV file into a dictionary
- annotations_input = _load_matrix_file(
+ annotations_input = self._load_matrix_file(
  filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
  )
  
@@ -183,6 +180,9 @@
  
  Returns:
  Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+
+ Raises:
+ TypeError: If the content is not a dictionary.
  """
  # Ensure the input content is a dictionary
  if not isinstance(content, dict):
@@ -193,48 +193,49 @@
  filetype = "Dictionary"
  # Log the loading of the annotations from the dictionary
  params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
- _log_loading(filetype, "In-memory dictionary")
+ self._log_loading(filetype, "In-memory dictionary")
  
  # Load the annotations as a dictionary from the provided dictionary
  return load_annotations(network, content, min_nodes_per_term)
  
+ def _load_matrix_file(
+ self,
+ filepath: str,
+ label_colname: str,
+ nodes_colname: str,
+ delimiter: str = ",",
+ nodes_delimiter: str = ";",
+ ) -> Dict[str, Any]:
+ """Load annotations from a CSV or TSV file and convert them to a dictionary.
  
- def _load_matrix_file(
- filepath: str,
- label_colname: str,
- nodes_colname: str,
- delimiter: str = ",",
- nodes_delimiter: str = ";",
- ) -> Dict[str, Any]:
- """Load annotations from a CSV or TSV file and convert them to a dictionary.
-
- Args:
- filepath (str): Path to the annotation file.
- label_colname (str): Name of the column containing the labels (e.g., GO terms).
- nodes_colname (str): Name of the column containing the nodes associated with each label.
- delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
- nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
-
- Returns:
- Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
- """
- # Load the CSV or TSV file into a DataFrame
- annotation = pd.read_csv(filepath, delimiter=delimiter)
- # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
- annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
- # Create a dictionary pairing labels with their corresponding list of nodes
- label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
- return label_node_dict
+ Args:
+ filepath (str): Path to the annotation file.
+ label_colname (str): Name of the column containing the labels (e.g., GO terms).
+ nodes_colname (str): Name of the column containing the nodes associated with each label.
+ delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
+ nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
  
+ Returns:
+ Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
+ """
+ # Load the CSV or TSV file into a DataFrame
+ annotation = pd.read_csv(filepath, delimiter=delimiter)
+ # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
+ annotation[nodes_colname] = annotation[nodes_colname].apply(
+ lambda x: x.split(nodes_delimiter)
+ )
+ # Create a dictionary pairing labels with their corresponding list of nodes
+ label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
+ return label_node_dict
  
- def _log_loading(filetype: str, filepath: str = "") -> None:
- """Log information about the network file being loaded.
+ def _log_loading(self, filetype: str, filepath: str = "") -> None:
+ """Log information about the network file being loaded.
  
- Args:
- filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
- filepath (str, optional): The path to the file being loaded.
- """
- log_header("Loading annotations")
- logger.debug(f"Filetype: {filetype}")
- if filepath:
- logger.debug(f"Filepath: {filepath}")
+ Args:
+ filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
+ filepath (str, optional): The path to the file being loaded.
+ """
+ log_header("Loading annotations")
+ logger.debug(f"Filetype: {filetype}")
+ if filepath:
+ logger.debug(f"Filepath: {filepath}")
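Editor's note: the `_load_matrix_file` helper above (moved from a module-level function to an instance method in this release) reduces a label/nodes table to a plain dictionary. A standalone sketch of the same pandas idiom; the GO terms and node names are invented for illustration:

```python
# Standalone sketch of the _load_matrix_file idiom shown above.
import pandas as pd

annotation = pd.DataFrame(
    {"label": ["GO:0006412", "GO:0000502"], "nodes": ["RPL1;RPL2", "PRE1;PRE2"]}
)
# Split the nodes column on the nodes delimiter (';')
annotation["nodes"] = annotation["nodes"].apply(lambda x: x.split(";"))
# Pair each label with its list of nodes
label_node_dict = annotation.set_index("label")["nodes"].to_dict()
print(label_node_dict)
# {'GO:0006412': ['RPL1', 'RPL2'], 'GO:0000502': ['PRE1', 'PRE2']}
```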
@@ -8,7 +8,8 @@ import zipfile
  from typing import List, Tuple
  
  import nltk
- from nltk.data import find, path as nltk_data_path
+ from nltk.data import find
+ from nltk.data import path as nltk_data_path
  
  from risk.log import logger
  
@@ -3,7 +3,7 @@ risk/log
  ~~~~~~~~
  """
  
- from risk.log.console import logger, log_header, set_global_verbosity
+ from risk.log.console import log_header, logger, set_global_verbosity
  from risk.log.parameters import Params
  
  # Initialize the global parameters logger
@@ -11,7 +11,7 @@ from typing import Any, Dict
  
  import numpy as np
  
- from risk.log.console import logger, log_header
+ from risk.log.console import log_header, logger
  
  # Suppress all warnings - this is to resolve warnings from multiprocessing
  warnings.filterwarnings("ignore")
@@ -137,7 +137,7 @@ class Params:
  Dict[str, Any]: A dictionary containing the processed parameters.
  """
  log_header("Loading parameters")
- return _convert_ndarray_to_list(
+ return self._convert_ndarray_to_list(
  {
  "annotations": self.annotations,
  "datetime": self.datetime,
@@ -148,25 +148,24 @@
  }
  )
  
+ def _convert_ndarray_to_list(self, d: Dict[str, Any]) -> Dict[str, Any]:
+ """Recursively convert all np.ndarray values in the dictionary to lists.
  
- def _convert_ndarray_to_list(d: Dict[str, Any]) -> Dict[str, Any]:
- """Recursively convert all np.ndarray values in the dictionary to lists.
-
- Args:
- d (Dict[str, Any]): The dictionary to process.
+ Args:
+ d (Dict[str, Any]): The dictionary to process.
  
- Returns:
- Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
- """
- if isinstance(d, dict):
- # Recursively process each value in the dictionary
- return {k: _convert_ndarray_to_list(v) for k, v in d.items()}
- if isinstance(d, list):
- # Recursively process each item in the list
- return [_convert_ndarray_to_list(v) for v in d]
- if isinstance(d, np.ndarray):
- # Convert numpy arrays to lists
- return d.tolist()
-
- # Return the value unchanged if it's not a dict, List, or ndarray
- return d
+ Returns:
+ Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
+ """
+ if isinstance(d, dict):
+ # Recursively process each value in the dictionary
+ return {k: self._convert_ndarray_to_list(v) for k, v in d.items()}
+ if isinstance(d, list):
+ # Recursively process each item in the list
+ return [self._convert_ndarray_to_list(v) for v in d]
+ if isinstance(d, np.ndarray):
+ # Convert numpy arrays to lists
+ return d.tolist()
+
+ # Return the value unchanged if it's not a dict, List, or ndarray
+ return d
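Editor's note: the refactored `_convert_ndarray_to_list` recurses through nested dicts and lists, converting any `np.ndarray` to a plain list so the logged parameters serialize cleanly to JSON. A self-contained demonstration of the same recursion, outside the `Params` class:

```python
# Demonstration of the recursion shown above; input values are invented.
import numpy as np

def convert_ndarray_to_list(d):
    if isinstance(d, dict):
        return {k: convert_ndarray_to_list(v) for k, v in d.items()}
    if isinstance(d, list):
        return [convert_ndarray_to_list(v) for v in d]
    if isinstance(d, np.ndarray):
        return d.tolist()
    return d

print(convert_ndarray_to_list({"a": np.arange(3), "b": [np.ones(2), "x"]}))
# {'a': [0, 1, 2], 'b': [[1.0, 1.0], 'x']}
```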
@@ -4,5 +4,4 @@ risk/neighborhoods
  """
  
  from risk.neighborhoods.domains import define_domains, trim_domains
- from risk.neighborhoods.api import NeighborhoodsAPI
  from risk.neighborhoods.neighborhoods import process_neighborhoods
@@ -10,9 +10,9 @@ import networkx as nx
  import numpy as np
  from scipy.sparse import csr_matrix
  
- from risk.log import logger, log_header, params
+ from risk.log import log_header, logger, params
  from risk.neighborhoods.neighborhoods import get_network_neighborhoods
- from risk.stats import (
+ from risk.neighborhoods.stats import (
  compute_binom_test,
  compute_chi2_test,
  compute_hypergeom_test,
@@ -8,7 +8,7 @@ import igraph as ig
  import markov_clustering as mc
  import networkx as nx
  import numpy as np
- from leidenalg import find_partition, RBConfigurationVertexPartition
+ from leidenalg import RBConfigurationVertexPartition, find_partition
  from networkx.algorithms.community import greedy_modularity_communities
  from scipy.sparse import csr_matrix
  
@@ -27,6 +27,10 @@ def calculate_greedy_modularity_neighborhoods(
  
  Returns:
  csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+
+ Raises:
+ ValueError: If the subgraph has no edges after filtering.
+ Warning: If the resulting subgraph has no edges after filtering.
  """
  # Create a subgraph with the shortest edges based on the rank fraction
  subnetwork = _create_percentile_limited_subgraph(
@@ -67,6 +71,10 @@ def calculate_label_propagation_neighborhoods(
  
  Returns:
  csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
+
+ Raises:
+ ValueError: If the subgraph has no edges after filtering.
+ Warning: If the resulting subgraph has no edges after filtering.
  """
  # Create a subgraph with the shortest edges based on the rank fraction
  subnetwork = _create_percentile_limited_subgraph(
@@ -115,6 +123,10 @@ def calculate_leiden_neighborhoods(
  
  Returns:
  csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+
+ Raises:
+ ValueError: If the subgraph has no edges after filtering.
+ Warning: If the resulting subgraph has no edges after filtering.
  """
  # Create a subgraph with the shortest edges based on the rank fraction
  subnetwork = _create_percentile_limited_subgraph(
@@ -167,6 +179,10 @@ def calculate_louvain_neighborhoods(
  
  Returns:
  csr_matrix: A binary neighborhood matrix in CSR format.
+
+ Raises:
+ ValueError: If the subgraph has no edges after filtering.
+ Warning: If the resulting subgraph has no edges after filtering.
  """
  # Create a subgraph with the shortest edges based on the rank fraction
  subnetwork = _create_percentile_limited_subgraph(
@@ -215,9 +231,10 @@ def calculate_markov_clustering_neighborhoods(
  Returns:
  csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.
  
- Warning:
- This function temporarily converts the adjacency matrix to a dense format, which may lead to
- high memory consumption for large graphs.
+ Raises:
+ ValueError: If the subgraph has no edges after filtering.
+ RuntimeError: If MCL fails to run.
+ Warning: If the resulting subgraph has no edges after filtering.
  """
  # Create a subgraph with the shortest edges based on the rank fraction
  subnetwork = _create_percentile_limited_subgraph(
@@ -283,6 +300,10 @@ def calculate_spinglass_neighborhoods(
  
  Returns:
  csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
+
+ Raises:
+ ValueError: If the subgraph has no edges after filtering.
+ Warning: If the resulting subgraph has no edges after filtering.
  """
  # Create a subgraph with the shortest edges based on the rank fraction
  subnetwork = _create_percentile_limited_subgraph(
@@ -343,6 +364,10 @@ def calculate_walktrap_neighborhoods(
  
  Returns:
  csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
+
+ Raises:
+ ValueError: If the subgraph has no edges after filtering.
+ Warning: If the resulting subgraph has no edges after filtering.
  """
  # Create a subgraph with the shortest edges based on the rank fraction
  subnetwork = _create_percentile_limited_subgraph(
@@ -384,6 +409,10 @@ def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: fl
  Returns:
  nx.Graph: A subgraph with nodes and edges where the edges are within the shortest
  specified rank fraction.
+
+ Raises:
+ ValueError: If no edges with 'length' attributes are found in the graph.
+ Warning: If the resulting subgraph has no edges after filtering.
  """
  # Step 1: Extract edges with their lengths
  edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
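Editor's note: every clustering function above first calls `_create_percentile_limited_subgraph`, which keeps only the shortest fraction of edges ranked by their 'length' attribute. A hedged sketch of that filtering step — only the edge-extraction line and the ValueError are taken from the diff; the rest is an assumption about how the helper likely proceeds:

```python
# Sketch of percentile-limited edge filtering; the sort/cutoff logic is an
# assumption, not the package's verbatim implementation.
import networkx as nx

def create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: float) -> nx.Graph:
    # Step 1: Extract edges with their lengths
    edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
    if not edges_with_length:
        raise ValueError("No edges with 'length' attributes found in the graph.")
    # Keep the shortest-ranked fraction of edges
    edges_with_length.sort(key=lambda e: e[2]["length"])
    cutoff = int(len(edges_with_length) * fraction_shortest_edges)
    subgraph = nx.Graph()
    subgraph.add_nodes_from(G.nodes(data=True))
    subgraph.add_edges_from(edges_with_length[:cutoff])
    return subgraph
```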
@@ -9,19 +9,18 @@ from typing import Tuple, Union
  import numpy as np
  import pandas as pd
  from numpy.linalg import LinAlgError
- from scipy.cluster.hierarchy import linkage, fcluster
+ from scipy.cluster.hierarchy import fcluster, linkage
  from sklearn.metrics import silhouette_score
  from tqdm import tqdm
  
  from risk.annotations import get_weighted_description
  from risk.log import logger
  
-
  # Define constants for clustering
  # fmt: off
  LINKAGE_METHODS = {"single", "complete", "average", "weighted", "centroid", "median", "ward"}
  LINKAGE_METRICS = {
- "braycurtis","canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
+ "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
  "hamming", "jaccard", "jensenshannon", "kulczynski1", "mahalanobis", "matching", "minkowski",
  "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule",
  }
@@ -49,6 +48,9 @@ def define_domains(
  
  Returns:
  pd.DataFrame: DataFrame with the primary domain for each node.
+
+ Raises:
+ ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
  """
  try:
  if linkage_criterion == "off":
@@ -242,7 +244,7 @@
  # Evaluating optimal linkage method and metric
  for method, metric in tqdm(
  product(linkage_methods, linkage_metrics),
- desc="Evaluating optimal linkage method and metric",
+ desc="Evaluating linkage methods and metrics",
  total=total_combinations,
  bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
  ):
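Editor's note: the renamed progress bar above ("Evaluating linkage methods and metrics") iterates the product of `LINKAGE_METHODS` and `LINKAGE_METRICS`, scoring each hierarchical clustering by silhouette. A small self-contained sketch of that grid search — the data, cluster count, and selection rule are synthetic assumptions, not RISK's exact scoring logic:

```python
# Sketch of a linkage-method/metric grid search scored by silhouette.
from itertools import product

import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage
from sklearn.metrics import silhouette_score

rng = np.random.default_rng(0)
X = rng.random((40, 4))  # synthetic feature matrix
best = {"method": None, "metric": None, "score": -1.0}
for method, metric in product(["average", "complete"], ["euclidean", "cityblock"]):
    Z = linkage(X, method=method, metric=metric)
    labels = fcluster(Z, t=3, criterion="maxclust")
    if len(set(labels)) > 1:  # silhouette needs at least two clusters
        score = silhouette_score(X, labels, metric=metric)
        if score > best["score"]:
            best = {"method": method, "metric": metric, "score": score}
print(best)
```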