risk-network 0.0.11__tar.gz → 0.0.12b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk_network-0.0.12b1/PKG-INFO +122 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1}/README.md +7 -7
- {risk_network-0.0.11 → risk_network-0.0.12b1}/pyproject.toml +21 -5
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/__init__.py +1 -1
- risk_network-0.0.12b1/src/risk/annotations/__init__.py +10 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/annotations/io.py +48 -47
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/annotations/nltk_setup.py +2 -1
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/log/__init__.py +1 -1
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/log/parameters.py +21 -22
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/__init__.py +0 -1
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/api.py +2 -2
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/community.py +33 -4
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/domains.py +6 -4
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/neighborhoods/neighborhoods.py +7 -1
- risk_network-0.0.12b1/src/risk/neighborhoods/stats/__init__.py +13 -0
- risk_network-0.0.12b1/src/risk/neighborhoods/stats/permutation/__init__.py +6 -0
- {risk_network-0.0.11/risk → risk_network-0.0.12b1/src/risk/neighborhoods}/stats/permutation/permutation.py +7 -4
- {risk_network-0.0.11/risk → risk_network-0.0.12b1/src/risk/neighborhoods}/stats/permutation/test_functions.py +2 -2
- risk_network-0.0.11/risk/stats/stat_tests.py → risk_network-0.0.12b1/src/risk/neighborhoods/stats/tests.py +21 -13
- risk_network-0.0.12b1/src/risk/network/__init__.py +4 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/graph/__init__.py +0 -2
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/graph/api.py +2 -2
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/graph/graph.py +56 -57
- risk_network-0.0.11/risk/stats/significance.py → risk_network-0.0.12b1/src/risk/network/graph/stats.py +2 -2
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/graph/summary.py +2 -3
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/io.py +151 -8
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/__init__.py +0 -2
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/api.py +1 -1
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/canvas.py +35 -35
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/contour.py +11 -12
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/labels.py +257 -246
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/plotter.py +2 -4
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/utils/colors.py +3 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/risk.py +5 -5
- risk_network-0.0.12b1/src/risk_network.egg-info/PKG-INFO +122 -0
- risk_network-0.0.12b1/src/risk_network.egg-info/SOURCES.txt +50 -0
- risk_network-0.0.12b1/tests/test_load_annotations.py +291 -0
- risk_network-0.0.12b1/tests/test_load_graph.py +426 -0
- risk_network-0.0.12b1/tests/test_load_io_combinations.py +95 -0
- risk_network-0.0.12b1/tests/test_load_neighborhoods.py +455 -0
- risk_network-0.0.12b1/tests/test_load_network.py +401 -0
- risk_network-0.0.12b1/tests/test_load_plotter.py +1483 -0
- risk_network-0.0.12b1/tests/test_log.py +72 -0
- risk_network-0.0.11/MANIFEST.in +0 -20
- risk_network-0.0.11/PKG-INFO +0 -798
- risk_network-0.0.11/risk/annotations/__init__.py +0 -7
- risk_network-0.0.11/risk/network/__init__.py +0 -6
- risk_network-0.0.11/risk/network/geometry.py +0 -150
- risk_network-0.0.11/risk/stats/__init__.py +0 -15
- risk_network-0.0.11/risk/stats/permutation/__init__.py +0 -6
- risk_network-0.0.11/risk_network.egg-info/PKG-INFO +0 -798
- risk_network-0.0.11/risk_network.egg-info/SOURCES.txt +0 -46
- risk_network-0.0.11/setup.py +0 -67
- {risk_network-0.0.11 → risk_network-0.0.12b1}/LICENSE +0 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1}/setup.cfg +0 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/annotations/annotations.py +0 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/log/console.py +0 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/network.py +0 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk/network/plotter/utils/layout.py +0 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk_network.egg-info/dependency_links.txt +0 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk_network.egg-info/requires.txt +0 -0
- {risk_network-0.0.11 → risk_network-0.0.12b1/src}/risk_network.egg-info/top_level.txt +0 -0
@@ -0,0 +1,122 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: risk-network
|
3
|
+
Version: 0.0.12b1
|
4
|
+
Summary: A Python package for biological network analysis
|
5
|
+
Author-email: Ira Horecka <ira89@icloud.com>
|
6
|
+
License: GPL-3.0-or-later
|
7
|
+
Project-URL: Homepage, https://github.com/riskportal/network
|
8
|
+
Classifier: Intended Audience :: Developers
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
10
|
+
Classifier: Operating System :: OS Independent
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
18
|
+
Classifier: Development Status :: 4 - Beta
|
19
|
+
Requires-Python: >=3.8
|
20
|
+
Description-Content-Type: text/markdown
|
21
|
+
License-File: LICENSE
|
22
|
+
Requires-Dist: ipywidgets
|
23
|
+
Requires-Dist: leidenalg
|
24
|
+
Requires-Dist: markov_clustering
|
25
|
+
Requires-Dist: matplotlib
|
26
|
+
Requires-Dist: networkx
|
27
|
+
Requires-Dist: nltk
|
28
|
+
Requires-Dist: numpy
|
29
|
+
Requires-Dist: openpyxl
|
30
|
+
Requires-Dist: pandas
|
31
|
+
Requires-Dist: python-igraph
|
32
|
+
Requires-Dist: python-louvain
|
33
|
+
Requires-Dist: scikit-learn
|
34
|
+
Requires-Dist: scipy
|
35
|
+
Requires-Dist: statsmodels
|
36
|
+
Requires-Dist: threadpoolctl
|
37
|
+
Requires-Dist: tqdm
|
38
|
+
Dynamic: license-file
|
39
|
+
|
40
|
+
# RISK Network
|
41
|
+
|
42
|
+
<p align="center">
|
43
|
+
<img src="https://i.imgur.com/8TleEJs.png" width="50%" />
|
44
|
+
</p>
|
45
|
+
|
46
|
+
<br>
|
47
|
+
|
48
|
+

|
49
|
+
[risk-network on PyPI](https://pypi.python.org/pypi/risk-network)
|
50
|
+

|
51
|
+
[DOI: 10.5281/zenodo.xxxxxxx](https://doi.org/10.5281/zenodo.xxxxxxx)
|
52
|
+

|
53
|
+

|
54
|
+
|
55
|
+
**RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. RISK integrates community detection-based clustering, rigorous statistical enrichment analysis, and a modular framework to uncover biologically meaningful relationships and generate high-resolution visualizations. RISK supports diverse data formats and is optimized for large-scale network analysis, making it a valuable resource for researchers in systems biology and beyond.
|
56
|
+
|
57
|
+
## Documentation and Tutorial
|
58
|
+
|
59
|
+
Full documentation is available at:
|
60
|
+
|
61
|
+
- **Docs:** [https://riskportal.github.io/network-tutorial](https://riskportal.github.io/network-tutorial)
|
62
|
+
- **Tutorial Jupyter Notebook Repository:** [https://github.com/riskportal/network-tutorial](https://github.com/riskportal/network-tutorial)
|
63
|
+
|
64
|
+
## Installation
|
65
|
+
|
66
|
+
RISK is compatible with Python 3.8 or later and runs on all major operating systems. To install the latest version of RISK, run:
|
67
|
+
|
68
|
+
```bash
|
69
|
+
pip install risk-network --upgrade
|
70
|
+
```
|
71
|
+
|
72
|
+
## Features
|
73
|
+
|
74
|
+
- **Comprehensive Network Analysis**: Analyze biological networks (e.g., protein–protein interaction and genetic interaction networks) as well as non-biological networks.
|
75
|
+
- **Advanced Clustering Algorithms**: Supports Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap for identifying structured network regions.
|
76
|
+
- **Flexible Visualization**: Produce customizable, high-resolution network visualizations with kernel density estimate overlays, adjustable node and edge attributes, and export options in SVG, PNG, and PDF formats.
|
77
|
+
- **Efficient Data Handling**: Supports multiple input/output formats, including JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
|
78
|
+
- **Statistical Analysis**: Assess functional enrichment using hypergeometric, permutation (network-aware), binomial, chi-squared, Poisson, and z-score tests, ensuring statistical adaptability across datasets.
|
79
|
+
- **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
|
80
|
+
|
81
|
+
## Example Usage
|
82
|
+
|
83
|
+
We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network from Michaelis et al. (2023), filtering for proteins with six or more interactions to emphasize core functional relationships. RISK identified compact, statistically enriched clusters corresponding to biological processes such as ribosomal assembly and mitochondrial organization.
|
84
|
+
|
85
|
+
[Figure: RISK analysis of the yeast protein–protein interaction network](https://i.imgur.com/lJHJrJr.jpeg)
|
86
|
+
|
87
|
+
This figure highlights RISK’s capability to detect both established and novel functional modules within the yeast interactome.
|
88
|
+
|
89
|
+
## Citation
|
90
|
+
|
91
|
+
If you use RISK in your research, please cite:
|
92
|
+
|
93
|
+
**Horecka et al.**, "RISK: a next-generation tool for biological network annotation and visualization", **Bioinformatics**, 2025. DOI: [10.5281/zenodo.xxxxxxx](https://doi.org/10.5281/zenodo.xxxxxxx)
|
94
|
+
|
95
|
+
## Software Architecture and Implementation
|
96
|
+
|
97
|
+
RISK features a streamlined, modular architecture designed to meet diverse research needs. RISK’s modular design enables users to run individual components—such as clustering, statistical testing, or visualization—independently or in combination, depending on the analysis workflow. It includes dedicated modules for:
|
98
|
+
|
99
|
+
- **Data I/O**: Supports JSON, CSV, TSV, Excel, Cytoscape, and GPickle formats.
|
100
|
+
- **Clustering**: Supports multiple clustering methods, including Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap. Provides flexible distance metrics tailored to network structure.
|
101
|
+
- **Statistical Analysis**: Provides a suite of tests for overrepresentation analysis of annotations.
|
102
|
+
- **Visualization**: Offers customizable, high-resolution output in multiple formats, including SVG, PNG, and PDF.
|
103
|
+
- **Configuration Management**: Centralized parameters in risk.params ensure reproducibility and easy tuning for large-scale analyses.
|
104
|
+
|
105
|
+
## Performance and Efficiency
|
106
|
+
|
107
|
+
Benchmarking results demonstrate that RISK efficiently scales to networks exceeding hundreds of thousands of edges, maintaining low execution times and optimal memory usage across statistical tests.
|
108
|
+
|
109
|
+
## Contributing
|
110
|
+
|
111
|
+
We welcome contributions from the community:
|
112
|
+
|
113
|
+
- [Issues Tracker](https://github.com/riskportal/network/issues)
|
114
|
+
- [Source Code](https://github.com/riskportal/network/tree/main/risk)
|
115
|
+
|
116
|
+
## Support
|
117
|
+
|
118
|
+
If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/riskportal/network/issues) on GitHub.
|
119
|
+
|
120
|
+
## License
|
121
|
+
|
122
|
+
RISK is open source under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
@@ -17,7 +17,10 @@
|
|
17
17
|
|
18
18
|
## Documentation and Tutorial
|
19
19
|
|
20
|
-
|
20
|
+
Full documentation is available at:
|
21
|
+
|
22
|
+
- **Docs:** [https://riskportal.github.io/network-tutorial](https://riskportal.github.io/network-tutorial)
|
23
|
+
- **Tutorial Jupyter Notebook Repository:** [https://github.com/riskportal/network-tutorial](https://github.com/riskportal/network-tutorial)
|
21
24
|
|
22
25
|
## Installation
|
23
26
|
|
@@ -33,7 +36,7 @@ pip install risk-network --upgrade
|
|
33
36
|
- **Advanced Clustering Algorithms**: Supports Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap for identifying structured network regions.
|
34
37
|
- **Flexible Visualization**: Produce customizable, high-resolution network visualizations with kernel density estimate overlays, adjustable node and edge attributes, and export options in SVG, PNG, and PDF formats.
|
35
38
|
- **Efficient Data Handling**: Supports multiple input/output formats, including JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
|
36
|
-
- **Statistical Analysis**: Assess functional enrichment using hypergeometric, permutation, binomial, chi-squared, Poisson, and z-score tests, ensuring statistical adaptability across datasets.
|
39
|
+
- **Statistical Analysis**: Assess functional enrichment using hypergeometric, permutation (network-aware), binomial, chi-squared, Poisson, and z-score tests, ensuring statistical adaptability across datasets.
|
37
40
|
- **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
|
38
41
|
|
39
42
|
## Example Usage
|
@@ -52,12 +55,13 @@ If you use RISK in your research, please cite:
|
|
52
55
|
|
53
56
|
## Software Architecture and Implementation
|
54
57
|
|
55
|
-
RISK features a streamlined, modular architecture designed to meet diverse research needs. It includes dedicated modules for:
|
58
|
+
RISK features a streamlined, modular architecture designed to meet diverse research needs. RISK’s modular design enables users to run individual components—such as clustering, statistical testing, or visualization—independently or in combination, depending on the analysis workflow. It includes dedicated modules for:
|
56
59
|
|
57
60
|
- **Data I/O**: Supports JSON, CSV, TSV, Excel, Cytoscape, and GPickle formats.
|
58
61
|
- **Clustering**: Supports multiple clustering methods, including Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap. Provides flexible distance metrics tailored to network structure.
|
59
62
|
- **Statistical Analysis**: Provides a suite of tests for overrepresentation analysis of annotations.
|
60
63
|
- **Visualization**: Offers customizable, high-resolution output in multiple formats, including SVG, PNG, and PDF.
|
64
|
+
- **Configuration Management**: Centralized parameters in risk.params ensure reproducibility and easy tuning for large-scale analyses.
|
61
65
|
|
62
66
|
## Performance and Efficiency
|
63
67
|
|
@@ -77,7 +81,3 @@ If you encounter issues or have suggestions for new features, please use the [Is
|
|
77
81
|
## License
|
78
82
|
|
79
83
|
RISK is open source under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
80
|
-
|
81
|
-
---
|
82
|
-
|
83
|
-
**Note**: For detailed documentation and to access the interactive tutorial, please visit the links above.
|
@@ -1,20 +1,19 @@
|
|
1
1
|
[build-system]
|
2
|
-
requires = ["setuptools", "
|
2
|
+
requires = ["setuptools", "numpy"]
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "risk-network"
|
7
|
-
dynamic = ["version"]
|
7
|
+
dynamic = ["version"]
|
8
8
|
description = "A Python package for biological network analysis"
|
9
9
|
authors = [
|
10
10
|
{ name = "Ira Horecka", email = "ira89@icloud.com" },
|
11
11
|
]
|
12
12
|
readme = "README.md"
|
13
|
-
|
13
|
+
requires-python = ">=3.8"
|
14
14
|
classifiers = [
|
15
15
|
"Intended Audience :: Developers",
|
16
16
|
"Intended Audience :: Science/Research",
|
17
|
-
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
|
18
17
|
"Operating System :: OS Independent",
|
19
18
|
"Programming Language :: Python :: 3",
|
20
19
|
"Programming Language :: Python :: 3.8",
|
@@ -43,4 +42,21 @@ dependencies = [
|
|
43
42
|
"threadpoolctl",
|
44
43
|
"tqdm",
|
45
44
|
]
|
46
|
-
|
45
|
+
|
46
|
+
[project.license]
|
47
|
+
text = "GPL-3.0-or-later"
|
48
|
+
|
49
|
+
[project.urls]
|
50
|
+
"Homepage" = "https://github.com/riskportal/network"
|
51
|
+
|
52
|
+
[tool.setuptools]
|
53
|
+
package-dir = {"" = "src"}
|
54
|
+
|
55
|
+
[tool.setuptools.packages.find]
|
56
|
+
where = ["src"]
|
57
|
+
|
58
|
+
[tool.setuptools.dynamic]
|
59
|
+
version = { attr = "risk.__version__" }
|
60
|
+
|
61
|
+
[tool.pytest.ini_options]
|
62
|
+
pythonpath = ["src"]
|
@@ -10,7 +10,7 @@ import networkx as nx
|
|
10
10
|
import pandas as pd
|
11
11
|
|
12
12
|
from risk.annotations.annotations import load_annotations
|
13
|
-
from risk.log import
|
13
|
+
from risk.log import log_header, logger, params
|
14
14
|
|
15
15
|
|
16
16
|
class AnnotationsIO:
|
@@ -20,9 +20,6 @@ class AnnotationsIO:
|
|
20
20
|
and to export parameter data to various formats like JSON, CSV, and text files.
|
21
21
|
"""
|
22
22
|
|
23
|
-
def __init__(self):
|
24
|
-
pass
|
25
|
-
|
26
23
|
def load_json_annotation(
|
27
24
|
self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
|
28
25
|
) -> Dict[str, Any]:
|
@@ -42,7 +39,7 @@ class AnnotationsIO:
|
|
42
39
|
params.log_annotations(
|
43
40
|
filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
|
44
41
|
)
|
45
|
-
_log_loading(filetype, filepath=filepath)
|
42
|
+
self._log_loading(filetype, filepath=filepath)
|
46
43
|
|
47
44
|
# Load the JSON file into a dictionary
|
48
45
|
with open(filepath, "r", encoding="utf-8") as file:
|
@@ -81,7 +78,7 @@ class AnnotationsIO:
|
|
81
78
|
params.log_annotations(
|
82
79
|
filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
|
83
80
|
)
|
84
|
-
_log_loading(filetype, filepath=filepath)
|
81
|
+
self._log_loading(filetype, filepath=filepath)
|
85
82
|
|
86
83
|
# Load the specified sheet from the Excel file
|
87
84
|
annotation = pd.read_excel(filepath, sheet_name=sheet_name)
|
@@ -123,10 +120,10 @@ class AnnotationsIO:
|
|
123
120
|
params.log_annotations(
|
124
121
|
filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
|
125
122
|
)
|
126
|
-
_log_loading(filetype, filepath=filepath)
|
123
|
+
self._log_loading(filetype, filepath=filepath)
|
127
124
|
|
128
125
|
# Load the CSV file into a dictionary
|
129
|
-
annotations_input = _load_matrix_file(
|
126
|
+
annotations_input = self._load_matrix_file(
|
130
127
|
filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
|
131
128
|
)
|
132
129
|
|
@@ -161,10 +158,10 @@ class AnnotationsIO:
|
|
161
158
|
params.log_annotations(
|
162
159
|
filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
|
163
160
|
)
|
164
|
-
_log_loading(filetype, filepath=filepath)
|
161
|
+
self._log_loading(filetype, filepath=filepath)
|
165
162
|
|
166
163
|
# Load the TSV file into a dictionary
|
167
|
-
annotations_input = _load_matrix_file(
|
164
|
+
annotations_input = self._load_matrix_file(
|
168
165
|
filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
|
169
166
|
)
|
170
167
|
|
@@ -183,6 +180,9 @@ class AnnotationsIO:
|
|
183
180
|
|
184
181
|
Returns:
|
185
182
|
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
|
183
|
+
|
184
|
+
Raises:
|
185
|
+
TypeError: If the content is not a dictionary.
|
186
186
|
"""
|
187
187
|
# Ensure the input content is a dictionary
|
188
188
|
if not isinstance(content, dict):
|
@@ -193,48 +193,49 @@ class AnnotationsIO:
|
|
193
193
|
filetype = "Dictionary"
|
194
194
|
# Log the loading of the annotations from the dictionary
|
195
195
|
params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
|
196
|
-
_log_loading(filetype, "In-memory dictionary")
|
196
|
+
self._log_loading(filetype, "In-memory dictionary")
|
197
197
|
|
198
198
|
# Load the annotations as a dictionary from the provided dictionary
|
199
199
|
return load_annotations(network, content, min_nodes_per_term)
|
200
200
|
|
201
|
+
def _load_matrix_file(
|
202
|
+
self,
|
203
|
+
filepath: str,
|
204
|
+
label_colname: str,
|
205
|
+
nodes_colname: str,
|
206
|
+
delimiter: str = ",",
|
207
|
+
nodes_delimiter: str = ";",
|
208
|
+
) -> Dict[str, Any]:
|
209
|
+
"""Load annotations from a CSV or TSV file and convert them to a dictionary.
|
201
210
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
) -> Dict[str, Any]:
|
209
|
-
"""Load annotations from a CSV or TSV file and convert them to a dictionary.
|
210
|
-
|
211
|
-
Args:
|
212
|
-
filepath (str): Path to the annotation file.
|
213
|
-
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
214
|
-
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
215
|
-
delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
|
216
|
-
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
217
|
-
|
218
|
-
Returns:
|
219
|
-
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
|
220
|
-
"""
|
221
|
-
# Load the CSV or TSV file into a DataFrame
|
222
|
-
annotation = pd.read_csv(filepath, delimiter=delimiter)
|
223
|
-
# Split the nodes column by the nodes_delimiter to handle multiple nodes per label
|
224
|
-
annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
|
225
|
-
# Create a dictionary pairing labels with their corresponding list of nodes
|
226
|
-
label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
|
227
|
-
return label_node_dict
|
211
|
+
Args:
|
212
|
+
filepath (str): Path to the annotation file.
|
213
|
+
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
214
|
+
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
215
|
+
delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
|
216
|
+
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
228
217
|
|
218
|
+
Returns:
|
219
|
+
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
|
220
|
+
"""
|
221
|
+
# Load the CSV or TSV file into a DataFrame
|
222
|
+
annotation = pd.read_csv(filepath, delimiter=delimiter)
|
223
|
+
# Split the nodes column by the nodes_delimiter to handle multiple nodes per label
|
224
|
+
annotation[nodes_colname] = annotation[nodes_colname].apply(
|
225
|
+
lambda x: x.split(nodes_delimiter)
|
226
|
+
)
|
227
|
+
# Create a dictionary pairing labels with their corresponding list of nodes
|
228
|
+
label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
|
229
|
+
return label_node_dict
|
229
230
|
|
230
|
-
def _log_loading(filetype: str, filepath: str = "") -> None:
|
231
|
-
|
231
|
+
def _log_loading(self, filetype: str, filepath: str = "") -> None:
|
232
|
+
"""Log information about the annotation file being loaded.
|
232
233
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
234
|
+
Args:
|
235
|
+
filetype (str): The type of the annotation source being loaded (e.g., 'Dictionary').
|
236
|
+
filepath (str, optional): The path to the file being loaded.
|
237
|
+
"""
|
238
|
+
log_header("Loading annotations")
|
239
|
+
logger.debug(f"Filetype: {filetype}")
|
240
|
+
if filepath:
|
241
|
+
logger.debug(f"Filepath: {filepath}")
|
@@ -11,7 +11,7 @@ from typing import Any, Dict
|
|
11
11
|
|
12
12
|
import numpy as np
|
13
13
|
|
14
|
-
from risk.log.console import
|
14
|
+
from risk.log.console import log_header, logger
|
15
15
|
|
16
16
|
# Suppress all warnings - this is to resolve warnings from multiprocessing
|
17
17
|
warnings.filterwarnings("ignore")
|
@@ -137,7 +137,7 @@ class Params:
|
|
137
137
|
Dict[str, Any]: A dictionary containing the processed parameters.
|
138
138
|
"""
|
139
139
|
log_header("Loading parameters")
|
140
|
-
return _convert_ndarray_to_list(
|
140
|
+
return self._convert_ndarray_to_list(
|
141
141
|
{
|
142
142
|
"annotations": self.annotations,
|
143
143
|
"datetime": self.datetime,
|
@@ -148,25 +148,24 @@ class Params:
|
|
148
148
|
}
|
149
149
|
)
|
150
150
|
|
151
|
+
def _convert_ndarray_to_list(self, d: Dict[str, Any]) -> Dict[str, Any]:
|
152
|
+
"""Recursively convert all np.ndarray values in the dictionary to lists.
|
151
153
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
Args:
|
156
|
-
d (Dict[str, Any]): The dictionary to process.
|
154
|
+
Args:
|
155
|
+
d (Dict[str, Any]): The dictionary to process.
|
157
156
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
157
|
+
Returns:
|
158
|
+
Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
|
159
|
+
"""
|
160
|
+
if isinstance(d, dict):
|
161
|
+
# Recursively process each value in the dictionary
|
162
|
+
return {k: self._convert_ndarray_to_list(v) for k, v in d.items()}
|
163
|
+
if isinstance(d, list):
|
164
|
+
# Recursively process each item in the list
|
165
|
+
return [self._convert_ndarray_to_list(v) for v in d]
|
166
|
+
if isinstance(d, np.ndarray):
|
167
|
+
# Convert numpy arrays to lists
|
168
|
+
return d.tolist()
|
169
|
+
|
170
|
+
# Return the value unchanged if it's not a dict, list, or ndarray
|
171
|
+
return d
|
@@ -10,9 +10,9 @@ import networkx as nx
|
|
10
10
|
import numpy as np
|
11
11
|
from scipy.sparse import csr_matrix
|
12
12
|
|
13
|
-
from risk.log import
|
13
|
+
from risk.log import log_header, logger, params
|
14
14
|
from risk.neighborhoods.neighborhoods import get_network_neighborhoods
|
15
|
-
from risk.stats import (
|
15
|
+
from risk.neighborhoods.stats import (
|
16
16
|
compute_binom_test,
|
17
17
|
compute_chi2_test,
|
18
18
|
compute_hypergeom_test,
|
@@ -8,7 +8,7 @@ import igraph as ig
|
|
8
8
|
import markov_clustering as mc
|
9
9
|
import networkx as nx
|
10
10
|
import numpy as np
|
11
|
-
from leidenalg import
|
11
|
+
from leidenalg import RBConfigurationVertexPartition, find_partition
|
12
12
|
from networkx.algorithms.community import greedy_modularity_communities
|
13
13
|
from scipy.sparse import csr_matrix
|
14
14
|
|
@@ -27,6 +27,10 @@ def calculate_greedy_modularity_neighborhoods(
|
|
27
27
|
|
28
28
|
Returns:
|
29
29
|
csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
|
30
|
+
|
31
|
+
Raises:
|
32
|
+
ValueError: If the subgraph has no edges after filtering.
|
33
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
30
34
|
"""
|
31
35
|
# Create a subgraph with the shortest edges based on the rank fraction
|
32
36
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -67,6 +71,10 @@ def calculate_label_propagation_neighborhoods(
|
|
67
71
|
|
68
72
|
Returns:
|
69
73
|
csr_matrix: A binary neighborhood matrix (CSR) based on Label Propagation.
|
74
|
+
|
75
|
+
Raises:
|
76
|
+
ValueError: If the subgraph has no edges after filtering.
|
77
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
70
78
|
"""
|
71
79
|
# Create a subgraph with the shortest edges based on the rank fraction
|
72
80
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -115,6 +123,10 @@ def calculate_leiden_neighborhoods(
|
|
115
123
|
|
116
124
|
Returns:
|
117
125
|
csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
|
126
|
+
|
127
|
+
Raises:
|
128
|
+
ValueError: If the subgraph has no edges after filtering.
|
129
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
118
130
|
"""
|
119
131
|
# Create a subgraph with the shortest edges based on the rank fraction
|
120
132
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -167,6 +179,10 @@ def calculate_louvain_neighborhoods(
|
|
167
179
|
|
168
180
|
Returns:
|
169
181
|
csr_matrix: A binary neighborhood matrix in CSR format.
|
182
|
+
|
183
|
+
Raises:
|
184
|
+
ValueError: If the subgraph has no edges after filtering.
|
185
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
170
186
|
"""
|
171
187
|
# Create a subgraph with the shortest edges based on the rank fraction
|
172
188
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -215,9 +231,10 @@ def calculate_markov_clustering_neighborhoods(
|
|
215
231
|
Returns:
|
216
232
|
csr_matrix: A binary neighborhood matrix (CSR) based on Markov Clustering.
|
217
233
|
|
218
|
-
|
219
|
-
|
220
|
-
|
234
|
+
Raises:
|
235
|
+
ValueError: If the subgraph has no edges after filtering.
|
236
|
+
RuntimeError: If MCL fails to run.
|
237
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
221
238
|
"""
|
222
239
|
# Create a subgraph with the shortest edges based on the rank fraction
|
223
240
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -283,6 +300,10 @@ def calculate_spinglass_neighborhoods(
|
|
283
300
|
|
284
301
|
Returns:
|
285
302
|
csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
|
303
|
+
|
304
|
+
Raises:
|
305
|
+
ValueError: If the subgraph has no edges after filtering.
|
306
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
286
307
|
"""
|
287
308
|
# Create a subgraph with the shortest edges based on the rank fraction
|
288
309
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -343,6 +364,10 @@ def calculate_walktrap_neighborhoods(
|
|
343
364
|
|
344
365
|
Returns:
|
345
366
|
csr_matrix: A binary neighborhood matrix (CSR) based on Walktrap communities.
|
367
|
+
|
368
|
+
Raises:
|
369
|
+
ValueError: If the subgraph has no edges after filtering.
|
370
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
346
371
|
"""
|
347
372
|
# Create a subgraph with the shortest edges based on the rank fraction
|
348
373
|
subnetwork = _create_percentile_limited_subgraph(
|
@@ -384,6 +409,10 @@ def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: fl
|
|
384
409
|
Returns:
|
385
410
|
nx.Graph: A subgraph with nodes and edges where the edges are within the shortest
|
386
411
|
specified rank fraction.
|
412
|
+
|
413
|
+
Raises:
|
414
|
+
ValueError: If no edges with 'length' attributes are found in the graph.
|
415
|
+
Warning: If the resulting subgraph has no edges after filtering.
|
387
416
|
"""
|
388
417
|
# Step 1: Extract edges with their lengths
|
389
418
|
edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]
|
@@ -9,19 +9,18 @@ from typing import Tuple, Union
|
|
9
9
|
import numpy as np
|
10
10
|
import pandas as pd
|
11
11
|
from numpy.linalg import LinAlgError
|
12
|
-
from scipy.cluster.hierarchy import
|
12
|
+
from scipy.cluster.hierarchy import fcluster, linkage
|
13
13
|
from sklearn.metrics import silhouette_score
|
14
14
|
from tqdm import tqdm
|
15
15
|
|
16
16
|
from risk.annotations import get_weighted_description
|
17
17
|
from risk.log import logger
|
18
18
|
|
19
|
-
|
20
19
|
# Define constants for clustering
|
21
20
|
# fmt: off
|
22
21
|
LINKAGE_METHODS = {"single", "complete", "average", "weighted", "centroid", "median", "ward"}
|
23
22
|
LINKAGE_METRICS = {
|
24
|
-
"braycurtis","canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
|
23
|
+
"braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
|
25
24
|
"hamming", "jaccard", "jensenshannon", "kulczynski1", "mahalanobis", "matching", "minkowski",
|
26
25
|
"rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule",
|
27
26
|
}
|
@@ -49,6 +48,9 @@ def define_domains(
|
|
49
48
|
|
50
49
|
Returns:
|
51
50
|
pd.DataFrame: DataFrame with the primary domain for each node.
|
51
|
+
|
52
|
+
Raises:
|
53
|
+
ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
|
52
54
|
"""
|
53
55
|
try:
|
54
56
|
if linkage_criterion == "off":
|
@@ -242,7 +244,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
|
|
242
244
|
# Evaluating optimal linkage method and metric
|
243
245
|
for method, metric in tqdm(
|
244
246
|
product(linkage_methods, linkage_metrics),
|
245
|
-
desc="Evaluating
|
247
|
+
desc="Evaluating linkage methods and metrics",
|
246
248
|
total=total_combinations,
|
247
249
|
bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
|
248
250
|
):
|