napistu 0.3.6__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {napistu-0.3.6 → napistu-0.4.0}/PKG-INFO +2 -2
- {napistu-0.3.6 → napistu-0.4.0}/setup.cfg +2 -2
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/__main__.py +28 -13
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/consensus.py +19 -25
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/constants.py +102 -83
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/indices.py +3 -1
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/napistu_edgelist.py +4 -4
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/sbml.py +298 -295
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/string.py +14 -18
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/trrust.py +22 -27
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/matching/interactions.py +41 -39
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/matching/species.py +1 -1
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/modify/gaps.py +2 -1
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/constants.py +61 -45
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/data_handling.py +1 -1
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/neighborhoods.py +3 -3
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/net_create.py +440 -616
- napistu-0.4.0/src/napistu/network/net_create_utils.py +734 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/net_propagation.py +1 -1
- napistu-0.3.6/src/napistu/network/napistu_graph_core.py → napistu-0.4.0/src/napistu/network/ng_core.py +57 -15
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/ng_utils.py +28 -21
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/paths.py +4 -4
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/precompute.py +35 -74
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ontologies/genodexito.py +5 -1
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ontologies/renaming.py +4 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/sbml_dfs_core.py +127 -64
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/sbml_dfs_utils.py +50 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/utils.py +132 -46
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu.egg-info/PKG-INFO +2 -2
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu.egg-info/SOURCES.txt +4 -1
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu.egg-info/requires.txt +1 -1
- napistu-0.4.0/src/tests/conftest.py +312 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_consensus.py +74 -5
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_gaps.py +26 -15
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_network_data_handling.py +5 -2
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_network_net_create.py +93 -202
- napistu-0.4.0/src/tests/test_network_net_create_utils.py +538 -0
- napistu-0.4.0/src/tests/test_network_ng_core.py +19 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_network_ng_utils.py +1 -1
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_network_precompute.py +5 -4
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_ontologies_renaming.py +28 -24
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_rpy2_callr.py +0 -1
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_rpy2_init.py +0 -1
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_sbml_dfs_core.py +165 -15
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_sbml_dfs_utils.py +45 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_utils.py +45 -2
- napistu-0.3.6/src/tests/conftest.py +0 -154
- {napistu-0.3.6 → napistu-0.4.0}/LICENSE +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/README.md +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/pyproject.toml +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/setup.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/context/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/context/discretize.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/context/filtering.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/gcs/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/gcs/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/gcs/downloads.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/gcs/utils.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/identifiers.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/bigg.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/gtex.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/hpa.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/identifiers_etl.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/obo.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/psi_mi.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/reactome.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ingestion/yeast.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/matching/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/matching/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/matching/mount.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/__main__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/client.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/codebase.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/codebase_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/component_base.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/config.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/documentation.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/documentation_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/execution.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/health.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/profiles.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/server.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/tutorials.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/tutorials_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/mcp/utils.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/modify/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/modify/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/modify/curation.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/modify/pathwayannot.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/modify/uncompartmentalize.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/network/ig_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ontologies/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ontologies/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ontologies/dogma.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/ontologies/mygene.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/rpy2/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/rpy2/callr.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/rpy2/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/rpy2/rids.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/scverse/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/scverse/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/scverse/loading.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu/source.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu.egg-info/dependency_links.txt +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu.egg-info/entry_points.txt +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/napistu.egg-info/top_level.txt +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_constants.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_context_discretize.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_context_filtering.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_curation.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_data/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_gcs.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_identifiers.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_indices.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_ingestion_napistu_edgelist.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_ingestion_obo.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_matching_interactions.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_matching_mount.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_matching_species.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_mcp_config.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_mcp_documentation_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_mcp_server.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_network_ig_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_network_neighborhoods.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_network_net_propagation.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_network_paths.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_ontologies_genodexito.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_ontologies_mygene.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_pathwayannot.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_sbml.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_sbo.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_scverse_loading.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_set_coverage.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_source.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/test_uncompartmentalize.py +0 -0
- {napistu-0.3.6 → napistu-0.4.0}/src/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: napistu
|
3
|
-
Version: 0.3.6
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Connecting high-dimensional data to curated pathways
|
5
5
|
Home-page: https://github.com/napistu/napistu-py
|
6
6
|
Author: Sean Hackett
|
@@ -27,6 +27,7 @@ Requires-Dist: mygene<4.0.0,>=3.0.0
|
|
27
27
|
Requires-Dist: numpy<3.0.0,>=1.24.0
|
28
28
|
Requires-Dist: pandas<3.0.0,>=1.5.0
|
29
29
|
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
30
|
+
Requires-Dist: pyarrow<20.0.0,>=15.0.0
|
30
31
|
Requires-Dist: python-libsbml
|
31
32
|
Requires-Dist: requests>=2.25.0
|
32
33
|
Requires-Dist: scipy<2.0.0,>=1.10.0
|
@@ -51,7 +52,6 @@ Requires-Dist: markdown>=3.4.0; extra == "mcp"
|
|
51
52
|
Requires-Dist: jupyter-client>=7.0.0; extra == "mcp"
|
52
53
|
Requires-Dist: nbformat>=5.0.0; extra == "mcp"
|
53
54
|
Provides-Extra: rpy2
|
54
|
-
Requires-Dist: pyarrow<19.0.0,>=15.0.0; extra == "rpy2"
|
55
55
|
Requires-Dist: rpy2<4.0.0,>=3.5.0; extra == "rpy2"
|
56
56
|
Requires-Dist: rpy2-arrow<1.0.0,>=0.1.0; extra == "rpy2"
|
57
57
|
Provides-Extra: scverse
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[metadata]
|
2
2
|
name = napistu
|
3
|
-
version = 0.3.6
|
3
|
+
version = 0.4.0
|
4
4
|
description = Connecting high-dimensional data to curated pathways
|
5
5
|
long_description = file: README.md
|
6
6
|
long_description_content_type = text/markdown
|
@@ -33,6 +33,7 @@ install_requires =
|
|
33
33
|
numpy>=1.24.0,<3.0.0
|
34
34
|
pandas>=1.5.0,<3.0.0
|
35
35
|
pydantic>=2.0.0,<3.0.0
|
36
|
+
pyarrow>=15.0.0,<20.0.0
|
36
37
|
python-libsbml
|
37
38
|
requests>=2.25.0
|
38
39
|
scipy>=1.10.0,<2.0.0
|
@@ -69,7 +70,6 @@ mcp =
|
|
69
70
|
jupyter-client>=7.0.0
|
70
71
|
nbformat>=5.0.0
|
71
72
|
rpy2 =
|
72
|
-
pyarrow>=15.0.0,<19.0.0
|
73
73
|
rpy2>=3.5.0,<4.0.0
|
74
74
|
rpy2-arrow>=0.1.0,<1.0.0
|
75
75
|
scverse =
|
@@ -12,7 +12,7 @@ import click_logging
|
|
12
12
|
import napistu
|
13
13
|
import igraph as ig
|
14
14
|
import pandas as pd
|
15
|
-
from napistu import consensus as
|
15
|
+
from napistu import consensus as napistu_consensus
|
16
16
|
from napistu import indices
|
17
17
|
from napistu import sbml_dfs_core
|
18
18
|
from napistu import utils
|
@@ -65,7 +65,7 @@ def ingestion():
|
|
65
65
|
"--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
|
66
66
|
)
|
67
67
|
@click_logging.simple_verbosity_option(logger)
|
68
|
-
def load_reactome(base_folder: str, overwrite=True):
|
68
|
+
def ingest_reactome(base_folder: str, overwrite=True):
|
69
69
|
logger.info("Start downloading Reactome to %s", base_folder)
|
70
70
|
reactome.reactome_sbml_download(f"{base_folder}/sbml", overwrite=overwrite)
|
71
71
|
|
@@ -76,7 +76,7 @@ def load_reactome(base_folder: str, overwrite=True):
|
|
76
76
|
"--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
|
77
77
|
)
|
78
78
|
@click_logging.simple_verbosity_option(logger)
|
79
|
-
def load_bigg(base_folder: str, overwrite: bool):
|
79
|
+
def ingest_bigg(base_folder: str, overwrite: bool):
|
80
80
|
logger.info("Start downloading Bigg to %s", base_folder)
|
81
81
|
bigg.bigg_sbml_download(base_folder, overwrite)
|
82
82
|
|
@@ -84,7 +84,7 @@ def load_bigg(base_folder: str, overwrite: bool):
|
|
84
84
|
@ingestion.command(name="trrust")
|
85
85
|
@click.argument("target_uri", type=str)
|
86
86
|
@click_logging.simple_verbosity_option(logger)
|
87
|
-
def load_ttrust(target_uri: str):
|
87
|
+
def ingest_ttrust(target_uri: str):
|
88
88
|
logger.info("Start downloading TRRUST to %s", target_uri)
|
89
89
|
trrust.download_trrust(target_uri)
|
90
90
|
|
@@ -98,7 +98,7 @@ def load_ttrust(target_uri: str):
|
|
98
98
|
help="URL to download the zipped protein atlas subcellular localization tsv from.",
|
99
99
|
)
|
100
100
|
@click_logging.simple_verbosity_option(logger)
|
101
|
-
def load_proteinatlas_subcell(target_uri: str, url: str):
|
101
|
+
def ingest_proteinatlas_subcell(target_uri: str, url: str):
|
102
102
|
hpa.download_hpa_data(target_uri, url)
|
103
103
|
|
104
104
|
|
@@ -111,7 +111,7 @@ def load_proteinatlas_subcell(target_uri: str, url: str):
|
|
111
111
|
help="URL to download the gtex file from.",
|
112
112
|
)
|
113
113
|
@click_logging.simple_verbosity_option(logger)
|
114
|
-
def load_gtex_rnaseq(target_uri: str, url: str):
|
114
|
+
def ingest_gtex_rnaseq(target_uri: str, url: str):
|
115
115
|
gtex.download_gtex_rnaseq(target_uri, url)
|
116
116
|
|
117
117
|
|
@@ -124,7 +124,7 @@ def load_gtex_rnaseq(target_uri: str, url: str):
|
|
124
124
|
help="Species name (e.g., Homo sapiens).",
|
125
125
|
)
|
126
126
|
@click_logging.simple_verbosity_option(logger)
|
127
|
-
def load_string_db(target_uri: str, species: str):
|
127
|
+
def ingest_string_db(target_uri: str, species: str):
|
128
128
|
string.download_string(target_uri, species)
|
129
129
|
|
130
130
|
|
@@ -137,7 +137,7 @@ def load_string_db(target_uri: str, species: str):
|
|
137
137
|
help="Species name (e.g., Homo sapiens).",
|
138
138
|
)
|
139
139
|
@click_logging.simple_verbosity_option(logger)
|
140
|
-
def
|
140
|
+
def ingest_string_aliases(target_uri: str, species: str):
|
141
141
|
string.download_string_aliases(target_uri, species)
|
142
142
|
|
143
143
|
|
@@ -289,7 +289,7 @@ def create_consensus(
|
|
289
289
|
)
|
290
290
|
pw_index_df["species"] = "unknown"
|
291
291
|
pw_index = indices.PWIndex(pw_index=pw_index_df, validate_paths=False)
|
292
|
-
consensus_model =
|
292
|
+
consensus_model = napistu_consensus.construct_consensus_model(
|
293
293
|
sbml_dfs_dict, pw_index, dogmatic
|
294
294
|
)
|
295
295
|
utils.save_pickle(output_model_uri, consensus_model)
|
@@ -621,7 +621,11 @@ def exporter():
|
|
621
621
|
"--format", "-f", default="pickle", help="Output format: gml, edgelist, pickle"
|
622
622
|
)
|
623
623
|
@click.option(
|
624
|
-
"--
|
624
|
+
"--wiring_approach",
|
625
|
+
"-g",
|
626
|
+
type=str,
|
627
|
+
default="bipartite",
|
628
|
+
help="bipartite or regulatory",
|
625
629
|
)
|
626
630
|
@click.option(
|
627
631
|
"--weighting_strategy",
|
@@ -645,7 +649,7 @@ def export_igraph(
|
|
645
649
|
output_uri: str,
|
646
650
|
graph_attrs_spec_uri: str | None,
|
647
651
|
format: str,
|
648
|
-
|
652
|
+
wiring_approach: str,
|
649
653
|
weighting_strategy: str,
|
650
654
|
directed: bool,
|
651
655
|
reverse: bool,
|
@@ -663,7 +667,7 @@ def export_igraph(
|
|
663
667
|
reaction_graph_attrs=graph_attrs_spec,
|
664
668
|
directed=directed,
|
665
669
|
edge_reversed=reverse,
|
666
|
-
|
670
|
+
wiring_approach=wiring_approach,
|
667
671
|
weighting_strategy=weighting_strategy,
|
668
672
|
verbose=True,
|
669
673
|
)
|
@@ -753,7 +757,7 @@ def export_precomputed_distances(
|
|
753
757
|
weights_vars=weights_vars_list,
|
754
758
|
)
|
755
759
|
|
756
|
-
|
760
|
+
utils.save_parquet(precomputed_distances, output_uri)
|
757
761
|
|
758
762
|
|
759
763
|
@exporter.command(name="export_smbl_dfs_tables")
|
@@ -855,6 +859,17 @@ def copy_uri(input_uri, output_uri, is_file=True):
|
|
855
859
|
utils.copy_uri(input_uri, output_uri, is_file=is_file)
|
856
860
|
|
857
861
|
|
862
|
+
@helpers.command(name="validate_sbml_dfs")
|
863
|
+
@click.argument("input_uri", type=str)
|
864
|
+
@click_logging.simple_verbosity_option(logger)
|
865
|
+
def validate_sbml_dfs(input_uri):
|
866
|
+
"""Validate a sbml_dfs object"""
|
867
|
+
sbml_dfs = utils.load_pickle(input_uri)
|
868
|
+
sbml_dfs.validate()
|
869
|
+
|
870
|
+
logger.info(f"Successfully validated: {input_uri}")
|
871
|
+
|
872
|
+
|
858
873
|
@click.group()
|
859
874
|
def stats():
|
860
875
|
"""Various functions to calculate network statistics
|
@@ -15,10 +15,13 @@ from napistu import source
|
|
15
15
|
from napistu import utils
|
16
16
|
from napistu.ingestion import sbml
|
17
17
|
|
18
|
+
from napistu.constants import SCHEMA_DEFS
|
18
19
|
from napistu.constants import SBML_DFS
|
20
|
+
from napistu.constants import SBML_DFS_SCHEMA
|
19
21
|
from napistu.constants import IDENTIFIERS
|
20
22
|
from napistu.constants import SOURCE_SPEC
|
21
23
|
from napistu.constants import BQB_DEFINING_ATTRS
|
24
|
+
from napistu.constants import VALID_BQB_TERMS
|
22
25
|
|
23
26
|
logger = logging.getLogger(__name__)
|
24
27
|
# set the level to show logger.info message
|
@@ -137,8 +140,7 @@ def unnest_SBML_df(
|
|
137
140
|
"""
|
138
141
|
|
139
142
|
# check that all sbml_dfs have the same schema
|
140
|
-
|
141
|
-
table_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[table]
|
143
|
+
table_schema = SBML_DFS_SCHEMA.SCHEMA[table]
|
142
144
|
|
143
145
|
df_list = [
|
144
146
|
getattr(sbml_dfs_dict[x], table).assign(model=x) for x in sbml_dfs_dict.keys()
|
@@ -192,7 +194,7 @@ def construct_meta_entities_identifiers(
|
|
192
194
|
agg_tbl = unnest_SBML_df(sbml_dfs_dict, table=table)
|
193
195
|
|
194
196
|
# since all sbml_dfs have the same schema pull out one schema for reference
|
195
|
-
table_schema =
|
197
|
+
table_schema = SBML_DFS_SCHEMA.SCHEMA[table]
|
196
198
|
|
197
199
|
# update foreign keys using provided lookup tables
|
198
200
|
if "fk" in table_schema.keys():
|
@@ -244,6 +246,8 @@ def reduce_to_consensus_ids(
|
|
244
246
|
Series mapping the index of the aggregated entities to new consensus IDs.
|
245
247
|
"""
|
246
248
|
# Step 1: Build consensus identifiers to create clusters of equivalent entities
|
249
|
+
table_name = table_schema[SCHEMA_DEFS.TABLE]
|
250
|
+
logger.debug(f"Building consensus identifiers for {table_name}")
|
247
251
|
indexed_cluster, cluster_consensus_identifiers = build_consensus_identifiers(
|
248
252
|
sbml_df, table_schema, defining_biological_qualifiers
|
249
253
|
)
|
@@ -252,25 +256,28 @@ def reduce_to_consensus_ids(
|
|
252
256
|
agg_table_harmonized = sbml_df.join(indexed_cluster)
|
253
257
|
|
254
258
|
# Step 3: Create lookup table for entity IDs
|
259
|
+
logger.debug(f"Creating lookup table for {table_name}")
|
255
260
|
lookup_table = _create_entity_lookup_table(agg_table_harmonized, table_schema)
|
256
261
|
|
257
262
|
# Step 4: Add nameness scores to help select representative names
|
258
263
|
agg_table_harmonized = utils._add_nameness_score_wrapper(
|
259
|
-
agg_table_harmonized,
|
264
|
+
agg_table_harmonized, SCHEMA_DEFS.LABEL, table_schema
|
260
265
|
)
|
261
266
|
|
262
267
|
# Step 5: Prepare the consensus table with one row per unique entity
|
268
|
+
logger.debug(f"Preparing consensus table for {table_name}")
|
263
269
|
new_id_table = _prepare_consensus_table(
|
264
270
|
agg_table_harmonized, table_schema, cluster_consensus_identifiers
|
265
271
|
)
|
266
272
|
|
267
273
|
# Step 6: Add source information if required
|
268
|
-
if
|
274
|
+
if SCHEMA_DEFS.SOURCE in table_schema.keys():
|
269
275
|
new_id_table = _add_consensus_sources(
|
270
276
|
new_id_table, agg_table_harmonized, lookup_table, table_schema, pw_index
|
271
277
|
)
|
272
278
|
|
273
279
|
# Step 7: Validate the resulting table
|
280
|
+
logger.debug(f"Validating consensus table for {table_name}")
|
274
281
|
_validate_consensus_table(new_id_table, sbml_df)
|
275
282
|
|
276
283
|
return new_id_table, lookup_table
|
@@ -667,7 +674,7 @@ def construct_meta_entities_members(
|
|
667
674
|
defined_by_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[defined_by]
|
668
675
|
|
669
676
|
# Step 2: Prepare the member table and validate its structure
|
670
|
-
agg_tbl,
|
677
|
+
agg_tbl, _ = _prepare_member_table(
|
671
678
|
sbml_dfs_dict,
|
672
679
|
defined_by,
|
673
680
|
defined_lookup_tables,
|
@@ -681,9 +688,7 @@ def construct_meta_entities_members(
|
|
681
688
|
membership_lookup = _create_membership_lookup(agg_tbl, table_schema)
|
682
689
|
|
683
690
|
# Step 4: Create consensus entities and lookup table
|
684
|
-
|
685
|
-
membership_lookup, table_schema
|
686
|
-
)
|
691
|
+
_, lookup_table = _create_entity_consensus(membership_lookup, table_schema)
|
687
692
|
|
688
693
|
# Step 5: Log merger information
|
689
694
|
report_consensus_merges(
|
@@ -1507,6 +1512,11 @@ def _filter_identifiers_by_qualifier(
|
|
1507
1512
|
pd.DataFrame
|
1508
1513
|
Filtered identifiers
|
1509
1514
|
"""
|
1515
|
+
|
1516
|
+
invalid_bqbs = set(meta_identifiers[IDENTIFIERS.BQB]) - set(VALID_BQB_TERMS)
|
1517
|
+
if len(invalid_bqbs) > 0:
|
1518
|
+
logger.warning(f"Invalid biological qualifiers: {invalid_bqbs}")
|
1519
|
+
|
1510
1520
|
valid_identifiers = meta_identifiers.copy()
|
1511
1521
|
return valid_identifiers[
|
1512
1522
|
meta_identifiers[IDENTIFIERS.BQB].isin(defining_biological_qualifiers)
|
@@ -2034,22 +2044,6 @@ def _merge_entity_data_report_mismatches(
|
|
2034
2044
|
return None
|
2035
2045
|
|
2036
2046
|
|
2037
|
-
def _test_same_schema(sbml_dfs_dict: dict[str, sbml_dfs_core.SBML_dfs]) -> None:
|
2038
|
-
"""
|
2039
|
-
Ensure that all sbml_dfs in the dict have the same schema
|
2040
|
-
"""
|
2041
|
-
|
2042
|
-
if len(sbml_dfs_dict) != 0:
|
2043
|
-
# extract all schemas
|
2044
|
-
schema_list = [sbml_dfs_dict[x].schema for x in sbml_dfs_dict.keys()]
|
2045
|
-
# if multiple entries are present then are they the same?
|
2046
|
-
if len(sbml_dfs_dict) > 1:
|
2047
|
-
if not all([x == schema_list[0] for x in schema_list]):
|
2048
|
-
raise ValueError("sbml_df schemas were not identical")
|
2049
|
-
|
2050
|
-
return None
|
2051
|
-
|
2052
|
-
|
2053
2047
|
def _create_member_string(x: list[str]) -> str:
|
2054
2048
|
x.sort()
|
2055
2049
|
return "_".join(x)
|
@@ -55,28 +55,49 @@ SBML_DFS = SimpleNamespace(
|
|
55
55
|
SBO_TERM="sbo_term",
|
56
56
|
)
|
57
57
|
|
58
|
+
SCHEMA_DEFS = SimpleNamespace(
|
59
|
+
TABLE="table",
|
60
|
+
PK="pk",
|
61
|
+
FK="fk",
|
62
|
+
LABEL="label",
|
63
|
+
ID="id",
|
64
|
+
SOURCE="source",
|
65
|
+
VARS="vars",
|
66
|
+
)
|
67
|
+
|
58
68
|
SBML_DFS_SCHEMA = SimpleNamespace(
|
59
69
|
SCHEMA={
|
60
70
|
SBML_DFS.COMPARTMENTS: {
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
71
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.COMPARTMENTS,
|
72
|
+
SCHEMA_DEFS.PK: SBML_DFS.C_ID,
|
73
|
+
SCHEMA_DEFS.LABEL: SBML_DFS.C_NAME,
|
74
|
+
SCHEMA_DEFS.ID: SBML_DFS.C_IDENTIFIERS,
|
75
|
+
SCHEMA_DEFS.SOURCE: SBML_DFS.C_SOURCE,
|
76
|
+
SCHEMA_DEFS.VARS: [
|
77
|
+
SBML_DFS.C_NAME,
|
78
|
+
SBML_DFS.C_IDENTIFIERS,
|
79
|
+
SBML_DFS.C_SOURCE,
|
80
|
+
],
|
66
81
|
},
|
67
82
|
SBML_DFS.SPECIES: {
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
83
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.SPECIES,
|
84
|
+
SCHEMA_DEFS.PK: SBML_DFS.S_ID,
|
85
|
+
SCHEMA_DEFS.LABEL: SBML_DFS.S_NAME,
|
86
|
+
SCHEMA_DEFS.ID: SBML_DFS.S_IDENTIFIERS,
|
87
|
+
SCHEMA_DEFS.SOURCE: SBML_DFS.S_SOURCE,
|
88
|
+
SCHEMA_DEFS.VARS: [
|
89
|
+
SBML_DFS.S_NAME,
|
90
|
+
SBML_DFS.S_IDENTIFIERS,
|
91
|
+
SBML_DFS.S_SOURCE,
|
92
|
+
],
|
73
93
|
},
|
74
94
|
SBML_DFS.COMPARTMENTALIZED_SPECIES: {
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
95
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.COMPARTMENTALIZED_SPECIES,
|
96
|
+
SCHEMA_DEFS.PK: SBML_DFS.SC_ID,
|
97
|
+
SCHEMA_DEFS.LABEL: SBML_DFS.SC_NAME,
|
98
|
+
SCHEMA_DEFS.FK: [SBML_DFS.S_ID, SBML_DFS.C_ID],
|
99
|
+
SCHEMA_DEFS.SOURCE: SBML_DFS.SC_SOURCE,
|
100
|
+
SCHEMA_DEFS.VARS: [
|
80
101
|
SBML_DFS.SC_NAME,
|
81
102
|
SBML_DFS.S_ID,
|
82
103
|
SBML_DFS.C_ID,
|
@@ -84,11 +105,12 @@ SBML_DFS_SCHEMA = SimpleNamespace(
|
|
84
105
|
],
|
85
106
|
},
|
86
107
|
SBML_DFS.REACTIONS: {
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
108
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.REACTIONS,
|
109
|
+
SCHEMA_DEFS.PK: SBML_DFS.R_ID,
|
110
|
+
SCHEMA_DEFS.LABEL: SBML_DFS.R_NAME,
|
111
|
+
SCHEMA_DEFS.ID: SBML_DFS.R_IDENTIFIERS,
|
112
|
+
SCHEMA_DEFS.SOURCE: SBML_DFS.R_SOURCE,
|
113
|
+
SCHEMA_DEFS.VARS: [
|
92
114
|
SBML_DFS.R_NAME,
|
93
115
|
SBML_DFS.R_IDENTIFIERS,
|
94
116
|
SBML_DFS.R_SOURCE,
|
@@ -96,9 +118,10 @@ SBML_DFS_SCHEMA = SimpleNamespace(
|
|
96
118
|
],
|
97
119
|
},
|
98
120
|
SBML_DFS.REACTION_SPECIES: {
|
99
|
-
|
100
|
-
|
101
|
-
|
121
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.REACTION_SPECIES,
|
122
|
+
SCHEMA_DEFS.PK: SBML_DFS.RSC_ID,
|
123
|
+
SCHEMA_DEFS.FK: [SBML_DFS.R_ID, SBML_DFS.SC_ID],
|
124
|
+
SCHEMA_DEFS.VARS: [
|
102
125
|
SBML_DFS.R_ID,
|
103
126
|
SBML_DFS.SC_ID,
|
104
127
|
SBML_DFS.STOICHIOMETRY,
|
@@ -129,10 +152,10 @@ ENTITIES_TO_ENTITY_DATA = {
|
|
129
152
|
REQUIRED_REACTION_FROMEDGELIST_COLUMNS = [
|
130
153
|
"sc_id_up",
|
131
154
|
"sc_id_down",
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
155
|
+
SBML_DFS.SBO_TERM,
|
156
|
+
SBML_DFS.R_NAME,
|
157
|
+
SBML_DFS.R_IDENTIFIERS,
|
158
|
+
SBML_DFS.R_ISREVERSIBLE,
|
136
159
|
]
|
137
160
|
|
138
161
|
NAPISTU_STANDARD_OUTPUTS = SimpleNamespace(
|
@@ -155,20 +178,6 @@ INTERACTION_EDGELIST_EXPECTED_VARS = {
|
|
155
178
|
SBML_DFS.R_ISREVERSIBLE,
|
156
179
|
}
|
157
180
|
|
158
|
-
BQB_PRIORITIES = pd.DataFrame(
|
159
|
-
[{"bqb": "BQB_IS", "bqb_rank": 1}, {"bqb": "BQB_HAS_PART", "bqb_rank": 2}]
|
160
|
-
)
|
161
|
-
|
162
|
-
ONTOLOGY_PRIORITIES = pd.DataFrame(
|
163
|
-
[
|
164
|
-
{"ontology": "reactome", "ontology_rank": 1},
|
165
|
-
{"ontology": "ensembl_gene", "ontology_rank": 2},
|
166
|
-
{"ontology": "chebi", "ontology_rank": 3},
|
167
|
-
{"ontology": "uniprot", "ontology_rank": 4},
|
168
|
-
{"ontology": "go", "ontology_rank": 5},
|
169
|
-
]
|
170
|
-
)
|
171
|
-
|
172
181
|
# SBML
|
173
182
|
# Biological qualifiers
|
174
183
|
# Biomodels qualifiers
|
@@ -189,16 +198,18 @@ BQB = SimpleNamespace(
|
|
189
198
|
UNKNOWN="BQB_UNKNOWN",
|
190
199
|
)
|
191
200
|
|
201
|
+
VALID_BQB_TERMS = list(BQB.__dict__.values())
|
202
|
+
|
192
203
|
# molecules are distinctly defined by these BQB terms
|
193
|
-
BQB_DEFINING_ATTRS = [
|
204
|
+
BQB_DEFINING_ATTRS = [BQB.IS, BQB.IS_HOMOLOG_TO]
|
194
205
|
|
195
206
|
# a looser convention which will aggregate genes, transcripts, and proteins
|
196
207
|
# if they are linked with the appropriate bioqualifiers
|
197
208
|
BQB_DEFINING_ATTRS_LOOSE = [
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
209
|
+
BQB.IS,
|
210
|
+
BQB.IS_HOMOLOG_TO,
|
211
|
+
BQB.IS_ENCODED_BY,
|
212
|
+
BQB.ENCODES,
|
202
213
|
]
|
203
214
|
|
204
215
|
# identifiers
|
@@ -206,6 +217,13 @@ IDENTIFIERS = SimpleNamespace(
|
|
206
217
|
ONTOLOGY="ontology", IDENTIFIER="identifier", BQB="bqb", URL="url"
|
207
218
|
)
|
208
219
|
|
220
|
+
BQB_PRIORITIES = pd.DataFrame(
|
221
|
+
[
|
222
|
+
{IDENTIFIERS.BQB: BQB.IS, "bqb_rank": 1},
|
223
|
+
{IDENTIFIERS.BQB: BQB.HAS_PART, "bqb_rank": 2},
|
224
|
+
]
|
225
|
+
)
|
226
|
+
|
209
227
|
IDENTIFIERS_REQUIRED_VARS = {
|
210
228
|
IDENTIFIERS.ONTOLOGY,
|
211
229
|
IDENTIFIERS.IDENTIFIER,
|
@@ -217,26 +235,9 @@ SPECIES_IDENTIFIERS_REQUIRED_VARS = IDENTIFIERS_REQUIRED_VARS | {
|
|
217
235
|
SBML_DFS.S_NAME,
|
218
236
|
}
|
219
237
|
|
220
|
-
BIOLOGICAL_QUALIFIERS = [
|
221
|
-
"BQB_IS",
|
222
|
-
"BQB_HAS_PART",
|
223
|
-
"BQB_IS_PART_OF",
|
224
|
-
"BQB_IS_VERSION_OF",
|
225
|
-
"BQB_HAS_VERSION",
|
226
|
-
"BQB_IS_HOMOLOG_TO",
|
227
|
-
"BQB_IS_DESCRIBED_BY",
|
228
|
-
"BQB_IS_ENCODED_BY",
|
229
|
-
"BQB_ENCODES",
|
230
|
-
"BQB_OCCURS_IN",
|
231
|
-
"BQB_HAS_PROPERTY",
|
232
|
-
"BQB_IS_PROPERTY_OF",
|
233
|
-
"BQB_HAS_TAXON",
|
234
|
-
"BQB_UNKNOWN",
|
235
|
-
]
|
236
|
-
|
237
238
|
|
238
239
|
def get_biological_qualifier_codes():
|
239
|
-
bio_qualifier_codes = {getattr(libsbml, bqb): bqb for bqb in
|
240
|
+
bio_qualifier_codes = {getattr(libsbml, bqb): bqb for bqb in VALID_BQB_TERMS}
|
240
241
|
|
241
242
|
return bio_qualifier_codes
|
242
243
|
|
@@ -250,6 +251,7 @@ SBOTERM_NAMES = SimpleNamespace(
|
|
250
251
|
CATALYST="catalyst",
|
251
252
|
INHIBITOR="inhibitor",
|
252
253
|
STIMULATOR="stimulator",
|
254
|
+
MODIFIED="modified",
|
253
255
|
MODIFIER="modifier",
|
254
256
|
INTERACTOR="interactor",
|
255
257
|
)
|
@@ -258,22 +260,27 @@ MINI_SBO_TO_NAME = {
|
|
258
260
|
"SBO:0000010": SBOTERM_NAMES.REACTANT,
|
259
261
|
"SBO:0000011": SBOTERM_NAMES.PRODUCT,
|
260
262
|
"SBO:0000013": SBOTERM_NAMES.CATALYST,
|
261
|
-
"SBO:0000020": SBOTERM_NAMES.INHIBITOR,
|
262
|
-
"SBO:0000459": SBOTERM_NAMES.STIMULATOR,
|
263
263
|
"SBO:0000019": SBOTERM_NAMES.MODIFIER,
|
264
|
+
"SBO:0000020": SBOTERM_NAMES.INHIBITOR,
|
264
265
|
"SBO:0000336": SBOTERM_NAMES.INTERACTOR,
|
266
|
+
"SBO:0000459": SBOTERM_NAMES.STIMULATOR,
|
267
|
+
"SBO:0000644": SBOTERM_NAMES.MODIFIED,
|
265
268
|
}
|
266
269
|
|
267
270
|
MINI_SBO_FROM_NAME = {
|
268
|
-
SBOTERM_NAMES.REACTANT: "SBO:0000010",
|
269
|
-
SBOTERM_NAMES.PRODUCT: "SBO:0000011",
|
270
271
|
SBOTERM_NAMES.CATALYST: "SBO:0000013",
|
271
272
|
SBOTERM_NAMES.INHIBITOR: "SBO:0000020",
|
272
|
-
SBOTERM_NAMES.STIMULATOR: "SBO:0000459",
|
273
|
-
SBOTERM_NAMES.MODIFIER: "SBO:0000019", # parent category of inhibitor and stimulator (i.e., activator)
|
274
273
|
SBOTERM_NAMES.INTERACTOR: "SBO:0000336", # entity participating in a physical or functional interaction
|
274
|
+
SBOTERM_NAMES.MODIFIED: "SBO:0000644",
|
275
|
+
SBOTERM_NAMES.MODIFIER: "SBO:0000019", # parent category of inhibitor and stimulator (i.e., activator)
|
276
|
+
SBOTERM_NAMES.PRODUCT: "SBO:0000011",
|
277
|
+
SBOTERM_NAMES.REACTANT: "SBO:0000010", # aka substrate
|
278
|
+
SBOTERM_NAMES.STIMULATOR: "SBO:0000459", # aka activator
|
275
279
|
}
|
276
280
|
|
281
|
+
VALID_SBO_TERM_NAMES = list(SBOTERM_NAMES.__dict__.values())
|
282
|
+
VALID_SBO_TERMS = list(MINI_SBO_FROM_NAME.values())
|
283
|
+
|
277
284
|
SBO_MODIFIER_NAMES = {
|
278
285
|
SBOTERM_NAMES.INHIBITOR,
|
279
286
|
SBOTERM_NAMES.STIMULATOR,
|
@@ -281,13 +288,14 @@ SBO_MODIFIER_NAMES = {
|
|
281
288
|
}
|
282
289
|
|
283
290
|
MINI_SBO_NAME_TO_POLARITY = {
|
284
|
-
SBOTERM_NAMES.REACTANT: "activation",
|
285
|
-
SBOTERM_NAMES.PRODUCT: "activation",
|
286
291
|
SBOTERM_NAMES.CATALYST: "activation",
|
287
292
|
SBOTERM_NAMES.INHIBITOR: "inhibition",
|
288
|
-
SBOTERM_NAMES.STIMULATOR: "activation",
|
289
|
-
SBOTERM_NAMES.MODIFIER: "ambiguous",
|
290
293
|
SBOTERM_NAMES.INTERACTOR: "ambiguous",
|
294
|
+
SBOTERM_NAMES.MODIFIED: "ambiguous",
|
295
|
+
SBOTERM_NAMES.MODIFIER: "ambiguous",
|
296
|
+
SBOTERM_NAMES.PRODUCT: "activation",
|
297
|
+
SBOTERM_NAMES.REACTANT: "ambiguous",
|
298
|
+
SBOTERM_NAMES.STIMULATOR: "activation",
|
291
299
|
}
|
292
300
|
|
293
301
|
# how does changing a reactions' membership
|
@@ -305,6 +313,7 @@ SBO_NAME_TO_ROLE = {
|
|
305
313
|
SBOTERM_NAMES.CATALYST: SBO_ROLES_DEFS.REQUIRED,
|
306
314
|
SBOTERM_NAMES.INHIBITOR: SBO_ROLES_DEFS.OPTIONAL,
|
307
315
|
SBOTERM_NAMES.STIMULATOR: SBO_ROLES_DEFS.OPTIONAL,
|
316
|
+
SBOTERM_NAMES.MODIFIED: SBO_ROLES_DEFS.DEFINING,
|
308
317
|
SBOTERM_NAMES.MODIFIER: SBO_ROLES_DEFS.OPTIONAL,
|
309
318
|
}
|
310
319
|
|
@@ -322,7 +331,7 @@ VALID_SBO_ROLES = (
|
|
322
331
|
|
323
332
|
# required variables for the edgelist formats used by the matching subpackage
|
324
333
|
# also used in some network modules
|
325
|
-
|
334
|
+
NAPISTU_EDGELIST = SimpleNamespace(
|
326
335
|
S_ID_UPSTREAM="s_id_upstream",
|
327
336
|
S_ID_DOWNSTREAM="s_id_downstream",
|
328
337
|
SC_ID_UPSTREAM="sc_id_upstream",
|
@@ -336,18 +345,18 @@ CPR_EDGELIST = SimpleNamespace(
|
|
336
345
|
)
|
337
346
|
|
338
347
|
IDENTIFIER_EDGELIST_REQ_VARS = {
|
339
|
-
|
340
|
-
|
348
|
+
NAPISTU_EDGELIST.IDENTIFIER_UPSTREAM,
|
349
|
+
NAPISTU_EDGELIST.IDENTIFIER_DOWNSTREAM,
|
341
350
|
}
|
342
351
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
352
|
+
NAPISTU_EDGELIST_REQ_VARS = {
|
353
|
+
NAPISTU_EDGELIST.S_ID_UPSTREAM,
|
354
|
+
NAPISTU_EDGELIST.S_ID_DOWNSTREAM,
|
355
|
+
NAPISTU_EDGELIST.SC_ID_UPSTREAM,
|
356
|
+
NAPISTU_EDGELIST.SC_ID_DOWNSTREAM,
|
348
357
|
}
|
349
358
|
|
350
|
-
|
359
|
+
NAPISTU_PATH_REQ_VARS = {NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST}
|
351
360
|
|
352
361
|
FEATURE_ID_VAR_DEFAULT = "feature_id"
|
353
362
|
|
@@ -409,6 +418,16 @@ ONTOLOGY_SPECIES_ALIASES = {
|
|
409
418
|
ONTOLOGIES.UNIPROT: {"Uniprot"},
|
410
419
|
}
|
411
420
|
|
421
|
+
ONTOLOGY_PRIORITIES = pd.DataFrame(
|
422
|
+
[
|
423
|
+
{"ontology": ONTOLOGIES.REACTOME, "ontology_rank": 1},
|
424
|
+
{"ontology": ONTOLOGIES.ENSEMBL_GENE, "ontology_rank": 2},
|
425
|
+
{"ontology": ONTOLOGIES.CHEBI, "ontology_rank": 3},
|
426
|
+
{"ontology": ONTOLOGIES.UNIPROT, "ontology_rank": 4},
|
427
|
+
{"ontology": ONTOLOGIES.GO, "ontology_rank": 5},
|
428
|
+
]
|
429
|
+
)
|
430
|
+
|
412
431
|
ENSEMBL_MOLECULE_TYPES_TO_ONTOLOGY = {
|
413
432
|
"G": ONTOLOGIES.ENSEMBL_GENE,
|
414
433
|
"T": ONTOLOGIES.ENSEMBL_TRANSCRIPT,
|
@@ -266,6 +266,7 @@ def adapt_pw_index(
|
|
266
266
|
source: str | PWIndex,
|
267
267
|
species: str | Iterable[str] | None,
|
268
268
|
outdir: str | None = None,
|
269
|
+
update_index: bool = False,
|
269
270
|
) -> PWIndex:
|
270
271
|
"""Adapts a pw_index
|
271
272
|
|
@@ -288,8 +289,9 @@ def adapt_pw_index(
|
|
288
289
|
raise ValueError("'source' needs to be str or PWIndex")
|
289
290
|
pw_index.filter(species=species)
|
290
291
|
|
291
|
-
if outdir is not None:
|
292
|
+
if outdir is not None and update_index:
|
292
293
|
with open_fs(outdir, create=True) as fs:
|
293
294
|
with fs.open("pw_index.tsv", "w") as f:
|
294
295
|
pw_index.index.to_csv(f, sep="\t")
|
296
|
+
|
295
297
|
return pw_index
|
@@ -20,9 +20,9 @@ def remove_reciprocal_interactions(
|
|
20
20
|
|
21
21
|
Args:
|
22
22
|
edgelist (pd.DataFrame): edgelist where the first two
|
23
|
-
|
23
|
+
columns are assumed to be the edge vertices
|
24
24
|
extra_defining_vars (list): list (which can be empty) of variables which define
|
25
|
-
|
25
|
+
a unique interaction beyond the vertices
|
26
26
|
|
27
27
|
Returns:
|
28
28
|
indegenerate_edgelist (pd.DataFrame): edgelist with B-A edges removed and A-B retained
|
@@ -58,9 +58,9 @@ def count_fraction_of_reciprocal_interactions(
|
|
58
58
|
|
59
59
|
Args:
|
60
60
|
edgelist (pd.DataFrame): edgelist where the first two
|
61
|
-
|
61
|
+
columns are assumed to be the edge vertices
|
62
62
|
extra_defining_vars (list): list (which can be empty) of variables which define
|
63
|
-
|
63
|
+
a unique interaction beyond the vertices
|
64
64
|
|
65
65
|
Returns:
|
66
66
|
fraction (float): fraction of A-B edges which are also included as B-A edges
|