napistu 0.3.6__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {napistu-0.3.6 → napistu-0.3.7}/PKG-INFO +1 -1
- {napistu-0.3.6 → napistu-0.3.7}/setup.cfg +1 -1
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/__main__.py +20 -9
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/consensus.py +19 -25
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/constants.py +90 -64
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/indices.py +3 -1
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/sbml.py +298 -295
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/string.py +14 -18
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/trrust.py +22 -27
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/species.py +1 -1
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/genodexito.py +5 -1
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/renaming.py +4 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/sbml_dfs_core.py +127 -64
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/sbml_dfs_utils.py +4 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/utils.py +52 -41
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/PKG-INFO +1 -1
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/conftest.py +70 -13
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_consensus.py +74 -5
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_gaps.py +26 -15
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_net_create.py +1 -1
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_precompute.py +1 -1
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ontologies_renaming.py +28 -24
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_sbml_dfs_core.py +165 -15
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_utils.py +19 -0
- {napistu-0.3.6 → napistu-0.3.7}/LICENSE +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/README.md +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/pyproject.toml +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/setup.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/context/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/context/discretize.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/context/filtering.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/gcs/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/gcs/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/gcs/downloads.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/gcs/utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/identifiers.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/bigg.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/gtex.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/hpa.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/identifiers_etl.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/napistu_edgelist.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/obo.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/psi_mi.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/reactome.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/yeast.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/interactions.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/mount.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/__main__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/client.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/codebase.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/codebase_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/component_base.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/config.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/documentation.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/documentation_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/execution.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/health.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/profiles.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/server.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/tutorials.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/tutorials_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/curation.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/gaps.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/pathwayannot.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/uncompartmentalize.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/data_handling.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/ig_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/napistu_graph_core.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/neighborhoods.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/net_create.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/net_propagation.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/ng_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/paths.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/precompute.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/dogma.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/mygene.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/rpy2/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/rpy2/callr.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/rpy2/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/rpy2/rids.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/scverse/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/scverse/constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/scverse/loading.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu/source.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/SOURCES.txt +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/dependency_links.txt +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/entry_points.txt +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/requires.txt +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/top_level.txt +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_constants.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_context_discretize.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_context_filtering.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_curation.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_data/__init__.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_gcs.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_identifiers.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_indices.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ingestion_napistu_edgelist.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ingestion_obo.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_matching_interactions.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_matching_mount.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_matching_species.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_mcp_config.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_mcp_documentation_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_mcp_server.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_data_handling.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_ig_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_neighborhoods.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_net_propagation.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_ng_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_paths.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ontologies_genodexito.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ontologies_mygene.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_pathwayannot.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_rpy2_callr.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_rpy2_init.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_sbml.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_sbml_dfs_utils.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_sbo.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_scverse_loading.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_set_coverage.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_source.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_uncompartmentalize.py +0 -0
- {napistu-0.3.6 → napistu-0.3.7}/src/tests/utils.py +0 -0
@@ -12,7 +12,7 @@ import click_logging
|
|
12
12
|
import napistu
|
13
13
|
import igraph as ig
|
14
14
|
import pandas as pd
|
15
|
-
from napistu import consensus as
|
15
|
+
from napistu import consensus as napistu_consensus
|
16
16
|
from napistu import indices
|
17
17
|
from napistu import sbml_dfs_core
|
18
18
|
from napistu import utils
|
@@ -65,7 +65,7 @@ def ingestion():
|
|
65
65
|
"--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
|
66
66
|
)
|
67
67
|
@click_logging.simple_verbosity_option(logger)
|
68
|
-
def load_reactome(base_folder: str, overwrite=True):
|
68
|
+
def ingest_reactome(base_folder: str, overwrite=True):
|
69
69
|
logger.info("Start downloading Reactome to %s", base_folder)
|
70
70
|
reactome.reactome_sbml_download(f"{base_folder}/sbml", overwrite=overwrite)
|
71
71
|
|
@@ -76,7 +76,7 @@ def load_reactome(base_folder: str, overwrite=True):
|
|
76
76
|
"--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
|
77
77
|
)
|
78
78
|
@click_logging.simple_verbosity_option(logger)
|
79
|
-
def load_bigg(base_folder: str, overwrite: bool):
|
79
|
+
def ingest_bigg(base_folder: str, overwrite: bool):
|
80
80
|
logger.info("Start downloading Bigg to %s", base_folder)
|
81
81
|
bigg.bigg_sbml_download(base_folder, overwrite)
|
82
82
|
|
@@ -84,7 +84,7 @@ def load_bigg(base_folder: str, overwrite: bool):
|
|
84
84
|
@ingestion.command(name="trrust")
|
85
85
|
@click.argument("target_uri", type=str)
|
86
86
|
@click_logging.simple_verbosity_option(logger)
|
87
|
-
def load_ttrust(target_uri: str):
|
87
|
+
def ingest_ttrust(target_uri: str):
|
88
88
|
logger.info("Start downloading TRRUST to %s", target_uri)
|
89
89
|
trrust.download_trrust(target_uri)
|
90
90
|
|
@@ -98,7 +98,7 @@ def load_ttrust(target_uri: str):
|
|
98
98
|
help="URL to download the zipped protein atlas subcellular localization tsv from.",
|
99
99
|
)
|
100
100
|
@click_logging.simple_verbosity_option(logger)
|
101
|
-
def load_proteinatlas_subcell(target_uri: str, url: str):
|
101
|
+
def ingest_proteinatlas_subcell(target_uri: str, url: str):
|
102
102
|
hpa.download_hpa_data(target_uri, url)
|
103
103
|
|
104
104
|
|
@@ -111,7 +111,7 @@ def load_proteinatlas_subcell(target_uri: str, url: str):
|
|
111
111
|
help="URL to download the gtex file from.",
|
112
112
|
)
|
113
113
|
@click_logging.simple_verbosity_option(logger)
|
114
|
-
def load_gtex_rnaseq(target_uri: str, url: str):
|
114
|
+
def ingest_gtex_rnaseq(target_uri: str, url: str):
|
115
115
|
gtex.download_gtex_rnaseq(target_uri, url)
|
116
116
|
|
117
117
|
|
@@ -124,7 +124,7 @@ def load_gtex_rnaseq(target_uri: str, url: str):
|
|
124
124
|
help="Species name (e.g., Homo sapiens).",
|
125
125
|
)
|
126
126
|
@click_logging.simple_verbosity_option(logger)
|
127
|
-
def load_string_db(target_uri: str, species: str):
|
127
|
+
def ingest_string_db(target_uri: str, species: str):
|
128
128
|
string.download_string(target_uri, species)
|
129
129
|
|
130
130
|
|
@@ -137,7 +137,7 @@ def load_string_db(target_uri: str, species: str):
|
|
137
137
|
help="Species name (e.g., Homo sapiens).",
|
138
138
|
)
|
139
139
|
@click_logging.simple_verbosity_option(logger)
|
140
|
-
def
|
140
|
+
def ingest_string_aliases(target_uri: str, species: str):
|
141
141
|
string.download_string_aliases(target_uri, species)
|
142
142
|
|
143
143
|
|
@@ -289,7 +289,7 @@ def create_consensus(
|
|
289
289
|
)
|
290
290
|
pw_index_df["species"] = "unknown"
|
291
291
|
pw_index = indices.PWIndex(pw_index=pw_index_df, validate_paths=False)
|
292
|
-
consensus_model =
|
292
|
+
consensus_model = napistu_consensus.construct_consensus_model(
|
293
293
|
sbml_dfs_dict, pw_index, dogmatic
|
294
294
|
)
|
295
295
|
utils.save_pickle(output_model_uri, consensus_model)
|
@@ -855,6 +855,17 @@ def copy_uri(input_uri, output_uri, is_file=True):
|
|
855
855
|
utils.copy_uri(input_uri, output_uri, is_file=is_file)
|
856
856
|
|
857
857
|
|
858
|
+
@helpers.command(name="validate_sbml_dfs")
|
859
|
+
@click.argument("input_uri", type=str)
|
860
|
+
@click_logging.simple_verbosity_option(logger)
|
861
|
+
def validate_sbml_dfs(input_uri):
|
862
|
+
"""Validate a sbml_dfs object"""
|
863
|
+
sbml_dfs = utils.load_pickle(input_uri)
|
864
|
+
sbml_dfs.validate()
|
865
|
+
|
866
|
+
logger.info(f"Successfully validated: {input_uri}")
|
867
|
+
|
868
|
+
|
858
869
|
@click.group()
|
859
870
|
def stats():
|
860
871
|
"""Various functions to calculate network statistics
|
@@ -15,10 +15,13 @@ from napistu import source
|
|
15
15
|
from napistu import utils
|
16
16
|
from napistu.ingestion import sbml
|
17
17
|
|
18
|
+
from napistu.constants import SCHEMA_DEFS
|
18
19
|
from napistu.constants import SBML_DFS
|
20
|
+
from napistu.constants import SBML_DFS_SCHEMA
|
19
21
|
from napistu.constants import IDENTIFIERS
|
20
22
|
from napistu.constants import SOURCE_SPEC
|
21
23
|
from napistu.constants import BQB_DEFINING_ATTRS
|
24
|
+
from napistu.constants import VALID_BQB_TERMS
|
22
25
|
|
23
26
|
logger = logging.getLogger(__name__)
|
24
27
|
# set the level to show logger.info message
|
@@ -137,8 +140,7 @@ def unnest_SBML_df(
|
|
137
140
|
"""
|
138
141
|
|
139
142
|
# check that all sbml_dfs have the same schema
|
140
|
-
|
141
|
-
table_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[table]
|
143
|
+
table_schema = SBML_DFS_SCHEMA.SCHEMA[table]
|
142
144
|
|
143
145
|
df_list = [
|
144
146
|
getattr(sbml_dfs_dict[x], table).assign(model=x) for x in sbml_dfs_dict.keys()
|
@@ -192,7 +194,7 @@ def construct_meta_entities_identifiers(
|
|
192
194
|
agg_tbl = unnest_SBML_df(sbml_dfs_dict, table=table)
|
193
195
|
|
194
196
|
# since all sbml_dfs have the same schema pull out one schema for reference
|
195
|
-
table_schema =
|
197
|
+
table_schema = SBML_DFS_SCHEMA.SCHEMA[table]
|
196
198
|
|
197
199
|
# update foreign keys using provided lookup tables
|
198
200
|
if "fk" in table_schema.keys():
|
@@ -244,6 +246,8 @@ def reduce_to_consensus_ids(
|
|
244
246
|
Series mapping the index of the aggregated entities to new consensus IDs.
|
245
247
|
"""
|
246
248
|
# Step 1: Build consensus identifiers to create clusters of equivalent entities
|
249
|
+
table_name = table_schema[SCHEMA_DEFS.TABLE]
|
250
|
+
logger.debug(f"Building consensus identifiers for {table_name}")
|
247
251
|
indexed_cluster, cluster_consensus_identifiers = build_consensus_identifiers(
|
248
252
|
sbml_df, table_schema, defining_biological_qualifiers
|
249
253
|
)
|
@@ -252,25 +256,28 @@ def reduce_to_consensus_ids(
|
|
252
256
|
agg_table_harmonized = sbml_df.join(indexed_cluster)
|
253
257
|
|
254
258
|
# Step 3: Create lookup table for entity IDs
|
259
|
+
logger.debug(f"Creating lookup table for {table_name}")
|
255
260
|
lookup_table = _create_entity_lookup_table(agg_table_harmonized, table_schema)
|
256
261
|
|
257
262
|
# Step 4: Add nameness scores to help select representative names
|
258
263
|
agg_table_harmonized = utils._add_nameness_score_wrapper(
|
259
|
-
agg_table_harmonized,
|
264
|
+
agg_table_harmonized, SCHEMA_DEFS.LABEL, table_schema
|
260
265
|
)
|
261
266
|
|
262
267
|
# Step 5: Prepare the consensus table with one row per unique entity
|
268
|
+
logger.debug(f"Preparing consensus table for {table_name}")
|
263
269
|
new_id_table = _prepare_consensus_table(
|
264
270
|
agg_table_harmonized, table_schema, cluster_consensus_identifiers
|
265
271
|
)
|
266
272
|
|
267
273
|
# Step 6: Add source information if required
|
268
|
-
if
|
274
|
+
if SCHEMA_DEFS.SOURCE in table_schema.keys():
|
269
275
|
new_id_table = _add_consensus_sources(
|
270
276
|
new_id_table, agg_table_harmonized, lookup_table, table_schema, pw_index
|
271
277
|
)
|
272
278
|
|
273
279
|
# Step 7: Validate the resulting table
|
280
|
+
logger.debug(f"Validating consensus table for {table_name}")
|
274
281
|
_validate_consensus_table(new_id_table, sbml_df)
|
275
282
|
|
276
283
|
return new_id_table, lookup_table
|
@@ -667,7 +674,7 @@ def construct_meta_entities_members(
|
|
667
674
|
defined_by_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[defined_by]
|
668
675
|
|
669
676
|
# Step 2: Prepare the member table and validate its structure
|
670
|
-
agg_tbl,
|
677
|
+
agg_tbl, _ = _prepare_member_table(
|
671
678
|
sbml_dfs_dict,
|
672
679
|
defined_by,
|
673
680
|
defined_lookup_tables,
|
@@ -681,9 +688,7 @@ def construct_meta_entities_members(
|
|
681
688
|
membership_lookup = _create_membership_lookup(agg_tbl, table_schema)
|
682
689
|
|
683
690
|
# Step 4: Create consensus entities and lookup table
|
684
|
-
|
685
|
-
membership_lookup, table_schema
|
686
|
-
)
|
691
|
+
_, lookup_table = _create_entity_consensus(membership_lookup, table_schema)
|
687
692
|
|
688
693
|
# Step 5: Log merger information
|
689
694
|
report_consensus_merges(
|
@@ -1507,6 +1512,11 @@ def _filter_identifiers_by_qualifier(
|
|
1507
1512
|
pd.DataFrame
|
1508
1513
|
Filtered identifiers
|
1509
1514
|
"""
|
1515
|
+
|
1516
|
+
invalid_bqbs = set(meta_identifiers[IDENTIFIERS.BQB]) - set(VALID_BQB_TERMS)
|
1517
|
+
if len(invalid_bqbs) > 0:
|
1518
|
+
logger.warning(f"Invalid biological qualifiers: {invalid_bqbs}")
|
1519
|
+
|
1510
1520
|
valid_identifiers = meta_identifiers.copy()
|
1511
1521
|
return valid_identifiers[
|
1512
1522
|
meta_identifiers[IDENTIFIERS.BQB].isin(defining_biological_qualifiers)
|
@@ -2034,22 +2044,6 @@ def _merge_entity_data_report_mismatches(
|
|
2034
2044
|
return None
|
2035
2045
|
|
2036
2046
|
|
2037
|
-
def _test_same_schema(sbml_dfs_dict: dict[str, sbml_dfs_core.SBML_dfs]) -> None:
|
2038
|
-
"""
|
2039
|
-
Ensure that all sbml_dfs in the dict have the same schema
|
2040
|
-
"""
|
2041
|
-
|
2042
|
-
if len(sbml_dfs_dict) != 0:
|
2043
|
-
# extract all schemas
|
2044
|
-
schema_list = [sbml_dfs_dict[x].schema for x in sbml_dfs_dict.keys()]
|
2045
|
-
# if multiple entries are present then are they the same?
|
2046
|
-
if len(sbml_dfs_dict) > 1:
|
2047
|
-
if not all([x == schema_list[0] for x in schema_list]):
|
2048
|
-
raise ValueError("sbml_df schemas were not identical")
|
2049
|
-
|
2050
|
-
return None
|
2051
|
-
|
2052
|
-
|
2053
2047
|
def _create_member_string(x: list[str]) -> str:
|
2054
2048
|
x.sort()
|
2055
2049
|
return "_".join(x)
|
@@ -55,28 +55,49 @@ SBML_DFS = SimpleNamespace(
|
|
55
55
|
SBO_TERM="sbo_term",
|
56
56
|
)
|
57
57
|
|
58
|
+
SCHEMA_DEFS = SimpleNamespace(
|
59
|
+
TABLE="table",
|
60
|
+
PK="pk",
|
61
|
+
FK="fk",
|
62
|
+
LABEL="label",
|
63
|
+
ID="id",
|
64
|
+
SOURCE="source",
|
65
|
+
VARS="vars",
|
66
|
+
)
|
67
|
+
|
58
68
|
SBML_DFS_SCHEMA = SimpleNamespace(
|
59
69
|
SCHEMA={
|
60
70
|
SBML_DFS.COMPARTMENTS: {
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
71
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.COMPARTMENTS,
|
72
|
+
SCHEMA_DEFS.PK: SBML_DFS.C_ID,
|
73
|
+
SCHEMA_DEFS.LABEL: SBML_DFS.C_NAME,
|
74
|
+
SCHEMA_DEFS.ID: SBML_DFS.C_IDENTIFIERS,
|
75
|
+
SCHEMA_DEFS.SOURCE: SBML_DFS.C_SOURCE,
|
76
|
+
SCHEMA_DEFS.VARS: [
|
77
|
+
SBML_DFS.C_NAME,
|
78
|
+
SBML_DFS.C_IDENTIFIERS,
|
79
|
+
SBML_DFS.C_SOURCE,
|
80
|
+
],
|
66
81
|
},
|
67
82
|
SBML_DFS.SPECIES: {
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
83
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.SPECIES,
|
84
|
+
SCHEMA_DEFS.PK: SBML_DFS.S_ID,
|
85
|
+
SCHEMA_DEFS.LABEL: SBML_DFS.S_NAME,
|
86
|
+
SCHEMA_DEFS.ID: SBML_DFS.S_IDENTIFIERS,
|
87
|
+
SCHEMA_DEFS.SOURCE: SBML_DFS.S_SOURCE,
|
88
|
+
SCHEMA_DEFS.VARS: [
|
89
|
+
SBML_DFS.S_NAME,
|
90
|
+
SBML_DFS.S_IDENTIFIERS,
|
91
|
+
SBML_DFS.S_SOURCE,
|
92
|
+
],
|
73
93
|
},
|
74
94
|
SBML_DFS.COMPARTMENTALIZED_SPECIES: {
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
95
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.COMPARTMENTALIZED_SPECIES,
|
96
|
+
SCHEMA_DEFS.PK: SBML_DFS.SC_ID,
|
97
|
+
SCHEMA_DEFS.LABEL: SBML_DFS.SC_NAME,
|
98
|
+
SCHEMA_DEFS.FK: [SBML_DFS.S_ID, SBML_DFS.C_ID],
|
99
|
+
SCHEMA_DEFS.SOURCE: SBML_DFS.SC_SOURCE,
|
100
|
+
SCHEMA_DEFS.VARS: [
|
80
101
|
SBML_DFS.SC_NAME,
|
81
102
|
SBML_DFS.S_ID,
|
82
103
|
SBML_DFS.C_ID,
|
@@ -84,11 +105,12 @@ SBML_DFS_SCHEMA = SimpleNamespace(
|
|
84
105
|
],
|
85
106
|
},
|
86
107
|
SBML_DFS.REACTIONS: {
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
108
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.REACTIONS,
|
109
|
+
SCHEMA_DEFS.PK: SBML_DFS.R_ID,
|
110
|
+
SCHEMA_DEFS.LABEL: SBML_DFS.R_NAME,
|
111
|
+
SCHEMA_DEFS.ID: SBML_DFS.R_IDENTIFIERS,
|
112
|
+
SCHEMA_DEFS.SOURCE: SBML_DFS.R_SOURCE,
|
113
|
+
SCHEMA_DEFS.VARS: [
|
92
114
|
SBML_DFS.R_NAME,
|
93
115
|
SBML_DFS.R_IDENTIFIERS,
|
94
116
|
SBML_DFS.R_SOURCE,
|
@@ -96,9 +118,10 @@ SBML_DFS_SCHEMA = SimpleNamespace(
|
|
96
118
|
],
|
97
119
|
},
|
98
120
|
SBML_DFS.REACTION_SPECIES: {
|
99
|
-
|
100
|
-
|
101
|
-
|
121
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.REACTION_SPECIES,
|
122
|
+
SCHEMA_DEFS.PK: SBML_DFS.RSC_ID,
|
123
|
+
SCHEMA_DEFS.FK: [SBML_DFS.R_ID, SBML_DFS.SC_ID],
|
124
|
+
SCHEMA_DEFS.VARS: [
|
102
125
|
SBML_DFS.R_ID,
|
103
126
|
SBML_DFS.SC_ID,
|
104
127
|
SBML_DFS.STOICHIOMETRY,
|
@@ -129,10 +152,10 @@ ENTITIES_TO_ENTITY_DATA = {
|
|
129
152
|
REQUIRED_REACTION_FROMEDGELIST_COLUMNS = [
|
130
153
|
"sc_id_up",
|
131
154
|
"sc_id_down",
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
155
|
+
SBML_DFS.SBO_TERM,
|
156
|
+
SBML_DFS.R_NAME,
|
157
|
+
SBML_DFS.R_IDENTIFIERS,
|
158
|
+
SBML_DFS.R_ISREVERSIBLE,
|
136
159
|
]
|
137
160
|
|
138
161
|
NAPISTU_STANDARD_OUTPUTS = SimpleNamespace(
|
@@ -155,20 +178,6 @@ INTERACTION_EDGELIST_EXPECTED_VARS = {
|
|
155
178
|
SBML_DFS.R_ISREVERSIBLE,
|
156
179
|
}
|
157
180
|
|
158
|
-
BQB_PRIORITIES = pd.DataFrame(
|
159
|
-
[{"bqb": "BQB_IS", "bqb_rank": 1}, {"bqb": "BQB_HAS_PART", "bqb_rank": 2}]
|
160
|
-
)
|
161
|
-
|
162
|
-
ONTOLOGY_PRIORITIES = pd.DataFrame(
|
163
|
-
[
|
164
|
-
{"ontology": "reactome", "ontology_rank": 1},
|
165
|
-
{"ontology": "ensembl_gene", "ontology_rank": 2},
|
166
|
-
{"ontology": "chebi", "ontology_rank": 3},
|
167
|
-
{"ontology": "uniprot", "ontology_rank": 4},
|
168
|
-
{"ontology": "go", "ontology_rank": 5},
|
169
|
-
]
|
170
|
-
)
|
171
|
-
|
172
181
|
# SBML
|
173
182
|
# Biological qualifiers
|
174
183
|
# Biomodels qualifiers
|
@@ -189,16 +198,33 @@ BQB = SimpleNamespace(
|
|
189
198
|
UNKNOWN="BQB_UNKNOWN",
|
190
199
|
)
|
191
200
|
|
201
|
+
VALID_BQB_TERMS = [
|
202
|
+
BQB.IS,
|
203
|
+
BQB.HAS_PART,
|
204
|
+
BQB.IS_PART_OF,
|
205
|
+
BQB.IS_VERSION_OF,
|
206
|
+
BQB.HAS_VERSION,
|
207
|
+
BQB.IS_HOMOLOG_TO,
|
208
|
+
BQB.IS_DESCRIBED_BY,
|
209
|
+
BQB.IS_ENCODED_BY,
|
210
|
+
BQB.ENCODES,
|
211
|
+
BQB.OCCURS_IN,
|
212
|
+
BQB.HAS_PROPERTY,
|
213
|
+
BQB.IS_PROPERTY_OF,
|
214
|
+
BQB.HAS_TAXON,
|
215
|
+
BQB.UNKNOWN,
|
216
|
+
]
|
217
|
+
|
192
218
|
# molecules are distinctly defined by these BQB terms
|
193
|
-
BQB_DEFINING_ATTRS = [
|
219
|
+
BQB_DEFINING_ATTRS = [BQB.IS, BQB.IS_HOMOLOG_TO]
|
194
220
|
|
195
221
|
# a looser convention which will aggregate genes, transcripts, and proteins
|
196
222
|
# if they are linked with the appropriate bioqualifiers
|
197
223
|
BQB_DEFINING_ATTRS_LOOSE = [
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
224
|
+
BQB.IS,
|
225
|
+
BQB.IS_HOMOLOG_TO,
|
226
|
+
BQB.IS_ENCODED_BY,
|
227
|
+
BQB.ENCODES,
|
202
228
|
]
|
203
229
|
|
204
230
|
# identifiers
|
@@ -206,6 +232,13 @@ IDENTIFIERS = SimpleNamespace(
|
|
206
232
|
ONTOLOGY="ontology", IDENTIFIER="identifier", BQB="bqb", URL="url"
|
207
233
|
)
|
208
234
|
|
235
|
+
BQB_PRIORITIES = pd.DataFrame(
|
236
|
+
[
|
237
|
+
{IDENTIFIERS.BQB: BQB.IS, "bqb_rank": 1},
|
238
|
+
{IDENTIFIERS.BQB: BQB.HAS_PART, "bqb_rank": 2},
|
239
|
+
]
|
240
|
+
)
|
241
|
+
|
209
242
|
IDENTIFIERS_REQUIRED_VARS = {
|
210
243
|
IDENTIFIERS.ONTOLOGY,
|
211
244
|
IDENTIFIERS.IDENTIFIER,
|
@@ -217,26 +250,9 @@ SPECIES_IDENTIFIERS_REQUIRED_VARS = IDENTIFIERS_REQUIRED_VARS | {
|
|
217
250
|
SBML_DFS.S_NAME,
|
218
251
|
}
|
219
252
|
|
220
|
-
BIOLOGICAL_QUALIFIERS = [
|
221
|
-
"BQB_IS",
|
222
|
-
"BQB_HAS_PART",
|
223
|
-
"BQB_IS_PART_OF",
|
224
|
-
"BQB_IS_VERSION_OF",
|
225
|
-
"BQB_HAS_VERSION",
|
226
|
-
"BQB_IS_HOMOLOG_TO",
|
227
|
-
"BQB_IS_DESCRIBED_BY",
|
228
|
-
"BQB_IS_ENCODED_BY",
|
229
|
-
"BQB_ENCODES",
|
230
|
-
"BQB_OCCURS_IN",
|
231
|
-
"BQB_HAS_PROPERTY",
|
232
|
-
"BQB_IS_PROPERTY_OF",
|
233
|
-
"BQB_HAS_TAXON",
|
234
|
-
"BQB_UNKNOWN",
|
235
|
-
]
|
236
|
-
|
237
253
|
|
238
254
|
def get_biological_qualifier_codes():
|
239
|
-
bio_qualifier_codes = {getattr(libsbml, bqb): bqb for bqb in
|
255
|
+
bio_qualifier_codes = {getattr(libsbml, bqb): bqb for bqb in VALID_BQB_TERMS}
|
240
256
|
|
241
257
|
return bio_qualifier_codes
|
242
258
|
|
@@ -409,6 +425,16 @@ ONTOLOGY_SPECIES_ALIASES = {
|
|
409
425
|
ONTOLOGIES.UNIPROT: {"Uniprot"},
|
410
426
|
}
|
411
427
|
|
428
|
+
ONTOLOGY_PRIORITIES = pd.DataFrame(
|
429
|
+
[
|
430
|
+
{"ontology": ONTOLOGIES.REACTOME, "ontology_rank": 1},
|
431
|
+
{"ontology": ONTOLOGIES.ENSEMBL_GENE, "ontology_rank": 2},
|
432
|
+
{"ontology": ONTOLOGIES.CHEBI, "ontology_rank": 3},
|
433
|
+
{"ontology": ONTOLOGIES.UNIPROT, "ontology_rank": 4},
|
434
|
+
{"ontology": ONTOLOGIES.GO, "ontology_rank": 5},
|
435
|
+
]
|
436
|
+
)
|
437
|
+
|
412
438
|
ENSEMBL_MOLECULE_TYPES_TO_ONTOLOGY = {
|
413
439
|
"G": ONTOLOGIES.ENSEMBL_GENE,
|
414
440
|
"T": ONTOLOGIES.ENSEMBL_TRANSCRIPT,
|
@@ -266,6 +266,7 @@ def adapt_pw_index(
|
|
266
266
|
source: str | PWIndex,
|
267
267
|
species: str | Iterable[str] | None,
|
268
268
|
outdir: str | None = None,
|
269
|
+
update_index: bool = False,
|
269
270
|
) -> PWIndex:
|
270
271
|
"""Adapts a pw_index
|
271
272
|
|
@@ -288,8 +289,9 @@ def adapt_pw_index(
|
|
288
289
|
raise ValueError("'source' needs to be str or PWIndex")
|
289
290
|
pw_index.filter(species=species)
|
290
291
|
|
291
|
-
if outdir is not None:
|
292
|
+
if outdir is not None and update_index:
|
292
293
|
with open_fs(outdir, create=True) as fs:
|
293
294
|
with fs.open("pw_index.tsv", "w") as f:
|
294
295
|
pw_index.index.to_csv(f, sep="\t")
|
296
|
+
|
295
297
|
return pw_index
|