napistu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +12 -0
- napistu/__main__.py +867 -0
- napistu/consensus.py +1557 -0
- napistu/constants.py +500 -0
- napistu/gcs/__init__.py +10 -0
- napistu/gcs/constants.py +69 -0
- napistu/gcs/downloads.py +180 -0
- napistu/identifiers.py +805 -0
- napistu/indices.py +227 -0
- napistu/ingestion/__init__.py +10 -0
- napistu/ingestion/bigg.py +146 -0
- napistu/ingestion/constants.py +296 -0
- napistu/ingestion/cpr_edgelist.py +106 -0
- napistu/ingestion/identifiers_etl.py +148 -0
- napistu/ingestion/obo.py +268 -0
- napistu/ingestion/psi_mi.py +276 -0
- napistu/ingestion/reactome.py +218 -0
- napistu/ingestion/sbml.py +621 -0
- napistu/ingestion/string.py +356 -0
- napistu/ingestion/trrust.py +285 -0
- napistu/ingestion/yeast.py +147 -0
- napistu/mechanism_matching.py +597 -0
- napistu/modify/__init__.py +10 -0
- napistu/modify/constants.py +86 -0
- napistu/modify/curation.py +628 -0
- napistu/modify/gaps.py +635 -0
- napistu/modify/pathwayannot.py +1381 -0
- napistu/modify/uncompartmentalize.py +264 -0
- napistu/network/__init__.py +10 -0
- napistu/network/constants.py +117 -0
- napistu/network/neighborhoods.py +1594 -0
- napistu/network/net_create.py +1647 -0
- napistu/network/net_utils.py +652 -0
- napistu/network/paths.py +500 -0
- napistu/network/precompute.py +221 -0
- napistu/rpy2/__init__.py +127 -0
- napistu/rpy2/callr.py +168 -0
- napistu/rpy2/constants.py +101 -0
- napistu/rpy2/netcontextr.py +464 -0
- napistu/rpy2/rids.py +697 -0
- napistu/sbml_dfs_core.py +2216 -0
- napistu/sbml_dfs_utils.py +304 -0
- napistu/source.py +394 -0
- napistu/utils.py +943 -0
- napistu-0.1.0.dist-info/METADATA +56 -0
- napistu-0.1.0.dist-info/RECORD +77 -0
- napistu-0.1.0.dist-info/WHEEL +5 -0
- napistu-0.1.0.dist-info/entry_points.txt +2 -0
- napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
- napistu-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/conftest.py +83 -0
- tests/test_consensus.py +255 -0
- tests/test_constants.py +20 -0
- tests/test_curation.py +134 -0
- tests/test_data/__init__.py +0 -0
- tests/test_edgelist.py +20 -0
- tests/test_gcs.py +23 -0
- tests/test_identifiers.py +151 -0
- tests/test_igraph.py +353 -0
- tests/test_indices.py +88 -0
- tests/test_mechanism_matching.py +126 -0
- tests/test_net_utils.py +66 -0
- tests/test_netcontextr.py +105 -0
- tests/test_obo.py +34 -0
- tests/test_pathwayannot.py +95 -0
- tests/test_precomputed_distances.py +222 -0
- tests/test_rpy2.py +61 -0
- tests/test_sbml.py +46 -0
- tests/test_sbml_dfs_create.py +307 -0
- tests/test_sbml_dfs_utils.py +22 -0
- tests/test_sbo.py +11 -0
- tests/test_set_coverage.py +50 -0
- tests/test_source.py +67 -0
- tests/test_uncompartmentalize.py +40 -0
- tests/test_utils.py +487 -0
- tests/utils.py +30 -0
@@ -0,0 +1,126 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from napistu import mechanism_matching
|
6
|
+
from napistu.network import net_create
|
7
|
+
from napistu.network import precompute
|
8
|
+
|
9
|
+
|
10
|
+
def test_features_to_pathway_species(sbml_dfs):
    """Match a small table of ChEBI ids to pathway species and check the hit counts."""
    # four query ids are supplied; the index assertion below expects only
    # three of them ("-1" excluded) to appear among the matches
    query_features = pd.DataFrame({"chebis": ["17627", "15379", "29105", "-1"]})
    pathway_species_ids = sbml_dfs.get_identifiers("species")

    matches = mechanism_matching.features_to_pathway_species(
        query_features, pathway_species_ids, {"chebi"}, "chebis"
    )
    # number of matched rows per identifier, ordered by identifier
    hits_per_identifier = matches.value_counts("identifier").sort_index()

    assert hits_per_identifier.index.tolist() == ["15379", "17627", "29105"]
    assert hits_per_identifier.tolist() == [2, 3, 2]
|
25
|
+
|
26
|
+
|
27
|
+
def test_edgelist_to_pathway_species(sbml_dfs):
    """Map an identifier edgelist onto species, compartmentalized species and direct interactions."""
    # (upstream, downstream) identifier pairs
    identifier_pairs = [
        ("17996", "16526"),
        ("15377", "17544"),
        ("15378", "57945"),
        ("57540", "17996"),
    ]
    edgelist = pd.DataFrame(
        [
            {"identifier_upstream": upstream, "identifier_downstream": downstream}
            for upstream, downstream in identifier_pairs
        ]
    )
    # restrict to identifiers carrying the BQB_IS qualifier
    species_identifiers = sbml_dfs.get_identifiers("species").query("bqb == 'BQB_IS'")

    edgelist_w_sids = mechanism_matching.edgelist_to_pathway_species(
        edgelist, species_identifiers, ontologies={"chebi", "uniprot"}
    )
    assert edgelist_w_sids.shape == (4, 4)

    edgelist_w_scids = mechanism_matching.edgelist_to_scids(
        edgelist, sbml_dfs, species_identifiers, ontologies={"chebi"}
    )
    assert edgelist_w_scids.shape == (12, 6)

    direct_interactions = mechanism_matching.filter_to_direct_mechanistic_interactions(
        edgelist, sbml_dfs, species_identifiers, ontologies={"chebi"}
    )
    assert direct_interactions.shape == (2, 10)
|
55
|
+
|
56
|
+
|
57
|
+
def test_direct_and_indirect_mechanism_matching(sbml_dfs_glucose_metabolism):
    """Check direct/indirect interaction filtering, with and without precomputed distances."""
    cpr_graph = net_create.process_cpr_graph(sbml_dfs_glucose_metabolism)

    # (upstream, downstream) ChEBI id pairs
    chebi_pairs = [
        ("17925", "32966"),  # glu, fbp
        ("57634", "32966"),  # f6p, fbp
        ("32966", "57642"),  # fbp, dhap
        ("17925", "15361"),  # glu, pyr
    ]
    edgelist = pd.DataFrame(
        [
            {"identifier_upstream": upstream, "identifier_downstream": downstream}
            for upstream, downstream in chebi_pairs
        ]
    )

    species_identifiers = sbml_dfs_glucose_metabolism.get_identifiers("species")

    # keyword arguments common to every filtering call in this test
    shared_kwargs = {
        "formatted_edgelist": edgelist,
        "sbml_dfs": sbml_dfs_glucose_metabolism,
        "species_identifiers": species_identifiers,
        "ontologies": {"chebi"},
    }

    direct_interactions = mechanism_matching.filter_to_direct_mechanistic_interactions(
        **shared_kwargs
    )
    assert direct_interactions.shape == (2, 10)

    indirect_interactions = (
        mechanism_matching.filter_to_indirect_mechanistic_interactions(
            cpr_graph=cpr_graph,
            precomputed_distances=None,
            max_path_length=10,
            **shared_kwargs,
        )
    )
    assert indirect_interactions.shape == (6, 12)

    # confirm that we get the same thing even when using precomputed distances
    precomputed_distances = precompute.precompute_distances(
        cpr_graph, weights_vars=["weights"]
    )
    indirect_interactions_w_precompute = (
        mechanism_matching.filter_to_indirect_mechanistic_interactions(
            cpr_graph=cpr_graph,
            precomputed_distances=precomputed_distances,
            max_path_length=10,
            **shared_kwargs,
        )
    )

    weights_agree = (
        indirect_interactions["weight"] == indirect_interactions_w_precompute["weight"]
    )
    assert all(weights_agree)
|
tests/test_net_utils.py
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import pytest
|
4
|
+
|
5
|
+
import igraph as ig
|
6
|
+
import pandas as pd
|
7
|
+
from napistu.network import net_utils
|
8
|
+
from napistu.network import net_create
|
9
|
+
|
10
|
+
|
11
|
+
def test_safe_fill():
    """safe_fill should wrap a long label onto two lines and leave an empty string alone."""
    raw_labels = ["a_very_long stringggg", ""]
    expected_labels = ["a_very_long\nstringggg", ""]

    filled_labels = list(map(net_utils.safe_fill, raw_labels))

    assert filled_labels == expected_labels
|
17
|
+
|
18
|
+
|
19
|
+
def test_cpr_graph_to_pandas_dfs():
    """Round-trip a small directed igraph through cpr_graph_to_pandas_dfs."""
    # directed (source, target) edges over vertices 0..6
    edge_tuples = [
        (0, 1),
        (0, 2),
        (2, 3),
        (3, 4),
        (4, 2),
        (2, 5),
        (5, 0),
        (6, 3),
        (5, 6),
    ]

    graph = ig.Graph(edge_tuples, directed=True)
    vs, es = net_utils.cpr_graph_to_pandas_dfs(graph)

    # one vertex row per vertex id
    assert all(vs["index"] == list(range(7)))

    # the edge table must reproduce the input edges once both are sorted
    sort_cols = ["source", "target"]
    expected_edges = (
        pd.DataFrame(edge_tuples)
        .rename({0: "source", 1: "target"}, axis=1)
        .sort_values(sort_cols)
    )
    observed_edges = es.sort_values(sort_cols)
    assert expected_edges.equals(observed_edges)
|
42
|
+
|
43
|
+
|
44
|
+
def test_validate_graph_attributes(sbml_dfs):
    """Attribute validators return None for present attributes and raise for a missing one."""
    graph = net_create.process_cpr_graph(
        sbml_dfs, directed=True, weighting_strategy="topology"
    )

    edge_result = net_utils._validate_edge_attributes(
        graph, ["weights", "upstream_weights"]
    )
    assert edge_result is None

    vertex_result = net_utils._validate_vertex_attributes(graph, "node_type")
    assert vertex_result is None

    # "baz" is expected to be rejected as a vertex attribute
    with pytest.raises(ValueError):
        net_utils._validate_vertex_attributes(graph, "baz")
|
57
|
+
|
58
|
+
|
59
|
+
################################################
|
60
|
+
# __main__
|
61
|
+
################################################
|
62
|
+
|
63
|
+
if __name__ == "__main__":
    # test_validate_graph_attributes requires the ``sbml_dfs`` pytest fixture,
    # so calling the test functions directly would raise a TypeError. Let
    # pytest collect this file and inject the fixtures instead.
    raise SystemExit(pytest.main([__file__]))
|
@@ -0,0 +1,105 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
import pytest
|
5
|
+
from napistu import sbml_dfs_core
|
6
|
+
from napistu.identifiers import Identifiers
|
7
|
+
from napistu.rpy2 import netcontextr
|
8
|
+
from napistu.source import Source
|
9
|
+
|
10
|
+
|
11
|
+
@pytest.fixture
def sbml_dfs_one_reaction():
    """Build an sbml_dfs with a single a -> b interaction where only "a" is annotated."""
    edgelist_df = pd.DataFrame(
        {
            "upstream_name": ["a"],
            "downstream_name": ["b"],
            "upstream_compartment": ["nucleoplasm"],
            "downstream_compartment": ["nucleoplasm"],
            "r_name": ["a b of a"],
            "sbo_term": ["SBO:0000010"],
            "r_Identifiers": Identifiers([]),
            "r_isreversible": False,
        }
    )

    # species "a" carries one ensembl_gene identifier; "b" has none
    annotated_ids = Identifiers([{"ontology": "ensembl_gene", "identifier": "test"}])
    species_df = pd.DataFrame(
        {
            "s_name": ["a", "b"],
            "s_Identifiers": [annotated_ids, Identifiers([])],
        }
    )

    compartments_df = pd.DataFrame(
        {"c_name": ["nucleoplasm"], "c_Identifiers": Identifiers([])}
    )

    return sbml_dfs_core.sbml_dfs_from_edgelist(
        edgelist_df, species_df, compartments_df, Source(init=True)
    )
|
43
|
+
|
44
|
+
|
45
|
+
@pytest.fixture
def sbml_dfs_one_reaction_duplicated_identifiers():
    """Build an sbml_dfs with a single a -> b interaction where "a" has two identifiers."""
    edgelist_df = pd.DataFrame(
        {
            "upstream_name": ["a"],
            "downstream_name": ["b"],
            "upstream_compartment": ["nucleoplasm"],
            "downstream_compartment": ["nucleoplasm"],
            "r_name": ["a b of a"],
            "sbo_term": ["SBO:0000010"],
            "r_Identifiers": Identifiers([]),
            "r_isreversible": False,
        }
    )

    # species "a" carries two ensembl_gene identifiers; "b" has none
    doubly_annotated_ids = Identifiers(
        [
            {"ontology": "ensembl_gene", "identifier": "test"},
            {"ontology": "ensembl_gene", "identifier": "test2"},
        ]
    )
    species_df = pd.DataFrame(
        {
            "s_name": ["a", "b"],
            "s_Identifiers": [doubly_annotated_ids, Identifiers([])],
        }
    )

    compartments_df = pd.DataFrame(
        {"c_name": ["nucleoplasm"], "c_Identifiers": Identifiers([])}
    )

    return sbml_dfs_core.sbml_dfs_from_edgelist(
        edgelist_df, species_df, compartments_df, Source(init=True)
    )
|
85
|
+
|
86
|
+
|
87
|
+
def test_get_reactions_one_reaction(sbml_dfs_one_reaction):
    """A one-reaction model yields exactly one row, with no missing gene values."""
    reaction_table = netcontextr._get_reactions(sbml_dfs_one_reaction)

    gene_is_missing = reaction_table[netcontextr.COL_GENE].isna()
    assert not gene_is_missing.any()

    n_rows = reaction_table.shape[0]
    assert n_rows == 1
|
91
|
+
|
92
|
+
|
93
|
+
def test_get_reactions_outcols(sbml_dfs_one_reaction):
    """The reactions table exposes the gene, reaction-id and role columns."""
    reaction_table = netcontextr._get_reactions(sbml_dfs_one_reaction)

    required_columns = (
        netcontextr.COL_GENE,
        netcontextr.COL_REACTION_ID,
        netcontextr.COL_ROLE,
    )
    for column_name in required_columns:
        assert column_name in reaction_table.columns
|
98
|
+
|
99
|
+
|
100
|
+
def test_get_reactions_one_reaction_duplicated_ids(
    sbml_dfs_one_reaction_duplicated_identifiers,
):
    """A reactant with two identifiers yields two rows, with no missing gene values."""
    reaction_table = netcontextr._get_reactions(
        sbml_dfs_one_reaction_duplicated_identifiers
    )

    gene_is_missing = reaction_table[netcontextr.COL_GENE].isna()
    assert not gene_is_missing.any()

    n_rows = reaction_table.shape[0]
    assert n_rows == 2
|
tests/test_obo.py
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from napistu.ingestion import obo
|
4
|
+
|
5
|
+
|
6
|
+
def test_formatting_obo_attributes():
    """_format_entry_tuple splits on the first ': ' and returns None when there is no separator."""
    # only the first separator splits; the rest stays in the value
    parsed_entry = obo._format_entry_tuple("foo: bar: baz")
    assert parsed_entry == ("foo", "bar: baz")

    # an entry without a separator is expected to produce None
    unparsable_entry = obo._format_entry_tuple("foo")
    assert unparsable_entry is None
|
9
|
+
|
10
|
+
|
11
|
+
def test_formatting_go_isa_attributes():
    """_isa_str_list_to_dict_list turns "id ! name" strings into parent dicts."""
    # each case pairs the raw is_a strings with the expected parsed parents
    cases = [
        ([], []),
        (["foo ! bar"], [{"parent_id": "foo", "parent_name": "bar"}]),
        (
            ["foo ! bar", "fun ! baz"],
            [
                {"parent_id": "foo", "parent_name": "bar"},
                {"parent_id": "fun", "parent_name": "baz"},
            ],
        ),
    ]

    for raw_isa_strings, expected_parents in cases:
        parsed_parents = obo._isa_str_list_to_dict_list(raw_isa_strings)
        assert parsed_parents == expected_parents
|
26
|
+
|
27
|
+
|
28
|
+
################################################
|
29
|
+
# __main__
|
30
|
+
################################################
|
31
|
+
|
32
|
+
if __name__ == "__main__":
    # allow running this module directly; neither test needs a fixture
    for _test_fn in (
        test_formatting_obo_attributes,
        test_formatting_go_isa_attributes,
    ):
        _test_fn()
|
@@ -0,0 +1,95 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
|
5
|
+
from napistu.modify import pathwayannot
|
6
|
+
|
7
|
+
from napistu.constants import SBML_DFS
|
8
|
+
from napistu.modify.constants import COFACTOR_SCHEMA
|
9
|
+
from napistu.modify.constants import COFACTOR_CHEBI_IDS
|
10
|
+
|
11
|
+
# directory containing this test module
test_path = os.path.abspath(os.path.join(__file__, os.pardir))
# fixtures shipped alongside the tests
test_data = os.path.join(test_path, "test_data")
# reduced Reactome entity-set membership export
reduced_neo4j_members_path = os.path.join(test_data, "reduced_neo4j_members.csv")
# reduced Reactome cross-reference export (previously pointed at the members
# file by mistake)
reduced_neo4j_cross_refs_path = os.path.join(
    test_data, "reduced_neo4j_cross_refs.csv"
)
|
15
|
+
|
16
|
+
|
17
|
+
def test_cofactor_specifications():
    """Every cofactor label used in COFACTOR_SCHEMA must have an entry in COFACTOR_CHEBI_IDS.

    Cofactors are manually annotated with their ChEBI ID so that they can be
    matched to the corresponding ChEBI ID of molecular species.
    """
    known_labels = set(COFACTOR_CHEBI_IDS["cofactor"].tolist())

    # flatten schema -> per-entry values -> individual labels
    schema_labels = set()
    for schema_entry in COFACTOR_SCHEMA.values():
        for label_group in schema_entry.values():
            schema_labels.update(label_group)

    # check whether all species in cofactor schema are tied with an ID
    undefined_labels = schema_labels - known_labels
    if undefined_labels:
        raise ValueError(
            f"{', '.join(undefined_labels)} are not defined in \"COFACTOR_CHEBI_IDS\""
        )
|
33
|
+
|
34
|
+
|
35
|
+
def test_add_reactome_entity_sets(sbml_dfs_glucose_metabolism):
    """Adding entity sets keeps all species/reactions and adds the expected new ones."""
    # annotate the model with a reduced subset of the Reactome entity set exports
    sbml_dfs_with_entity_sets = pathwayannot.add_entity_sets(
        sbml_dfs_glucose_metabolism, reduced_neo4j_members_path
    )

    species_before = set(sbml_dfs_glucose_metabolism.species[SBML_DFS.S_NAME])
    species_after = set(sbml_dfs_with_entity_sets.species[SBML_DFS.S_NAME])

    # no species may disappear
    assert len(species_before - species_after) == 0
    # exactly these species are introduced
    assert species_after - species_before == {
        "HK1",
        "HK2",
        "HK3",
        "PRKACA",
        "PRKACB",
        "PRKACG",
        "SLC25A12",
        "SLC25A13",
        "SLC37A1",
    }

    reactions_before = set(sbml_dfs_glucose_metabolism.reactions[SBML_DFS.R_NAME])
    reactions_after = set(sbml_dfs_with_entity_sets.reactions[SBML_DFS.R_NAME])

    # no reaction may disappear, and ten new ones are expected
    assert len(reactions_before - reactions_after) == 0
    assert len(reactions_after - reactions_before) == 10
|
71
|
+
|
72
|
+
|
73
|
+
def test_add_reactome_cross_refs(sbml_dfs_glucose_metabolism):
    """Adding Reactome cross-references adds species identifiers only.

    The reaction_species table must keep its row count, while the species
    identifier table is expected to grow by 88 rows.
    """
    # test adding cross-references to a Reactome model
    sbml_dfs_with_cross_refs = pathwayannot.add_reactome_identifiers(
        sbml_dfs_glucose_metabolism,
        os.path.join(test_data, "reduced_neo4j_cross_refs.csv"),
    )

    # previously a bare comparison with no effect; assert it so a change in
    # the number of reaction species actually fails the test
    assert (
        sbml_dfs_glucose_metabolism.reaction_species.shape[0]
        == sbml_dfs_with_cross_refs.reaction_species.shape[0]
    )

    previous_species_identifiers = sbml_dfs_glucose_metabolism.get_identifiers(
        SBML_DFS.SPECIES
    )
    updated_species_identifiers = sbml_dfs_with_cross_refs.get_identifiers(
        SBML_DFS.SPECIES
    )
    assert (
        updated_species_identifiers.shape[0] - previous_species_identifiers.shape[0]
        == 88
    )
|
@@ -0,0 +1,222 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
import pandas as pd
|
7
|
+
from napistu import sbml_dfs_core
|
8
|
+
from napistu.ingestion import sbml
|
9
|
+
from napistu.network import neighborhoods
|
10
|
+
from napistu.network import net_create
|
11
|
+
from napistu.network import paths
|
12
|
+
from napistu.network import precompute
|
13
|
+
|
14
|
+
# number of species to include when finding all x all paths
N_SPECIES = 12

# setting for neighborhoods
NETWORK_TYPE = "hourglass"
ORDER = 20
TOP_N = 20

# locate the SBML fixture next to this test module and fail early if absent
test_path = os.path.abspath(os.path.join(__file__, os.pardir))
sbml_path = os.path.join(test_path, "test_data", "reactome_glucose_metabolism.sbml")
if not os.path.isfile(sbml_path):
    raise ValueError(f"{sbml_path} not found")

# load and validate the model shared by every test in this module
sbml_model = sbml.SBML(sbml_path).model
sbml_dfs = sbml_dfs_core.SBML_dfs(sbml_model)
sbml_dfs.validate()

# graph and precomputed distances shared by every test in this module
_graph_options = {
    "graph_type": "bipartite",
    "directed": True,
    "weighting_strategy": "topology",
}
cpr_graph = net_create.process_cpr_graph(sbml_dfs, **_graph_options)

precomputed_distances = precompute.precompute_distances(
    cpr_graph, max_steps=30000, max_score_q=1
)
|
38
|
+
|
39
|
+
|
40
|
+
def test_precomputed_distances():
    """The module-level precomputed distance table has the expected dimensions."""
    n_rows, n_cols = precomputed_distances.shape
    assert (n_rows, n_cols) == (3934, 5)
|
42
|
+
|
43
|
+
|
44
|
+
def test_precomputed_distances_shortest_paths():
    """Shortest-path weights must agree with, and be unchanged by, precomputed distances."""

    def _lightest_path_per_pair(vertices):
        # sum vertex weights per candidate path, then keep the lowest-weight
        # path for every (origin, dest) pair
        per_path_totals = (
            vertices.groupby(["origin", "dest", "path"])["weights"].sum().reset_index()
        )
        return (
            per_path_totals.sort_values("weights")
            .groupby(["origin", "dest"])
            .first()
            .reset_index()
        )

    focal_ids = sbml_dfs.compartmentalized_species.index.tolist()[0:N_SPECIES]

    # we should get the same answer for shortest paths whether or not we use
    # pre-computed distances
    pair_rows = [(origin, dest) for origin in focal_ids for dest in focal_ids]
    all_species_pairs = pd.DataFrame(
        np.array(pair_rows),
        columns=["sc_id_origin", "sc_id_dest"],
    )

    path_vertices, _, _, _ = paths.find_all_shortest_reaction_paths(
        cpr_graph, sbml_dfs, all_species_pairs
    )
    shortest_path_weights = _lightest_path_per_pair(path_vertices)

    # restrict the precomputed table to pairs drawn from the focal subset
    origin_in_subset = precomputed_distances["sc_id_origin"].isin(focal_ids)
    dest_in_subset = precomputed_distances["sc_id_dest"].isin(focal_ids)
    keep_row = (origin_in_subset & dest_in_subset).tolist()
    precomputed_distance_subset = precomputed_distances[keep_row]

    path_method_comparison_full_merge = shortest_path_weights.merge(
        precomputed_distance_subset,
        left_on=["origin", "dest"],
        right_on=["sc_id_origin", "sc_id_dest"],
        how="outer",
    )

    # tables have identical pairs with a valid path
    n_merged = path_method_comparison_full_merge.shape[0]
    assert n_merged == precomputed_distance_subset.shape[0]
    assert n_merged == shortest_path_weights.shape[0]

    weight_gap = abs(
        path_method_comparison_full_merge["weights"]
        - path_method_comparison_full_merge["path_weights"]
    )
    assert all(weight_gap < 1e-13)

    # using the precomputed distances generates the same result as excluding it
    precompute_path_vertices, _, _, _ = paths.find_all_shortest_reaction_paths(
        cpr_graph,
        sbml_dfs,
        all_species_pairs,
        precomputed_distances=precomputed_distances,
    )
    precompute_shortest_path_weights = _lightest_path_per_pair(
        precompute_path_vertices
    )

    path_key = ["origin", "dest", "path"]
    precompute_full_merge = shortest_path_weights.merge(
        precompute_shortest_path_weights,
        left_on=path_key,
        right_on=path_key,
        how="outer",
    )

    n_precompute_merged = precompute_full_merge.shape[0]
    assert n_precompute_merged == precompute_shortest_path_weights.shape[0]
    assert n_precompute_merged == shortest_path_weights.shape[0]

    precompute_weight_gap = abs(
        precompute_full_merge["weights_x"] - precompute_full_merge["weights_y"]
    )
    assert all(precompute_weight_gap < 1e-13)
|
133
|
+
|
134
|
+
|
135
|
+
def test_precomputed_distances_neighborhoods():
    """Pruned neighborhoods must be the same with and without precomputed distances."""
    # compartmentalized species belonging to species S00000000
    cspecies = sbml_dfs.compartmentalized_species
    compartmentalized_species = cspecies[cspecies["s_id"] == "S00000000"].index.tolist()

    # settings common to both neighborhood searches
    search_settings = {
        "network_type": NETWORK_TYPE,
        "order": ORDER,
        "verbose": True,
        "top_n": TOP_N,
    }

    pruned_neighborhoods_precomputed = neighborhoods.find_and_prune_neighborhoods(
        sbml_dfs,
        cpr_graph,
        compartmentalized_species,
        precomputed_distances=precomputed_distances,
        **search_settings,
    )

    pruned_neighborhoods_otf = neighborhoods.find_and_prune_neighborhoods(
        sbml_dfs,
        cpr_graph,
        compartmentalized_species,
        precomputed_distances=None,
        **search_settings,
    )

    join_key = ["name", "node_name", "node_orientation"]
    join_key_w_vars = join_key + ["path_weight", "path_length"]

    per_focal_comparisons = []
    for focal_id in pruned_neighborhoods_precomputed:
        otf_vertices = pruned_neighborhoods_otf[focal_id]["vertices"]
        precomputed_vertices = pruned_neighborhoods_precomputed[focal_id]["vertices"]

        # flag which method produced each vertex, then outer-join so a vertex
        # found by only one method ends up with a False flag for the other
        flagged_precomputed = precomputed_vertices[join_key_w_vars].assign(
            in_precompute=True
        )
        flagged_otf = otf_vertices[join_key_w_vars].assign(in_otf=True)
        neighbor_comparison = flagged_precomputed.merge(
            flagged_otf,
            left_on=join_key,
            right_on=join_key,
            how="outer",
        ).fillna(False)

        per_focal_comparisons.append(neighbor_comparison.assign(focal_sc_id=focal_id))

    comparison_df = pd.concat(per_focal_comparisons)
    comparison_df_disagreements = comparison_df.query("in_precompute != in_otf")

    # pruned neighborhoods are identical with and without using precalculated neighbors
    assert comparison_df_disagreements.shape[0] == 0

    # compare shortest paths calculated through neighborhoods with precomputed distances
    # which should be the same if we are pre-selecting the correct neighbors
    # as part of _precompute_neighbors()
    downstream_rows = comparison_df[comparison_df["node_orientation"] == "downstream"]
    downstream_disagreement_w_precompute = downstream_rows.merge(
        precomputed_distances,
        left_on=["focal_sc_id", "name"],
        right_on=["sc_id_origin", "sc_id_dest"],
    ).query("abs(path_weight_x - path_weights) > 1e-13")

    # for upstream neighbors the focal node is the destination of the path
    upstream_rows = comparison_df[comparison_df["node_orientation"] == "upstream"]
    upstream_disagreement_w_precompute = upstream_rows.merge(
        precomputed_distances,
        left_on=["focal_sc_id", "name"],
        right_on=["sc_id_dest", "sc_id_origin"],
    ).query("abs(path_weight_x - path_upstream_weights) > 1e-13")

    assert downstream_disagreement_w_precompute.shape[0] == 0
    assert upstream_disagreement_w_precompute.shape[0] == 0
|
213
|
+
|
214
|
+
|
215
|
+
################################################
|
216
|
+
# __main__
|
217
|
+
################################################
|
218
|
+
|
219
|
+
if __name__ == "__main__":
    # allow running this module directly; none of these tests needs a fixture
    for _test_fn in (
        test_precomputed_distances,
        test_precomputed_distances_shortest_paths,
        test_precomputed_distances_neighborhoods,
    ):
        _test_fn()
|