napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +38 -27
- napistu/consensus.py +22 -27
- napistu/constants.py +91 -65
- napistu/context/filtering.py +2 -1
- napistu/identifiers.py +3 -6
- napistu/indices.py +3 -1
- napistu/ingestion/bigg.py +6 -6
- napistu/ingestion/sbml.py +298 -295
- napistu/ingestion/string.py +16 -19
- napistu/ingestion/trrust.py +22 -27
- napistu/ingestion/yeast.py +2 -1
- napistu/matching/interactions.py +4 -4
- napistu/matching/species.py +1 -1
- napistu/modify/uncompartmentalize.py +1 -1
- napistu/network/net_create.py +1 -1
- napistu/network/paths.py +1 -1
- napistu/ontologies/dogma.py +2 -1
- napistu/ontologies/genodexito.py +5 -1
- napistu/ontologies/renaming.py +4 -0
- napistu/sbml_dfs_core.py +1343 -2167
- napistu/sbml_dfs_utils.py +1086 -143
- napistu/utils.py +52 -41
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/METADATA +2 -2
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/RECORD +40 -40
- tests/conftest.py +113 -13
- tests/test_consensus.py +161 -4
- tests/test_context_filtering.py +2 -2
- tests/test_gaps.py +26 -15
- tests/test_network_net_create.py +1 -1
- tests/test_network_precompute.py +1 -1
- tests/test_ontologies_genodexito.py +3 -0
- tests/test_ontologies_mygene.py +3 -0
- tests/test_ontologies_renaming.py +28 -24
- tests/test_sbml_dfs_core.py +260 -211
- tests/test_sbml_dfs_utils.py +194 -36
- tests/test_utils.py +19 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/WHEEL +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/top_level.txt +0 -0
tests/test_sbml_dfs_utils.py
CHANGED
@@ -3,7 +3,14 @@ from __future__ import annotations
|
|
3
3
|
import pandas as pd
|
4
4
|
|
5
5
|
from napistu import sbml_dfs_utils
|
6
|
-
from napistu.constants import
|
6
|
+
from napistu.constants import (
|
7
|
+
BQB,
|
8
|
+
BQB_DEFINING_ATTRS,
|
9
|
+
BQB_DEFINING_ATTRS_LOOSE,
|
10
|
+
SBML_DFS,
|
11
|
+
IDENTIFIERS,
|
12
|
+
SBOTERM_NAMES,
|
13
|
+
)
|
7
14
|
|
8
15
|
|
9
16
|
def test_id_formatter():
|
@@ -17,47 +24,198 @@ def test_id_formatter():
|
|
17
24
|
assert list(input_vals) == inv_ids
|
18
25
|
|
19
26
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
"
|
29
|
-
"
|
30
|
-
"
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
27
|
+
def test_filter_to_characteristic_species_ids():
|
28
|
+
|
29
|
+
species_ids_dict = {
|
30
|
+
SBML_DFS.S_ID: ["large_complex"] * 6
|
31
|
+
+ ["small_complex"] * 2
|
32
|
+
+ ["proteinA", "proteinB"]
|
33
|
+
+ ["proteinC"] * 3
|
34
|
+
+ [
|
35
|
+
"promiscuous_complexA",
|
36
|
+
"promiscuous_complexB",
|
37
|
+
"promiscuous_complexC",
|
38
|
+
"promiscuous_complexD",
|
39
|
+
"promiscuous_complexE",
|
40
|
+
],
|
41
|
+
IDENTIFIERS.ONTOLOGY: ["complexportal"]
|
42
|
+
+ ["HGNC"] * 7
|
43
|
+
+ ["GO"] * 2
|
44
|
+
+ ["ENSG", "ENSP", "pubmed"]
|
45
|
+
+ ["HGNC"] * 5,
|
46
|
+
IDENTIFIERS.IDENTIFIER: [
|
47
|
+
"CPX-BIG",
|
48
|
+
"mem1",
|
49
|
+
"mem2",
|
50
|
+
"mem3",
|
51
|
+
"mem4",
|
52
|
+
"mem5",
|
53
|
+
"part1",
|
54
|
+
"part2",
|
55
|
+
"GO:1",
|
56
|
+
"GO:2",
|
57
|
+
"dna_seq",
|
58
|
+
"protein_seq",
|
59
|
+
"my_cool_pub",
|
60
|
+
]
|
61
|
+
+ ["promiscuous_complex"] * 5,
|
62
|
+
IDENTIFIERS.BQB: [BQB.IS]
|
63
|
+
+ [BQB.HAS_PART] * 7
|
64
|
+
+ [BQB.IS] * 2
|
65
|
+
+ [
|
66
|
+
# these are retained if BQB_DEFINING_ATTRS_LOOSE is used
|
67
|
+
BQB.ENCODES,
|
68
|
+
BQB.IS_ENCODED_BY,
|
69
|
+
# this should always be removed
|
70
|
+
BQB.IS_DESCRIBED_BY,
|
71
|
+
]
|
72
|
+
+ [BQB.HAS_PART] * 5,
|
73
|
+
}
|
74
|
+
|
75
|
+
species_ids = pd.DataFrame(species_ids_dict)
|
76
|
+
|
77
|
+
characteristic_ids_narrow = sbml_dfs_utils.filter_to_characteristic_species_ids(
|
78
|
+
species_ids,
|
79
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS,
|
80
|
+
max_complex_size=4,
|
81
|
+
max_promiscuity=4,
|
82
|
+
)
|
83
|
+
|
84
|
+
EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
|
85
|
+
assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
86
|
+
|
87
|
+
characteristic_ids_loose = sbml_dfs_utils.filter_to_characteristic_species_ids(
|
88
|
+
species_ids,
|
89
|
+
# include encodes and is_encoded_by as equivalent to is
|
90
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
|
91
|
+
max_complex_size=4,
|
92
|
+
# expand promiscuity to default value
|
93
|
+
max_promiscuity=20,
|
38
94
|
)
|
39
95
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
96
|
+
EXPECTED_IDS = [
|
97
|
+
"CPX-BIG",
|
98
|
+
"GO:1",
|
99
|
+
"GO:2",
|
100
|
+
"dna_seq",
|
101
|
+
"protein_seq",
|
102
|
+
"part1",
|
103
|
+
"part2",
|
104
|
+
] + ["promiscuous_complex"] * 5
|
105
|
+
assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
44
106
|
|
45
|
-
mock_sbml = MockSBML_dfs()
|
46
107
|
|
47
|
-
|
48
|
-
|
49
|
-
dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(mock_sbml)
|
50
|
-
expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
108
|
+
def test_formula(sbml_dfs):
|
109
|
+
# create a formula string
|
51
110
|
|
52
|
-
|
111
|
+
an_r_id = sbml_dfs.reactions.index[0]
|
53
112
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
113
|
+
reaction_species_df = sbml_dfs.reaction_species[
|
114
|
+
sbml_dfs.reaction_species["r_id"] == an_r_id
|
115
|
+
].merge(sbml_dfs.compartmentalized_species, left_on="sc_id", right_index=True)
|
116
|
+
|
117
|
+
formula_str = sbml_dfs_utils.construct_formula_string(
|
118
|
+
reaction_species_df, sbml_dfs.reactions, name_var="sc_name"
|
58
119
|
)
|
59
|
-
expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
60
120
|
|
61
|
-
|
62
|
-
|
121
|
+
assert isinstance(formula_str, str)
|
122
|
+
assert (
|
123
|
+
formula_str
|
124
|
+
== "CO2 [extracellular region] -> CO2 [cytosol] ---- modifiers: AQP1 tetramer [plasma membrane]]"
|
63
125
|
)
|
126
|
+
|
127
|
+
|
128
|
+
def test_find_underspecified_reactions():
|
129
|
+
|
130
|
+
reaction_w_regulators = pd.DataFrame(
|
131
|
+
{
|
132
|
+
SBML_DFS.SC_ID: ["A", "B", "C", "D", "E", "F", "G"],
|
133
|
+
SBML_DFS.STOICHIOMETRY: [-1, -1, 1, 1, 0, 0, 0],
|
134
|
+
SBML_DFS.SBO_TERM: [
|
135
|
+
SBOTERM_NAMES.REACTANT,
|
136
|
+
SBOTERM_NAMES.REACTANT,
|
137
|
+
SBOTERM_NAMES.PRODUCT,
|
138
|
+
SBOTERM_NAMES.PRODUCT,
|
139
|
+
SBOTERM_NAMES.CATALYST,
|
140
|
+
SBOTERM_NAMES.CATALYST,
|
141
|
+
SBOTERM_NAMES.STIMULATOR,
|
142
|
+
],
|
143
|
+
}
|
144
|
+
).assign(r_id="bar")
|
145
|
+
reaction_w_regulators[SBML_DFS.RSC_ID] = [
|
146
|
+
f"rsc_{i}" for i in range(len(reaction_w_regulators))
|
147
|
+
]
|
148
|
+
reaction_w_regulators.set_index(SBML_DFS.RSC_ID, inplace=True)
|
149
|
+
reaction_w_regulators = sbml_dfs_utils.add_sbo_role(reaction_w_regulators)
|
150
|
+
|
151
|
+
reaction_w_interactors = pd.DataFrame(
|
152
|
+
{
|
153
|
+
SBML_DFS.SC_ID: ["A", "B"],
|
154
|
+
SBML_DFS.STOICHIOMETRY: [-1, 1],
|
155
|
+
SBML_DFS.SBO_TERM: [SBOTERM_NAMES.REACTANT, SBOTERM_NAMES.REACTANT],
|
156
|
+
}
|
157
|
+
).assign(r_id="baz")
|
158
|
+
reaction_w_interactors[SBML_DFS.RSC_ID] = [
|
159
|
+
f"rsc_{i}" for i in range(len(reaction_w_interactors))
|
160
|
+
]
|
161
|
+
reaction_w_interactors.set_index(SBML_DFS.RSC_ID, inplace=True)
|
162
|
+
reaction_w_interactors = sbml_dfs_utils.add_sbo_role(reaction_w_interactors)
|
163
|
+
|
164
|
+
working_reactions = reaction_w_regulators.copy()
|
165
|
+
working_reactions["new"] = True
|
166
|
+
working_reactions.loc["rsc_0", "new"] = False
|
167
|
+
working_reactions
|
168
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
169
|
+
assert result == {"bar"}
|
170
|
+
|
171
|
+
# missing one enzyme -> operable
|
172
|
+
working_reactions = reaction_w_regulators.copy()
|
173
|
+
working_reactions["new"] = True
|
174
|
+
working_reactions.loc["rsc_4", "new"] = False
|
175
|
+
working_reactions
|
176
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
177
|
+
assert result == set()
|
178
|
+
|
179
|
+
# missing one product -> inoperable
|
180
|
+
working_reactions = reaction_w_regulators.copy()
|
181
|
+
working_reactions["new"] = True
|
182
|
+
working_reactions.loc["rsc_2", "new"] = False
|
183
|
+
working_reactions
|
184
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
185
|
+
assert result == {"bar"}
|
186
|
+
|
187
|
+
# missing all enzymes -> inoperable
|
188
|
+
working_reactions = reaction_w_regulators.copy()
|
189
|
+
working_reactions["new"] = True
|
190
|
+
working_reactions.loc["rsc_4", "new"] = False
|
191
|
+
working_reactions.loc["rsc_5", "new"] = False
|
192
|
+
working_reactions
|
193
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
194
|
+
assert result == {"bar"}
|
195
|
+
|
196
|
+
# missing regulators -> operable
|
197
|
+
working_reactions = reaction_w_regulators.copy()
|
198
|
+
working_reactions["new"] = True
|
199
|
+
working_reactions.loc["rsc_6", "new"] = False
|
200
|
+
working_reactions
|
201
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
202
|
+
assert result == set()
|
203
|
+
|
204
|
+
# remove an interactor
|
205
|
+
working_reactions = reaction_w_interactors.copy()
|
206
|
+
working_reactions["new"] = True
|
207
|
+
working_reactions.loc["rsc_0", "new"] = False
|
208
|
+
working_reactions
|
209
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
210
|
+
assert result == {"baz"}
|
211
|
+
|
212
|
+
|
213
|
+
def test_stubbed_compartment():
|
214
|
+
compartment = sbml_dfs_utils.stub_compartments()
|
215
|
+
|
216
|
+
assert compartment["c_Identifiers"].iloc[0].ids[0] == {
|
217
|
+
"ontology": "go",
|
218
|
+
"identifier": "GO:0005575",
|
219
|
+
"url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
|
220
|
+
"bqb": "BQB_IS",
|
221
|
+
}
|
tests/test_utils.py
CHANGED
@@ -686,3 +686,22 @@ def test_safe_fill():
|
|
686
686
|
"a_very_long\nstringggg",
|
687
687
|
"",
|
688
688
|
]
|
689
|
+
|
690
|
+
|
691
|
+
def test_update_pathological_names():
|
692
|
+
|
693
|
+
# All numeric
|
694
|
+
s = pd.Series(["1", "2", "3"])
|
695
|
+
out = utils.update_pathological_names(s, "prefix_")
|
696
|
+
assert all(x.startswith("prefix_") for x in out)
|
697
|
+
assert list(out) == ["prefix_1", "prefix_2", "prefix_3"]
|
698
|
+
|
699
|
+
# Mixed numeric and non-numeric
|
700
|
+
s2 = pd.Series(["1", "foo", "3"])
|
701
|
+
out2 = utils.update_pathological_names(s2, "prefix_")
|
702
|
+
assert list(out2) == ["1", "foo", "3"]
|
703
|
+
|
704
|
+
# All non-numeric
|
705
|
+
s3 = pd.Series(["foo", "bar", "baz"])
|
706
|
+
out3 = utils.update_pathological_names(s3, "prefix_")
|
707
|
+
assert list(out3) == ["foo", "bar", "baz"]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|