napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. napistu/__main__.py +38 -27
  2. napistu/consensus.py +22 -27
  3. napistu/constants.py +91 -65
  4. napistu/context/filtering.py +2 -1
  5. napistu/identifiers.py +3 -6
  6. napistu/indices.py +3 -1
  7. napistu/ingestion/bigg.py +6 -6
  8. napistu/ingestion/sbml.py +298 -295
  9. napistu/ingestion/string.py +16 -19
  10. napistu/ingestion/trrust.py +22 -27
  11. napistu/ingestion/yeast.py +2 -1
  12. napistu/matching/interactions.py +4 -4
  13. napistu/matching/species.py +1 -1
  14. napistu/modify/uncompartmentalize.py +1 -1
  15. napistu/network/net_create.py +1 -1
  16. napistu/network/paths.py +1 -1
  17. napistu/ontologies/dogma.py +2 -1
  18. napistu/ontologies/genodexito.py +5 -1
  19. napistu/ontologies/renaming.py +4 -0
  20. napistu/sbml_dfs_core.py +1343 -2167
  21. napistu/sbml_dfs_utils.py +1086 -143
  22. napistu/utils.py +52 -41
  23. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/METADATA +2 -2
  24. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/RECORD +40 -40
  25. tests/conftest.py +113 -13
  26. tests/test_consensus.py +161 -4
  27. tests/test_context_filtering.py +2 -2
  28. tests/test_gaps.py +26 -15
  29. tests/test_network_net_create.py +1 -1
  30. tests/test_network_precompute.py +1 -1
  31. tests/test_ontologies_genodexito.py +3 -0
  32. tests/test_ontologies_mygene.py +3 -0
  33. tests/test_ontologies_renaming.py +28 -24
  34. tests/test_sbml_dfs_core.py +260 -211
  35. tests/test_sbml_dfs_utils.py +194 -36
  36. tests/test_utils.py +19 -0
  37. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/WHEEL +0 -0
  38. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/entry_points.txt +0 -0
  39. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/licenses/LICENSE +0 -0
  40. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/top_level.txt +0 -0
tests/test_sbml_dfs_utils.py CHANGED
@@ -3,7 +3,14 @@ from __future__ import annotations
 import pandas as pd

 from napistu import sbml_dfs_utils
-from napistu.constants import BQB, BQB_DEFINING_ATTRS, BQB_DEFINING_ATTRS_LOOSE
+from napistu.constants import (
+    BQB,
+    BQB_DEFINING_ATTRS,
+    BQB_DEFINING_ATTRS_LOOSE,
+    SBML_DFS,
+    IDENTIFIERS,
+    SBOTERM_NAMES,
+)


 def test_id_formatter():
@@ -17,47 +24,198 @@ def test_id_formatter():
     assert list(input_vals) == inv_ids


-def test_get_characteristic_species_ids():
-    """
-    Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
-    """
-    # Create mock species identifiers data
-    mock_species_ids = pd.DataFrame(
-        {
-            "s_id": ["s1", "s2", "s3", "s4", "s5"],
-            "identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
-            "ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
-            "bqb": [
-                "BQB_IS",
-                "BQB_IS",
-                "BQB_HAS_PART",
-                "BQB_HAS_VERSION",
-                "BQB_ENCODES",
-            ],
-        }
+def test_filter_to_characteristic_species_ids():
+
+    species_ids_dict = {
+        SBML_DFS.S_ID: ["large_complex"] * 6
+        + ["small_complex"] * 2
+        + ["proteinA", "proteinB"]
+        + ["proteinC"] * 3
+        + [
+            "promiscuous_complexA",
+            "promiscuous_complexB",
+            "promiscuous_complexC",
+            "promiscuous_complexD",
+            "promiscuous_complexE",
+        ],
+        IDENTIFIERS.ONTOLOGY: ["complexportal"]
+        + ["HGNC"] * 7
+        + ["GO"] * 2
+        + ["ENSG", "ENSP", "pubmed"]
+        + ["HGNC"] * 5,
+        IDENTIFIERS.IDENTIFIER: [
+            "CPX-BIG",
+            "mem1",
+            "mem2",
+            "mem3",
+            "mem4",
+            "mem5",
+            "part1",
+            "part2",
+            "GO:1",
+            "GO:2",
+            "dna_seq",
+            "protein_seq",
+            "my_cool_pub",
+        ]
+        + ["promiscuous_complex"] * 5,
+        IDENTIFIERS.BQB: [BQB.IS]
+        + [BQB.HAS_PART] * 7
+        + [BQB.IS] * 2
+        + [
+            # these are retained if BQB_DEFINING_ATTRS_LOOSE is used
+            BQB.ENCODES,
+            BQB.IS_ENCODED_BY,
+            # this should always be removed
+            BQB.IS_DESCRIBED_BY,
+        ]
+        + [BQB.HAS_PART] * 5,
+    }
+
+    species_ids = pd.DataFrame(species_ids_dict)
+
+    characteristic_ids_narrow = sbml_dfs_utils.filter_to_characteristic_species_ids(
+        species_ids,
+        defining_biological_qualifiers=BQB_DEFINING_ATTRS,
+        max_complex_size=4,
+        max_promiscuity=4,
+    )
+
+    EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
+    assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
+
+    characteristic_ids_loose = sbml_dfs_utils.filter_to_characteristic_species_ids(
+        species_ids,
+        # include encodes and is_encoded_by as equivalent to is
+        defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
+        max_complex_size=4,
+        # expand promiscuity to default value
+        max_promiscuity=20,
     )

-    # Create mock SBML_dfs object
-    class MockSBML_dfs:
-        def get_identifiers(self, entity_type):
-            return mock_species_ids
+    EXPECTED_IDS = [
+        "CPX-BIG",
+        "GO:1",
+        "GO:2",
+        "dna_seq",
+        "protein_seq",
+        "part1",
+        "part2",
+    ] + ["promiscuous_complex"] * 5
+    assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS

-    mock_sbml = MockSBML_dfs()

-    # Test dogmatic case (default)
-    expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART]  # noqa: F841
-    dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(mock_sbml)
-    expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
+def test_formula(sbml_dfs):
+    # create a formula string

-    pd.testing.assert_frame_equal(dogmatic_result, expected_dogmatic, check_like=True)
+    an_r_id = sbml_dfs.reactions.index[0]

-    # Test non-dogmatic case
-    expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART]  # noqa: F841
-    non_dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(
-        mock_sbml, dogmatic=False
+    reaction_species_df = sbml_dfs.reaction_species[
+        sbml_dfs.reaction_species["r_id"] == an_r_id
+    ].merge(sbml_dfs.compartmentalized_species, left_on="sc_id", right_index=True)
+
+    formula_str = sbml_dfs_utils.construct_formula_string(
+        reaction_species_df, sbml_dfs.reactions, name_var="sc_name"
     )
-    expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")

-    pd.testing.assert_frame_equal(
-        non_dogmatic_result, expected_non_dogmatic, check_like=True
+    assert isinstance(formula_str, str)
+    assert (
+        formula_str
+        == "CO2 [extracellular region] -> CO2 [cytosol] ---- modifiers: AQP1 tetramer [plasma membrane]]"
     )
+
+
+def test_find_underspecified_reactions():
+
+    reaction_w_regulators = pd.DataFrame(
+        {
+            SBML_DFS.SC_ID: ["A", "B", "C", "D", "E", "F", "G"],
+            SBML_DFS.STOICHIOMETRY: [-1, -1, 1, 1, 0, 0, 0],
+            SBML_DFS.SBO_TERM: [
+                SBOTERM_NAMES.REACTANT,
+                SBOTERM_NAMES.REACTANT,
+                SBOTERM_NAMES.PRODUCT,
+                SBOTERM_NAMES.PRODUCT,
+                SBOTERM_NAMES.CATALYST,
+                SBOTERM_NAMES.CATALYST,
+                SBOTERM_NAMES.STIMULATOR,
+            ],
+        }
+    ).assign(r_id="bar")
+    reaction_w_regulators[SBML_DFS.RSC_ID] = [
+        f"rsc_{i}" for i in range(len(reaction_w_regulators))
+    ]
+    reaction_w_regulators.set_index(SBML_DFS.RSC_ID, inplace=True)
+    reaction_w_regulators = sbml_dfs_utils.add_sbo_role(reaction_w_regulators)
+
+    reaction_w_interactors = pd.DataFrame(
+        {
+            SBML_DFS.SC_ID: ["A", "B"],
+            SBML_DFS.STOICHIOMETRY: [-1, 1],
+            SBML_DFS.SBO_TERM: [SBOTERM_NAMES.REACTANT, SBOTERM_NAMES.REACTANT],
+        }
+    ).assign(r_id="baz")
+    reaction_w_interactors[SBML_DFS.RSC_ID] = [
+        f"rsc_{i}" for i in range(len(reaction_w_interactors))
+    ]
+    reaction_w_interactors.set_index(SBML_DFS.RSC_ID, inplace=True)
+    reaction_w_interactors = sbml_dfs_utils.add_sbo_role(reaction_w_interactors)
+
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_0", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == {"bar"}
+
+    # missing one enzyme -> operable
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_4", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == set()
+
+    # missing one product -> inoperable
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_2", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == {"bar"}
+
+    # missing all enzymes -> inoperable
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_4", "new"] = False
+    working_reactions.loc["rsc_5", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == {"bar"}
+
+    # missing regulators -> operable
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_6", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == set()
+
+    # remove an interactor
+    working_reactions = reaction_w_interactors.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_0", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == {"baz"}
+
+
+def test_stubbed_compartment():
+    compartment = sbml_dfs_utils.stub_compartments()
+
+    assert compartment["c_Identifiers"].iloc[0].ids[0] == {
+        "ontology": "go",
+        "identifier": "GO:0005575",
+        "url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
+        "bqb": "BQB_IS",
+    }
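The `test_find_underspecified_reactions` cases above encode a simple operability rule: a reaction becomes underspecified when any reactant or product is dropped, or when all of its catalysts are dropped, while losing only some catalysts or any regulator leaves it operable. A minimal standalone sketch of that rule (hypothetical helper and simplified column names, not napistu's actual `_find_underspecified_reactions`) might look like:

```python
import pandas as pd

def find_underspecified_sketch(rxn_species: pd.DataFrame) -> set:
    """Illustrative only: flag r_ids that lose a reactant/product or every catalyst.

    Assumes simplified columns: "r_id", "sbo_role" (e.g. "reactant", "product",
    "catalyst", "stimulator") and a boolean "new" marking whether the species
    survives filtering.
    """
    underspecified = set()
    for r_id, grp in rxn_species.groupby("r_id"):
        defining = grp[grp["sbo_role"].isin(["reactant", "product"])]
        catalysts = grp[grp["sbo_role"] == "catalyst"]
        lost_defining = (~defining["new"]).any()
        lost_all_catalysts = len(catalysts) > 0 and (~catalysts["new"]).all()
        if lost_defining or lost_all_catalysts:
            underspecified.add(r_id)
    return underspecified
```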
tests/test_utils.py CHANGED
@@ -686,3 +686,22 @@ def test_safe_fill():
         "a_very_long\nstringggg",
         "",
     ]
+
+
+def test_update_pathological_names():
+
+    # All numeric
+    s = pd.Series(["1", "2", "3"])
+    out = utils.update_pathological_names(s, "prefix_")
+    assert all(x.startswith("prefix_") for x in out)
+    assert list(out) == ["prefix_1", "prefix_2", "prefix_3"]
+
+    # Mixed numeric and non-numeric
+    s2 = pd.Series(["1", "foo", "3"])
+    out2 = utils.update_pathological_names(s2, "prefix_")
+    assert list(out2) == ["1", "foo", "3"]
+
+    # All non-numeric
+    s3 = pd.Series(["foo", "bar", "baz"])
+    out3 = utils.update_pathological_names(s3, "prefix_")
+    assert list(out3) == ["foo", "bar", "baz"]
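The assertions above pin down the intended contract of `update_pathological_names`: the prefix is applied only when every name in the series is numeric; otherwise the series passes through unchanged. A minimal sketch with that behavior (an illustration consistent with the test, not necessarily napistu's implementation) is:

```python
import pandas as pd

def update_pathological_names_sketch(names: pd.Series, prefix: str) -> pd.Series:
    # Prefix only when every entry is purely numeric (e.g. auto-generated ids).
    if names.str.isdigit().all():
        return prefix + names
    return names

# Consistent with the test: ["1", "2", "3"] -> ["prefix_1", "prefix_2", "prefix_3"],
# while ["1", "foo", "3"] and ["foo", "bar", "baz"] are returned unchanged.
```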