napistu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +12 -0
- napistu/__main__.py +867 -0
- napistu/consensus.py +1557 -0
- napistu/constants.py +500 -0
- napistu/gcs/__init__.py +10 -0
- napistu/gcs/constants.py +69 -0
- napistu/gcs/downloads.py +180 -0
- napistu/identifiers.py +805 -0
- napistu/indices.py +227 -0
- napistu/ingestion/__init__.py +10 -0
- napistu/ingestion/bigg.py +146 -0
- napistu/ingestion/constants.py +296 -0
- napistu/ingestion/cpr_edgelist.py +106 -0
- napistu/ingestion/identifiers_etl.py +148 -0
- napistu/ingestion/obo.py +268 -0
- napistu/ingestion/psi_mi.py +276 -0
- napistu/ingestion/reactome.py +218 -0
- napistu/ingestion/sbml.py +621 -0
- napistu/ingestion/string.py +356 -0
- napistu/ingestion/trrust.py +285 -0
- napistu/ingestion/yeast.py +147 -0
- napistu/mechanism_matching.py +597 -0
- napistu/modify/__init__.py +10 -0
- napistu/modify/constants.py +86 -0
- napistu/modify/curation.py +628 -0
- napistu/modify/gaps.py +635 -0
- napistu/modify/pathwayannot.py +1381 -0
- napistu/modify/uncompartmentalize.py +264 -0
- napistu/network/__init__.py +10 -0
- napistu/network/constants.py +117 -0
- napistu/network/neighborhoods.py +1594 -0
- napistu/network/net_create.py +1647 -0
- napistu/network/net_utils.py +652 -0
- napistu/network/paths.py +500 -0
- napistu/network/precompute.py +221 -0
- napistu/rpy2/__init__.py +127 -0
- napistu/rpy2/callr.py +168 -0
- napistu/rpy2/constants.py +101 -0
- napistu/rpy2/netcontextr.py +464 -0
- napistu/rpy2/rids.py +697 -0
- napistu/sbml_dfs_core.py +2216 -0
- napistu/sbml_dfs_utils.py +304 -0
- napistu/source.py +394 -0
- napistu/utils.py +943 -0
- napistu-0.1.0.dist-info/METADATA +56 -0
- napistu-0.1.0.dist-info/RECORD +77 -0
- napistu-0.1.0.dist-info/WHEEL +5 -0
- napistu-0.1.0.dist-info/entry_points.txt +2 -0
- napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
- napistu-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/conftest.py +83 -0
- tests/test_consensus.py +255 -0
- tests/test_constants.py +20 -0
- tests/test_curation.py +134 -0
- tests/test_data/__init__.py +0 -0
- tests/test_edgelist.py +20 -0
- tests/test_gcs.py +23 -0
- tests/test_identifiers.py +151 -0
- tests/test_igraph.py +353 -0
- tests/test_indices.py +88 -0
- tests/test_mechanism_matching.py +126 -0
- tests/test_net_utils.py +66 -0
- tests/test_netcontextr.py +105 -0
- tests/test_obo.py +34 -0
- tests/test_pathwayannot.py +95 -0
- tests/test_precomputed_distances.py +222 -0
- tests/test_rpy2.py +61 -0
- tests/test_sbml.py +46 -0
- tests/test_sbml_dfs_create.py +307 -0
- tests/test_sbml_dfs_utils.py +22 -0
- tests/test_sbo.py +11 -0
- tests/test_set_coverage.py +50 -0
- tests/test_source.py +67 -0
- tests/test_uncompartmentalize.py +40 -0
- tests/test_utils.py +487 -0
- tests/utils.py +30 -0
@@ -0,0 +1,621 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
import re
|
6
|
+
|
7
|
+
import libsbml
|
8
|
+
import pandas as pd
|
9
|
+
from napistu import consensus
|
10
|
+
from napistu import constants
|
11
|
+
from napistu import identifiers
|
12
|
+
from napistu import sbml_dfs_utils
|
13
|
+
from napistu import source
|
14
|
+
from napistu import utils
|
15
|
+
|
16
|
+
from napistu.constants import BQB
|
17
|
+
|
18
|
+
from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_COMPARTMENT
|
19
|
+
from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_REACTION
|
20
|
+
from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_SPECIES
|
21
|
+
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_ID
|
22
|
+
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_IDENTIFIERS
|
23
|
+
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_NAME
|
24
|
+
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_SOURCE
|
25
|
+
from napistu.ingestion.constants import SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME
|
26
|
+
from napistu.ingestion.constants import SBML_COMPARTMENTALIZED_SPECIES_DICT_SOURCE
|
27
|
+
from napistu.ingestion.constants import SBML_REACTION_ATTR_GET_GENE_PRODUCT
|
28
|
+
from napistu.ingestion.constants import SBML_SPECIES_DICT_ID
|
29
|
+
from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
|
30
|
+
from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
|
31
|
+
from napistu.ingestion.constants import SMBL_ERROR_CATEGORY
|
32
|
+
from napistu.ingestion.constants import SMBL_ERROR_DESCRIPTION
|
33
|
+
from napistu.ingestion.constants import SMBL_ERROR_MESSAGE
|
34
|
+
from napistu.ingestion.constants import SMBL_ERROR_NUMBER
|
35
|
+
from napistu.ingestion.constants import SMBL_ERROR_SEVERITY
|
36
|
+
from napistu.ingestion.constants import SMBL_REACTION_DICT_ID
|
37
|
+
from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
|
38
|
+
from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
|
39
|
+
from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
|
40
|
+
from napistu.ingestion.constants import SMBL_REACTION_DICT_SOURCE
|
41
|
+
from napistu.ingestion.constants import SMBL_REACTION_SPEC_RSC_ID
|
42
|
+
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
|
43
|
+
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SC_ID
|
44
|
+
from napistu.ingestion.constants import SMBL_REACTION_SPEC_STOICHIOMETRY
|
45
|
+
from napistu.ingestion.constants import SMBL_SUMMARY_COMPARTMENTS
|
46
|
+
from napistu.ingestion.constants import SMBL_SUMMARY_N_REACTIONS
|
47
|
+
from napistu.ingestion.constants import SMBL_SUMMARY_N_SPECIES
|
48
|
+
from napistu.ingestion.constants import SMBL_SUMMARY_PATHWAY_ID
|
49
|
+
from napistu.ingestion.constants import SMBL_SUMMARY_PATHWAY_NAME
|
50
|
+
|
51
|
+
from fs import open_fs
|
52
|
+
|
53
|
+
logger = logging.getLogger(__name__)
|
54
|
+
|
55
|
+
|
56
|
+
class SBML:
    """
    Handle onto a Systems Biology Markup Language (SBML) file.

    Attributes
    ----------
    document
        The libsbml document obtained by reading the file
    model
        The model retrieved from ``document``

    Methods
    -------
    summary()
        Build a styled one-column table describing the model
    sbml_errors(reduced_log, return_df)
        Tabulate the errors recorded on the document

    """

    def __init__(
        self,
        sbml_path: str,
    ) -> None:
        """
        Read an SBML file and validate it.

        Parameters
        ----------
        sbml_path : str
            path to a .sbml file. When the path does not exist on the local
            file system it is read as text through ``open_fs`` instead.

        Raises
        ------
        ValueError
            If the document's SBML level is below 3, or if errors with a
            severity of 2 or more are present which are not in the set of
            known (tolerated) error descriptions.
        """

        sbml_reader = libsbml.SBMLReader()
        if not os.path.exists(sbml_path):
            # open the parent directory as a filesystem and read the file as text
            parent_dir, file_name = os.path.split(sbml_path)
            with open_fs(parent_dir) as sbml_fs:
                sbml_text = sbml_fs.readtext(file_name)
            self.document = sbml_reader.readSBMLFromString(sbml_text)
        else:
            self.document = sbml_reader.readSBML(sbml_path)

        if self.document.getLevel() < 3:
            raise ValueError(
                f"SBML model is level {self.document.getLevel()}, only SBML 3 is supported"
            )

        self.model = self.document.getModel()

        # inspect the unaggregated error table for critical entries
        error_table = self.sbml_errors(reduced_log=False, return_df=True)
        if error_table is None:
            return

        is_critical = error_table[SMBL_ERROR_SEVERITY] >= 2
        critical_descriptions = set(
            error_table[is_critical][SMBL_ERROR_DESCRIPTION].unique()
        )
        # critical errors which are reported as a warning rather than raised
        known_errors = {"<layout> must have 'id' and may have 'name'"}

        found_known_errors = known_errors & critical_descriptions
        if found_known_errors:
            logger.warning(
                f"The following known errors were found: {found_known_errors}"
            )

        unknown_critical_errors = critical_descriptions - known_errors
        if unknown_critical_errors:
            critical_errors = ", ".join(unknown_critical_errors)
            raise ValueError(
                f"Critical errors were found when reading the sbml file: {critical_errors}"
            )

    def summary(self) -> pd.DataFrame:
        """Return a styled table with the model's name, id, counts and compartments."""
        model = self.model

        # compartment names are listed alphabetically, one per line
        compartment_names = sorted(
            model.getCompartment(i).getName() for i in range(model.getNumCompartments())
        )

        model_summaries = {
            SMBL_SUMMARY_PATHWAY_NAME: model.getName(),
            SMBL_SUMMARY_PATHWAY_ID: model.getId(),
            SMBL_SUMMARY_N_SPECIES: model.getNumSpecies(),
            SMBL_SUMMARY_N_REACTIONS: model.getNumReactions(),
            SMBL_SUMMARY_COMPARTMENTS: ",\n".join(compartment_names),
        }

        # a single row transposed so that each attribute becomes its own row
        transposed_summary = pd.DataFrame(model_summaries, index=[0]).T

        return utils.style_df(transposed_summary)  # type: ignore

    def sbml_errors(self, reduced_log: bool = True, return_df: bool = False):
        """
        Tabulate all errors recorded on the SBML document.

        Parameters
        ----------
        reduced_log : bool
            If True, errors are counted per category and severity level
            rather than listed individually
        return_df: bool
            If True return the table; if False pass it to ``utils.style_df``
            and return None

        Returns
        -------
        None or pd.DataFrame.
            None is also returned when the document has no errors.
        """
        n_errors = self.document.getNumErrors()
        if n_errors == 0:
            return None

        def _describe(position):
            # one row of the error table for the error at ``position``
            sbml_error = self.document.getError(position)
            return {
                SMBL_ERROR_NUMBER: position,
                SMBL_ERROR_CATEGORY: sbml_error.getCategoryAsString(),
                SMBL_ERROR_SEVERITY: sbml_error.getSeverity(),
                SMBL_ERROR_DESCRIPTION: sbml_error.getShortMessage(),
                SMBL_ERROR_MESSAGE: sbml_error.getMessage(),
            }

        error_table = pd.DataFrame([_describe(i) for i in range(n_errors)])

        if reduced_log:
            grouping_vars = [SMBL_ERROR_CATEGORY, SMBL_ERROR_SEVERITY]
            error_table = (
                error_table[grouping_vars + [SMBL_ERROR_MESSAGE]]
                .groupby(grouping_vars)
                .count()
            )

        if return_df:
            return error_table

        if reduced_log:
            headers = [f"{SMBL_ERROR_CATEGORY}, {SMBL_ERROR_SEVERITY}", "count"]
        else:
            headers = [
                SMBL_ERROR_CATEGORY,
                SMBL_ERROR_SEVERITY,
                SMBL_ERROR_DESCRIPTION,
            ]
            error_table = error_table[headers]

        utils.style_df(error_table, headers=headers)

        return None
|
209
|
+
|
210
|
+
|
211
|
+
class SBML_reaction:
    """
    System Biology Markup Language Model Reactions.

    Attributes
    ----------
    reaction_dict: dict
        dictionary of reaction-level attributes: id, name, identifiers,
        source and reversibility
    species: pd.DataFrame
        table of modifiers, FBC gene products, reactants and products
        indexed by the reaction species id

    """

    # gene product associations are only read down to this nesting level
    _MAX_GENE_PRODUCT_DEPTH = 4

    def __init__(
        self,
        sbml_reaction: libsbml.Reaction,
    ) -> None:
        """
        Convenience class for working with sbml reactions

        Parameters
        ----------
        sbml_reaction : libsbml.Reaction
            The reaction node to summarize

        Returns
        -------
        None.
        """
        self.reaction_dict = {
            SMBL_REACTION_DICT_ID: sbml_reaction.getId(),
            SMBL_REACTION_DICT_NAME: sbml_reaction.getName(),
            SMBL_REACTION_DICT_IDENTIFIERS: identifiers.cv_to_Identifiers(
                sbml_reaction
            ),
            SMBL_REACTION_DICT_SOURCE: source.Source(init=True),
            SMBL_REACTION_DICT_IS_REVERSIBLE: sbml_reaction.getReversible(),
        }

        # process reaction species
        reaction_species = list()

        # save modifiers; they take part in the reaction with a stoichiometry of zero
        for i in range(sbml_reaction.getNumModifiers()):
            spec = sbml_reaction.getModifier(i)
            reaction_species.append(
                {
                    SMBL_REACTION_SPEC_RSC_ID: spec.getId(),
                    SMBL_REACTION_SPEC_SC_ID: spec.getSpecies(),
                    SMBL_REACTION_SPEC_STOICHIOMETRY: 0,
                    SMBL_REACTION_SPEC_SBO_TERM: spec.getSBOTermID(),
                }
            )

        # check for gene products associated with the FBC L3 extension
        rxn_fbc = sbml_reaction.getPlugin("fbc")
        if rxn_fbc is not None:
            gene_products = list()
            gpa = rxn_fbc.getGeneProductAssociation()
            if gpa is not None:
                gene_products = self._collect_gene_products(gpa.getAssociation())

            # de-duplicate: keep one entry per referenced gene product
            gene_products = list(
                {d[SMBL_REACTION_SPEC_SC_ID]: d for d in gene_products}.values()
            )
            reaction_species = reaction_species + gene_products

        # save reactants; consumed species carry a negative stoichiometry
        for i in range(sbml_reaction.getNumReactants()):
            spec = sbml_reaction.getReactant(i)
            reaction_species.append(
                {
                    SMBL_REACTION_SPEC_RSC_ID: spec.getId(),
                    SMBL_REACTION_SPEC_SC_ID: spec.getSpecies(),
                    SMBL_REACTION_SPEC_STOICHIOMETRY: -1 * spec.getStoichiometry(),
                    SMBL_REACTION_SPEC_SBO_TERM: spec.getSBOTermID(),
                }
            )

        # save products
        for i in range(sbml_reaction.getNumProducts()):
            spec = sbml_reaction.getProduct(i)
            reaction_species.append(
                {
                    SMBL_REACTION_SPEC_RSC_ID: spec.getId(),
                    SMBL_REACTION_SPEC_SC_ID: spec.getSpecies(),
                    SMBL_REACTION_SPEC_STOICHIOMETRY: spec.getStoichiometry(),
                    SMBL_REACTION_SPEC_SBO_TERM: spec.getSBOTermID(),
                }
            )

        # columns are named explicitly so that a reaction without any species
        # yields an empty table instead of a KeyError from set_index()
        self.species = pd.DataFrame(
            reaction_species,
            columns=[
                SMBL_REACTION_SPEC_RSC_ID,
                SMBL_REACTION_SPEC_SC_ID,
                SMBL_REACTION_SPEC_STOICHIOMETRY,
                SMBL_REACTION_SPEC_SBO_TERM,
            ],
        ).set_index(SMBL_REACTION_SPEC_RSC_ID)

    @staticmethod
    def _collect_gene_products(association, depth: int = 1) -> list:
        """
        Gather gene product entries from a (possibly nested) FBC association.

        Parameters
        ----------
        association
            An association node; nodes exposing the attribute named by
            SBML_REACTION_ATTR_GET_GENE_PRODUCT are read as gene products,
            all others are treated as containers of further associations
        depth : int
            Nesting level of ``association``; the top-level node is level 1

        Returns
        -------
        list
            One dict per gene product found, in depth-first order
        """
        if hasattr(association, SBML_REACTION_ATTR_GET_GENE_PRODUCT):
            return [_get_gene_product_dict(association)]

        if depth >= SBML_reaction._MAX_GENE_PRODUCT_DEPTH:
            logger.warning("gene annotations nested deeper than 4 levels, ignoring")
            return []

        gene_products = list()
        # always iterate over the children of the current node
        for i in range(association.getNumAssociations()):
            gene_products.extend(
                SBML_reaction._collect_gene_products(
                    association.getAssociation(i), depth + 1
                )
            )
        return gene_products
|
319
|
+
|
320
|
+
|
321
|
+
def sbml_df_from_sbml(self, sbml_model: SBML):
    """
    Populate the tables of ``self`` from an SBML model.

    Parameters
    ----------
    self
        Object receiving the tables. It must expose a ``schema`` dict whose
        "species" and "compartmentalized_species" entries each hold a "vars"
        list of column names (presumably an SBML_dfs; confirm against caller).
    sbml_model : SBML
        The SBML connection to read compartments, species and reactions from

    Returns
    -------
    self
        With ``compartments``, ``species``, ``compartmentalized_species``,
        ``reactions`` and ``reaction_species`` set.
    """
    # specify compartments

    compartments = list()
    for i in range(sbml_model.model.getNumCompartments()):
        comp = sbml_model.model.getCompartment(i)
        compartments.append(
            {
                SBML_COMPARTMENT_DICT_ID: comp.getId(),
                SBML_COMPARTMENT_DICT_NAME: comp.getName(),
                SBML_COMPARTMENT_DICT_IDENTIFIERS: _get_compartment_identifiers(comp),
                SBML_COMPARTMENT_DICT_SOURCE: source.Source(init=True),
            }
        )

    self.compartments = pd.DataFrame(compartments).set_index(SBML_COMPARTMENT_DICT_ID)

    # create a species df
    comp_species_df = setup_cspecies(sbml_model)

    # find unique species and create a table
    consensus_species_df = comp_species_df.copy()
    consensus_species_df.index.names = [SBML_SPECIES_DICT_ID]
    consensus_species, species_lookup = consensus.reduce_to_consensus_ids(
        consensus_species_df,
        {"pk": SBML_SPECIES_DICT_ID, "id": SBML_SPECIES_DICT_IDENTIFIERS},
    )

    # create a table of unique molecular species
    consensus_species.index.name = SBML_SPECIES_DICT_ID
    # species names are the compartmentalized names with any "[...]" span removed
    consensus_species[SBML_SPECIES_DICT_NAME] = [
        re.sub("\\[.+\\]", "", x).strip()
        for x in consensus_species[SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME]
    ]
    consensus_species = consensus_species.drop(
        [SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME, SBML_COMPARTMENT_DICT_ID], axis=1
    )
    consensus_species["s_Source"] = [
        source.Source(init=True) for _ in range(consensus_species.shape[0])
    ]

    self.species = consensus_species[self.schema["species"]["vars"]]

    self.compartmentalized_species = comp_species_df.join(species_lookup).rename(
        columns={"new_id": SBML_SPECIES_DICT_ID}
    )[self.schema["compartmentalized_species"]["vars"]]

    # specify reactions

    reactions = list()
    reaction_species = list()
    for i in range(sbml_model.model.getNumReactions()):
        rxn = SBML_reaction(sbml_model.model.getReaction(i))
        reactions.append(rxn.reaction_dict)

        # tag each reaction species with the reaction it belongs to
        rxn_specs = rxn.species
        rxn_specs[SMBL_REACTION_DICT_ID] = rxn.reaction_dict[SMBL_REACTION_DICT_ID]
        reaction_species.append(rxn_specs)

    self.reactions = pd.DataFrame(reactions).set_index(SMBL_REACTION_DICT_ID)

    reaction_species_df = pd.concat(reaction_species)
    # add an index if reaction species didn't have IDs in the .sbml
    if all(v == "" for v in reaction_species_df.index.tolist()):
        reaction_species_df = (
            reaction_species_df.reset_index(drop=True)
            .assign(
                rsc_id=sbml_dfs_utils.id_formatter(
                    range(reaction_species_df.shape[0]), SMBL_REACTION_SPEC_RSC_ID
                )
            )
            .set_index(SMBL_REACTION_SPEC_RSC_ID)
        )

    self.reaction_species = reaction_species_df

    return self


def _get_compartment_identifiers(comp):
    """
    Build the identifiers of a single SBML compartment.

    Compartments carrying CV terms are converted directly. Otherwise the
    compartment's name is looked up in constants.COMPARTMENT_ALIASES and a
    single GO identifier is created from constants.COMPARTMENTS_GO_TERMS,
    using the "CELLULAR_COMPONENT" entry when no alias matches.
    """
    if comp.getCVTerms():
        return identifiers.cv_to_Identifiers(comp)

    logger.warning(
        f"Compartment {comp.getId()} has empty CVterms, mapping its c_Identifiers from the Compartment dict"
    )

    comp_name = comp.getName()
    mapped_compartment_key = [
        compkey
        for compkey, mappednames in constants.COMPARTMENT_ALIASES.items()
        if comp_name in mappednames
    ]

    if len(mapped_compartment_key) == 0:
        logger.warning(
            f"No GO compartment for {comp_name} is mapped, use the generic cellular_component's GO id"
        )
        go_term_key = "CELLULAR_COMPONENT"
    else:
        if len(mapped_compartment_key) > 1:
            logger.warning(
                f"More than one GO compartments for {comp_name} are mapped, using the first one"
            )
        go_term_key = mapped_compartment_key[0]

    # NOTE(review): the generic branch previously passed BQB.BQB_IS while the
    # mapped branch passed BQB.IS; both now use BQB.IS - confirm against
    # napistu.constants.BQB
    return identifiers.Identifiers(
        [
            identifiers.format_uri(
                uri=identifiers.create_uri_url(
                    ontology=constants.ONTOLOGIES.GO,
                    identifier=constants.COMPARTMENTS_GO_TERMS[go_term_key],
                ),
                biological_qualifier_type=BQB.IS,
            )
        ]
    )
|
465
|
+
|
466
|
+
|
467
|
+
def setup_cspecies(sbml_model: SBML) -> pd.DataFrame:
    """
    Setup Compartmentalized Species

    Collect every species of the model, followed by every gene product
    declared through the "fbc" plugin (when the plugin is present), into a
    pd.DataFrame indexed by the entity's id.

    This step is kept as its own function so that the later call of
    consensus.reduce_to_consensus_ids(), which collapses
    compartmentalized_species -> species based on shared identifiers,
    can be tested.
    """
    model = sbml_model.model

    cspecies_records = list()
    for species_index in range(model.getNumSpecies()):
        species_node = model.getSpecies(species_index)
        cspecies_records.append(
            {
                SMBL_REACTION_SPEC_SC_ID: species_node.getId(),
                SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME: species_node.getName(),
                SBML_COMPARTMENT_DICT_ID: species_node.getCompartment(),
                SBML_SPECIES_DICT_IDENTIFIERS: identifiers.cv_to_Identifiers(
                    species_node
                ),
                SBML_COMPARTMENTALIZED_SPECIES_DICT_SOURCE: source.Source(init=True),
            }
        )

    # add gene products defined using the L3 FBC extension
    fbc_plugin = model.getPlugin("fbc")
    if fbc_plugin is not None:
        for gene_index in range(fbc_plugin.getNumGeneProducts()):
            gene_node = fbc_plugin.getGeneProduct(gene_index)
            gene_id = gene_node.getId()

            # fall back to getLabel() to accommodate sbml models (e.g. HumanGEM.xml)
            # with no fbc:name attribute; Recon3D.xml has both fbc:label and
            # fbc:name attributes, with the gene name in fbc:name
            if gene_node.isSetName():
                gene_name = gene_node.getName()
            else:
                gene_name = gene_node.getLabel()

            cspecies_records.append(
                {
                    SMBL_REACTION_SPEC_SC_ID: gene_id,
                    SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME: gene_name,
                    # gene products are not assigned to a compartment
                    SBML_COMPARTMENT_DICT_ID: None,
                    SBML_SPECIES_DICT_IDENTIFIERS: identifiers.cv_to_Identifiers(
                        gene_node
                    ),
                    SBML_COMPARTMENTALIZED_SPECIES_DICT_SOURCE: source.Source(
                        init=True
                    ),
                }
            )

    cspecies_table = pd.DataFrame(cspecies_records)
    return cspecies_table.set_index(SMBL_REACTION_SPEC_SC_ID)
|
518
|
+
|
519
|
+
|
520
|
+
def add_sbml_annotations(
    sbml_model: SBML, annotations: pd.DataFrame, save_path: str
) -> None:
    """
    Add SBML Annotations

    Add additional identifiers to an sbml file and save the updated document

    Parameters:
    sbml_model: SBML
        A .sbml model
    annotations: pd.DataFrame
        A table of annotations to add containing an "id" matching the
        primary key of an entity, "type" matching the type of entity
        ("species", "compartment" or "reaction"), and "uri" representing
        the annotation to add. Rows are read by position so any index is
        accepted.
    save_path: str
        Path to save the model to

    Returns:
    None

    Raises:
    TypeError
        If sbml_model is not an SBML object or annotations is not a
        pd.DataFrame
    ValueError
        If a row has an invalid "type", its "id" is not found in the model,
        or libsbml reports a failure when adding the meta-id, the resource
        or the CV term
    """

    logger.warning(
        "add_sbml_annotations is deprecated and may be removed in a future version of rcpr; "
        "we are now adding these annotation during ingestion by sbml.sbml_df_from_sbml() rather "
        "than directly appending them to the raw .sbml"
    )

    if not isinstance(sbml_model, SBML):
        raise TypeError("sbml_model must be an SBML object")

    if not isinstance(annotations, pd.DataFrame):
        raise TypeError("annotations must be a pd.DataFrame")

    # name of the model method used to look up each type of entity
    entity_getters = {
        "species": SBML_ANNOTATION_METHOD_GET_SPECIES,
        "compartment": SBML_ANNOTATION_METHOD_GET_COMPARTMENT,
        "reaction": SBML_ANNOTATION_METHOD_GET_REACTION,
    }

    for i in range(annotations.shape[0]):
        # positional access so that a filtered or re-ordered index is read correctly
        annot_type = annotations["type"].iloc[i]
        entity_id = annotations["id"].iloc[i]
        uri = annotations["uri"].iloc[i]

        if annot_type not in entity_getters:
            raise ValueError(
                f"{annot_type} is not a valid annotation type,"
                " valid types are species, compartment, and reaction"
            )

        # access the node to modify
        entity_fxn_method = getattr(sbml_model.model, entity_getters[annot_type])
        entity_node = entity_fxn_method(entity_id)

        # the lookup yields None when the id is not present in the model
        if entity_node is None:
            raise ValueError(
                f"{entity_id} was not found among the {annot_type} entries of the model"
            )

        # set meta-id if there isn't one; required to add a node
        if not entity_node.isSetMetaId():
            add_metaid_code = entity_node.setMetaId(entity_id)

            if add_metaid_code != libsbml.LIBSBML_OPERATION_SUCCESS:
                raise ValueError(
                    f"adding metaId to {entity_id} failed"
                    f" with return code {add_metaid_code} "
                    f"({libsbml.OperationReturnValue_toString(add_metaid_code).strip()})"
                )

        # create a controlled vocabulary term
        cv = libsbml.CVTerm()
        cv.setQualifierType(libsbml.BIOLOGICAL_QUALIFIER)
        cv.setBiologicalQualifierType(libsbml.BQB_IS_VERSION_OF)

        add_resource_code = cv.addResource(uri)
        if add_resource_code != libsbml.LIBSBML_OPERATION_SUCCESS:
            raise ValueError(
                "adding resource to CV term returned code"
                f" {add_resource_code} "
                f"({libsbml.OperationReturnValue_toString(add_resource_code).strip()})"
                f" rather than {libsbml.LIBSBML_OPERATION_SUCCESS} when "
                f"adding {uri} to {entity_id}"
            )

        add_cv_code = entity_node.addCVTerm(cv)
        if add_cv_code != libsbml.LIBSBML_OPERATION_SUCCESS:
            raise ValueError(
                f"adding CV to entity returned code {add_cv_code} "
                f"({libsbml.OperationReturnValue_toString(add_cv_code).strip()})"
                f" rather than {libsbml.LIBSBML_OPERATION_SUCCESS} when adding"
                f" {uri} to {entity_id}"
            )

    libsbml.writeSBML(sbml_model.document, save_path)

    return None
|
612
|
+
|
613
|
+
|
614
|
+
def _get_gene_product_dict(gp):
    """
    Summarize a gene product node from an sbml file as a reaction species entry.

    The entry holds the node's id, the gene product it refers to, a
    stoichiometry fixed at zero, and the node's SBO term id.
    """
    gene_product_entry = dict()
    gene_product_entry[SMBL_REACTION_SPEC_RSC_ID] = gp.getId()
    gene_product_entry[SMBL_REACTION_SPEC_SC_ID] = gp.getGeneProduct()
    gene_product_entry[SMBL_REACTION_SPEC_STOICHIOMETRY] = 0
    gene_product_entry[SMBL_REACTION_SPEC_SBO_TERM] = gp.getSBOTermID()
    return gene_product_entry
|