napistu-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
napistu/ingestion/sbml.py
@@ -0,0 +1,621 @@
from __future__ import annotations

import logging
import os
import re

import libsbml
import pandas as pd
from napistu import consensus
from napistu import constants
from napistu import identifiers
from napistu import sbml_dfs_utils
from napistu import source
from napistu import utils

from napistu.constants import BQB

from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_COMPARTMENT
from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_REACTION
from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_SPECIES
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_ID
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_IDENTIFIERS
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_NAME
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_SOURCE
from napistu.ingestion.constants import SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME
from napistu.ingestion.constants import SBML_COMPARTMENTALIZED_SPECIES_DICT_SOURCE
from napistu.ingestion.constants import SBML_REACTION_ATTR_GET_GENE_PRODUCT
from napistu.ingestion.constants import SBML_SPECIES_DICT_ID
from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
from napistu.ingestion.constants import SMBL_ERROR_CATEGORY
from napistu.ingestion.constants import SMBL_ERROR_DESCRIPTION
from napistu.ingestion.constants import SMBL_ERROR_MESSAGE
from napistu.ingestion.constants import SMBL_ERROR_NUMBER
from napistu.ingestion.constants import SMBL_ERROR_SEVERITY
from napistu.ingestion.constants import SMBL_REACTION_DICT_ID
from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
from napistu.ingestion.constants import SMBL_REACTION_DICT_SOURCE
from napistu.ingestion.constants import SMBL_REACTION_SPEC_RSC_ID
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SC_ID
from napistu.ingestion.constants import SMBL_REACTION_SPEC_STOICHIOMETRY
from napistu.ingestion.constants import SMBL_SUMMARY_COMPARTMENTS
from napistu.ingestion.constants import SMBL_SUMMARY_N_REACTIONS
from napistu.ingestion.constants import SMBL_SUMMARY_N_SPECIES
from napistu.ingestion.constants import SMBL_SUMMARY_PATHWAY_ID
from napistu.ingestion.constants import SMBL_SUMMARY_PATHWAY_NAME

from fs import open_fs

logger = logging.getLogger(__name__)

class SBML:
    """
    Systems Biology Markup Language Connections.

    Attributes
    ----------
    document
        Connection to the SBML document
    model
        Connection to the SBML model

    Methods
    -------
    summary()
        Return a table summarizing the sbml model
    sbml_errors(reduced_log, return_df)
        Print or return a summary of all errors in the SBML file

    """

    def __init__(
        self,
        sbml_path: str,
    ) -> None:
        """
        Connects to an SBML file

        Parameters
        ----------
        sbml_path : str
            path to a .sbml file.

        Returns
        -------
        None.
        """

        reader = libsbml.SBMLReader()
        if os.path.exists(sbml_path):
            self.document = reader.readSBML(sbml_path)
        else:
            with open_fs(os.path.dirname(sbml_path)) as fs:
                txt = fs.readtext(os.path.basename(sbml_path))
                self.document = reader.readSBMLFromString(txt)

        if self.document.getLevel() < 3:
            raise ValueError(
                f"SBML model is level {self.document.getLevel()}, only SBML level 3 is supported"
            )

        self.model = self.document.getModel()

        # check for critical sbml errors
        errors = self.sbml_errors(reduced_log=False, return_df=True)
        if errors is not None:
            critical_errors = errors[errors[SMBL_ERROR_SEVERITY] >= 2]
            critical_errors = set(critical_errors[SMBL_ERROR_DESCRIPTION].unique())
            known_errors = {"<layout> must have 'id' and may have 'name'"}

            found_known_errors = known_errors.intersection(critical_errors)
            if len(found_known_errors) > 0:
                logger.warning(
                    f"The following known errors were found: {found_known_errors}"
                )

            unknown_critical_errors = critical_errors - known_errors
            if len(unknown_critical_errors) != 0:
                critical_errors = ", ".join(unknown_critical_errors)
                raise ValueError(
                    f"Critical errors were found when reading the sbml file: {critical_errors}"
                )

    def summary(self) -> pd.DataFrame:
        """Returns a pd.DataFrame summary of an SBML model."""
        model = self.model

        model_summaries = dict()

        model_summaries[SMBL_SUMMARY_PATHWAY_NAME] = model.getName()
        model_summaries[SMBL_SUMMARY_PATHWAY_ID] = model.getId()

        model_summaries[SMBL_SUMMARY_N_SPECIES] = model.getNumSpecies()
        model_summaries[SMBL_SUMMARY_N_REACTIONS] = model.getNumReactions()

        compartments = [
            model.getCompartment(i).getName() for i in range(model.getNumCompartments())
        ]
        compartments.sort()
        model_summaries[SMBL_SUMMARY_COMPARTMENTS] = ",\n".join(compartments)

        model_summaries_dat = pd.DataFrame(model_summaries, index=[0]).T

        return utils.style_df(model_summaries_dat)  # type: ignore

    def sbml_errors(self, reduced_log: bool = True, return_df: bool = False):
        """
        Format and print all SBML errors

        Parameters
        ----------
        reduced_log : bool
            A reduced log aggregates errors across categories and severity levels
        return_df : bool
            If False, print a log; if True, return a pd.DataFrame

        Returns
        -------
        None or pd.DataFrame.
        """
        n_errors = self.document.getNumErrors()
        if n_errors == 0:
            return None

        error_log = list()
        for i in range(n_errors):
            e = self.document.getError(i)

            error_entry = {
                SMBL_ERROR_NUMBER: i,
                SMBL_ERROR_CATEGORY: e.getCategoryAsString(),
                SMBL_ERROR_SEVERITY: e.getSeverity(),
                SMBL_ERROR_DESCRIPTION: e.getShortMessage(),
                SMBL_ERROR_MESSAGE: e.getMessage(),
            }

            error_log.append(error_entry)
        error_log = pd.DataFrame(error_log)

        if reduced_log:
            error_log = (
                error_log[
                    [SMBL_ERROR_CATEGORY, SMBL_ERROR_SEVERITY, SMBL_ERROR_MESSAGE]
                ]
                .groupby([SMBL_ERROR_CATEGORY, SMBL_ERROR_SEVERITY])
                .count()
            )

        if return_df:
            return error_log
        else:
            if reduced_log:
                headers = [f"{SMBL_ERROR_CATEGORY}, {SMBL_ERROR_SEVERITY}", "count"]
            else:
                headers = [
                    SMBL_ERROR_CATEGORY,
                    SMBL_ERROR_SEVERITY,
                    SMBL_ERROR_DESCRIPTION,
                ]
                error_log = error_log[headers]

            utils.style_df(error_log, headers=headers)

        return None

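A minimal usage sketch for the wrapper above; the pathway file path is hypothetical, and any local level-3 SBML export would do:

```python
from napistu.ingestion.sbml import SBML

# hypothetical local pathway export; the constructor raises on SBML levels < 3
# and on critical, unrecognized libsbml errors
sbml_model = SBML("data/R-HSA-70171.sbml")

# one-row summary table: pathway name/id, species and reaction counts, compartments
sbml_model.summary()

# error report aggregated by category and severity; pass return_df=True to get
# the underlying pd.DataFrame instead of a printed table
sbml_model.sbml_errors(reduced_log=True)
```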
class SBML_reaction:
    """
    Systems Biology Markup Language Model Reactions.

    Attributes
    ----------
    reaction_dict : dict
        dictionary of reaction-level attributes: id, name, identifiers
    species : pd.DataFrame
        table of substrates, products, and modifiers

    """

    def __init__(
        self,
        sbml_reaction: libsbml.Reaction,
    ) -> None:
        """
        Convenience class for working with sbml reactions
        """
        reaction_dict = {
            SMBL_REACTION_DICT_ID: sbml_reaction.getId(),
            SMBL_REACTION_DICT_NAME: sbml_reaction.getName(),
            SMBL_REACTION_DICT_IDENTIFIERS: identifiers.cv_to_Identifiers(
                sbml_reaction
            ),
            SMBL_REACTION_DICT_SOURCE: source.Source(init=True),
            SMBL_REACTION_DICT_IS_REVERSIBLE: sbml_reaction.getReversible(),
        }

        self.reaction_dict = reaction_dict

        # process reaction species
        reaction_species = list()
        # save modifiers
        for i in range(sbml_reaction.getNumModifiers()):
            spec = sbml_reaction.getModifier(i)
            spec_dict = {
                SMBL_REACTION_SPEC_RSC_ID: spec.getId(),
                SMBL_REACTION_SPEC_SC_ID: spec.getSpecies(),
                SMBL_REACTION_SPEC_STOICHIOMETRY: 0,
                SMBL_REACTION_SPEC_SBO_TERM: spec.getSBOTermID(),
            }
            reaction_species.append(spec_dict)

        rxn_fbc = sbml_reaction.getPlugin("fbc")
        # check for gene products associated with the FBC L3 extension
        if rxn_fbc is not None:
            gene_products = list()
            gpa = rxn_fbc.getGeneProductAssociation()
            if gpa is not None:
                gpaa = gpa.getAssociation()
                if hasattr(gpaa, SBML_REACTION_ATTR_GET_GENE_PRODUCT):
                    gene_products.append(_get_gene_product_dict(gpaa))
                else:
                    for i in range(gpaa.getNumAssociations()):
                        gpaaa = gpaa.getAssociation(i)
                        if hasattr(gpaaa, SBML_REACTION_ATTR_GET_GENE_PRODUCT):
                            gene_products.append(_get_gene_product_dict(gpaaa))
                        else:
                            for i in range(gpaaa.getNumAssociations()):
                                gpaaaa = gpaaa.getAssociation(i)
                                if hasattr(gpaaaa, SBML_REACTION_ATTR_GET_GENE_PRODUCT):
                                    gene_products.append(
                                        _get_gene_product_dict(gpaaaa)
                                    )
                                else:
                                    for i in range(gpaaaa.getNumAssociations()):
                                        gpaaaaa = gpaaaa.getAssociation(i)
                                        if hasattr(
                                            gpaaaaa, SBML_REACTION_ATTR_GET_GENE_PRODUCT
                                        ):
                                            gene_products.append(
                                                _get_gene_product_dict(gpaaaaa)
                                            )
                                        else:
                                            logger.warning(
                                                "gene annotations nested deeper than 4 levels, ignoring"
                                            )
                                            continue
                # de-duplicate
                gene_products = list(
                    {d[SMBL_REACTION_SPEC_SC_ID]: d for d in gene_products}.values()
                )
                reaction_species = reaction_species + gene_products

        # save reactants
        for i in range(sbml_reaction.getNumReactants()):
            spec = sbml_reaction.getReactant(i)
            spec_dict = {
                SMBL_REACTION_SPEC_RSC_ID: spec.getId(),
                SMBL_REACTION_SPEC_SC_ID: spec.getSpecies(),
                SMBL_REACTION_SPEC_STOICHIOMETRY: -1 * spec.getStoichiometry(),
                SMBL_REACTION_SPEC_SBO_TERM: spec.getSBOTermID(),
            }
            reaction_species.append(spec_dict)
        # save products
        for i in range(sbml_reaction.getNumProducts()):
            spec = sbml_reaction.getProduct(i)
            spec_dict = {
                SMBL_REACTION_SPEC_RSC_ID: spec.getId(),
                SMBL_REACTION_SPEC_SC_ID: spec.getSpecies(),
                SMBL_REACTION_SPEC_STOICHIOMETRY: spec.getStoichiometry(),
                SMBL_REACTION_SPEC_SBO_TERM: spec.getSBOTermID(),
            }
            reaction_species.append(spec_dict)

        self.species = pd.DataFrame(reaction_species).set_index(
            SMBL_REACTION_SPEC_RSC_ID
        )

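The reaction wrapper's sign convention (reactants negative, products positive, modifiers and FBC gene products zero) is easiest to see by inspecting a single reaction. A sketch, again assuming a hypothetical local file:

```python
from napistu.ingestion.constants import SMBL_REACTION_DICT_ID, SMBL_REACTION_DICT_NAME
from napistu.ingestion.sbml import SBML, SBML_reaction

sbml_model = SBML("data/R-HSA-70171.sbml")  # hypothetical path
rxn = SBML_reaction(sbml_model.model.getReaction(0))

print(rxn.reaction_dict[SMBL_REACTION_DICT_ID], rxn.reaction_dict[SMBL_REACTION_DICT_NAME])
# one row per substrate/product/modifier (and FBC gene product), indexed by rsc_id;
# stoichiometry is negative for reactants, positive for products, and 0 otherwise
print(rxn.species)
```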
def sbml_df_from_sbml(self, sbml_model: SBML):
    """
    Build the relational tables of `self` from an SBML model.

    `self` is expected to provide a `schema` dict (e.g. an SBML_dfs object under
    construction); this function populates its compartments, species,
    compartmentalized_species, reactions, and reaction_species attributes.
    """
    # specify compartments

    compartments = list()
    for i in range(sbml_model.model.getNumCompartments()):
        comp = sbml_model.model.getCompartment(i)

        if not comp.getCVTerms():
            logger.warning(
                f"Compartment {comp.getId()} has empty CVterms, mapping its c_Identifiers from the Compartment dict"
            )

            comp_name = comp.getName()
            mapped_compartment_key = [
                compkey
                for compkey, mappednames in constants.COMPARTMENT_ALIASES.items()
                if comp_name in mappednames
            ]

            if len(mapped_compartment_key) == 0:
                logger.warning(
                    f"No GO compartment for {comp_name} is mapped, using the generic cellular_component's GO id"
                )
                compartments.append(
                    {
                        SBML_COMPARTMENT_DICT_ID: comp.getId(),
                        SBML_COMPARTMENT_DICT_NAME: comp.getName(),
                        SBML_COMPARTMENT_DICT_IDENTIFIERS: identifiers.Identifiers(
                            [
                                identifiers.format_uri(
                                    uri=identifiers.create_uri_url(
                                        ontology=constants.ONTOLOGIES.GO,
                                        identifier=constants.COMPARTMENTS_GO_TERMS[
                                            "CELLULAR_COMPONENT"
                                        ],
                                    ),
                                    biological_qualifier_type=BQB.IS,
                                )
                            ]
                        ),
                        SBML_COMPARTMENT_DICT_SOURCE: source.Source(init=True),
                    }
                )

            if len(mapped_compartment_key) > 0:
                if len(mapped_compartment_key) > 1:
                    logger.warning(
                        f"More than one GO compartment for {comp_name} is mapped, using the first one"
                    )
                compartments.append(
                    {
                        SBML_COMPARTMENT_DICT_ID: comp.getId(),
                        SBML_COMPARTMENT_DICT_NAME: comp.getName(),
                        SBML_COMPARTMENT_DICT_IDENTIFIERS: identifiers.Identifiers(
                            [
                                identifiers.format_uri(
                                    uri=identifiers.create_uri_url(
                                        ontology=constants.ONTOLOGIES.GO,
                                        identifier=constants.COMPARTMENTS_GO_TERMS[
                                            mapped_compartment_key[0]
                                        ],
                                    ),
                                    biological_qualifier_type=BQB.IS,
                                )
                            ]
                        ),
                        SBML_COMPARTMENT_DICT_SOURCE: source.Source(init=True),
                    }
                )

        else:
            compartments.append(
                {
                    SBML_COMPARTMENT_DICT_ID: comp.getId(),
                    SBML_COMPARTMENT_DICT_NAME: comp.getName(),
                    SBML_COMPARTMENT_DICT_IDENTIFIERS: identifiers.cv_to_Identifiers(
                        comp
                    ),
                    SBML_COMPARTMENT_DICT_SOURCE: source.Source(init=True),
                }
            )

    self.compartments = pd.DataFrame(compartments).set_index(SBML_COMPARTMENT_DICT_ID)

    # create a species df
    comp_species_df = setup_cspecies(sbml_model)

    # find unique species and create a table
    consensus_species_df = comp_species_df.copy()
    consensus_species_df.index.names = [SBML_SPECIES_DICT_ID]
    consensus_species, species_lookup = consensus.reduce_to_consensus_ids(
        consensus_species_df,
        {"pk": SBML_SPECIES_DICT_ID, "id": SBML_SPECIES_DICT_IDENTIFIERS},
    )

    # create a table of unique molecular species
    consensus_species.index.name = SBML_SPECIES_DICT_ID
    consensus_species[SBML_SPECIES_DICT_NAME] = [
        re.sub("\\[.+\\]", "", x).strip()
        for x in consensus_species[SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME]
    ]
    consensus_species = consensus_species.drop(
        [SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME, SBML_COMPARTMENT_DICT_ID], axis=1
    )
    consensus_species["s_Source"] = [
        source.Source(init=True) for x in range(0, consensus_species.shape[0])
    ]

    self.species = consensus_species[self.schema["species"]["vars"]]

    self.compartmentalized_species = comp_species_df.join(species_lookup).rename(
        columns={"new_id": SBML_SPECIES_DICT_ID}
    )[self.schema["compartmentalized_species"]["vars"]]

    # specify reactions

    reactions = list()
    reaction_species = list()
    for i in range(sbml_model.model.getNumReactions()):
        rxn = SBML_reaction(sbml_model.model.getReaction(i))
        reactions.append(rxn.reaction_dict)

        rxn_specs = rxn.species
        rxn_specs[SMBL_REACTION_DICT_ID] = rxn.reaction_dict[SMBL_REACTION_DICT_ID]
        reaction_species.append(rxn_specs)

    self.reactions = pd.DataFrame(reactions).set_index(SMBL_REACTION_DICT_ID)

    reaction_species_df = pd.concat(reaction_species)
    # add an index if reaction species didn't have IDs in the .sbml
    if all([v == "" for v in reaction_species_df.index.tolist()]):
        reaction_species_df = (
            reaction_species_df.reset_index(drop=True)
            .assign(
                rsc_id=sbml_dfs_utils.id_formatter(
                    range(reaction_species_df.shape[0]), SMBL_REACTION_SPEC_RSC_ID
                )
            )
            .set_index(SMBL_REACTION_SPEC_RSC_ID)
        )

    self.reaction_species = reaction_species_df

    return self

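The compartment fallback above (no CV terms → match the display name against constants.COMPARTMENT_ALIASES, else use the generic cellular_component GO term) can be restated as a small helper. `resolve_go_term` is hypothetical and only mirrors that branch logic:

```python
from napistu import constants


def resolve_go_term(comp_name: str) -> str:
    """Toy mirror of the compartment branch in sbml_df_from_sbml (not napistu API)."""
    matches = [
        key
        for key, aliases in constants.COMPARTMENT_ALIASES.items()
        if comp_name in aliases
    ]
    if not matches:
        # same fallback as above: the generic cellular_component GO term
        return constants.COMPARTMENTS_GO_TERMS["CELLULAR_COMPONENT"]
    # more than one match -> sbml_df_from_sbml warns and keeps the first
    return constants.COMPARTMENTS_GO_TERMS[matches[0]]
```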
def setup_cspecies(sbml_model: SBML) -> pd.DataFrame:
    """
    Setup Compartmentalized Species

    Read all compartmentalized species from a model
    and set them up as a pd.DataFrame.
    This operation is functionalized to test the subsequent call of
    consensus.reduce_to_consensus_ids()
    which collapses compartmentalized_species -> species
    based on shared identifiers.
    """
    comp_species = list()
    for i in range(sbml_model.model.getNumSpecies()):
        spec = sbml_model.model.getSpecies(i)

        spec_dict = {
            SMBL_REACTION_SPEC_SC_ID: spec.getId(),
            SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME: spec.getName(),
            SBML_COMPARTMENT_DICT_ID: spec.getCompartment(),
            SBML_SPECIES_DICT_IDENTIFIERS: identifiers.cv_to_Identifiers(spec),
            SBML_COMPARTMENTALIZED_SPECIES_DICT_SOURCE: source.Source(init=True),
        }

        comp_species.append(spec_dict)

    mplugin = sbml_model.model.getPlugin("fbc")

    # add gene products defined using the L3 FBC extension
    if mplugin is not None:
        for i in range(mplugin.getNumGeneProducts()):
            gene_product = mplugin.getGeneProduct(i)

            gene_dict = {
                SMBL_REACTION_SPEC_SC_ID: gene_product.getId(),
                # fall back to getLabel() to accommodate sbml models (e.g. HumanGEM.xml) with no
                # fbc:name attribute; Recon3D.xml has both fbc:label and fbc:name attributes,
                # with the gene name in fbc:name
                SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME: (
                    gene_product.getName()
                    if gene_product.isSetName()
                    else gene_product.getLabel()
                ),
                SBML_COMPARTMENT_DICT_ID: None,
                SBML_SPECIES_DICT_IDENTIFIERS: identifiers.cv_to_Identifiers(
                    gene_product
                ),
                SBML_COMPARTMENTALIZED_SPECIES_DICT_SOURCE: source.Source(init=True),
            }

            comp_species.append(gene_dict)

    return pd.DataFrame(comp_species).set_index(SMBL_REACTION_SPEC_SC_ID)

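setup_cspecies feeds consensus.reduce_to_consensus_ids(), which collapses compartmentalized species that share identifiers into a single species and returns a lookup back to the original rows. A plain-pandas sketch of that idea (toy column names and values, not the napistu implementation):

```python
import pandas as pd

cspecies = pd.DataFrame(
    {
        "sc_name": ["glucose [cytosol]", "glucose [extracellular]", "ATP [cytosol]"],
        "chebi": ["CHEBI:17234", "CHEBI:17234", "CHEBI:30616"],
    },
    index=pd.Index(["sc_0", "sc_1", "sc_2"], name="s_id"),
)

# rows sharing an identifier collapse to one consensus id ("new_id", matching the
# rename() applied inside sbml_df_from_sbml above)
species_lookup = cspecies.groupby("chebi", sort=False).ngroup().rename("new_id").to_frame()
consensus_species = (
    cspecies.join(species_lookup).drop_duplicates("new_id").set_index("new_id")
)
```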
def add_sbml_annotations(
    sbml_model: SBML, annotations: pd.DataFrame, save_path: str
) -> None:
    """
    Add SBML Annotations

    Add additional identifiers to an sbml file and save the updated document

    Parameters:
        sbml_model: SBML
            A .sbml model
        annotations: pd.DataFrame
            A table of annotations to add containing an "id" matching the
            primary key of an entity, "type" matching the type of entity,
            and "uri" representing the annotation to add.
        save_path: str
            Path to save the model to

    Returns:
        None
    """

    logger.warning(
        "add_sbml_annotations is deprecated and may be removed in a future version of rcpr; "
        "we are now adding these annotations during ingestion by sbml.sbml_df_from_sbml() rather "
        "than directly appending them to the raw .sbml"
    )

    if not isinstance(sbml_model, SBML):
        raise TypeError("sbml_model must be an SBML object")

    if not isinstance(annotations, pd.DataFrame):
        raise TypeError("annotations must be a pd.DataFrame")

    for i in range(0, annotations.shape[0]):
        annot_type = annotations["type"][i]

        if annot_type == "species":
            entity_fxn = SBML_ANNOTATION_METHOD_GET_SPECIES
        elif annot_type == "compartment":
            entity_fxn = SBML_ANNOTATION_METHOD_GET_COMPARTMENT
        elif annot_type == "reaction":
            entity_fxn = SBML_ANNOTATION_METHOD_GET_REACTION
        else:
            raise ValueError(
                f"{annot_type} is not a valid annotation type,"
                " valid types are species, compartment, and reaction"
            )
        # access the node to modify
        entity_fxn_method = getattr(sbml_model.model, entity_fxn)
        entity_node = entity_fxn_method(annotations["id"][i])

        # TO DO - check for a valid entity_node in case id is not found

        # set meta-id if there isn't one; required to add a node
        if not entity_node.isSetMetaId():
            add_metaid_code = entity_node.setMetaId(annotations["id"][i])

            if add_metaid_code != libsbml.LIBSBML_OPERATION_SUCCESS:
                raise ValueError(
                    f"adding metaId to {annotations['id'][i]} failed"
                    f" with return code {add_metaid_code} "
                    f"({libsbml.OperationReturnValue_toString(add_metaid_code).strip()})"
                )

        # create a controlled vocabulary term
        cv = libsbml.CVTerm()
        cv.setQualifierType(libsbml.BIOLOGICAL_QUALIFIER)
        cv.setBiologicalQualifierType(libsbml.BQB_IS_VERSION_OF)

        add_resource_code = cv.addResource(annotations["uri"][i])
        if add_resource_code != libsbml.LIBSBML_OPERATION_SUCCESS:
            raise ValueError(
                "adding resource to CV term returned code"
                f" {add_resource_code} "
                f"({libsbml.OperationReturnValue_toString(add_resource_code).strip()})"
                f" rather than {libsbml.LIBSBML_OPERATION_SUCCESS} when "
                f"adding {annotations['uri'][i]} to {annotations['id'][i]}"
            )

        add_cv_code = entity_node.addCVTerm(cv)
        if add_cv_code != libsbml.LIBSBML_OPERATION_SUCCESS:
            raise ValueError(
                f"adding CV to entity returned code {add_cv_code} "
                f"({libsbml.OperationReturnValue_toString(add_cv_code).strip()})"
                f" rather than {libsbml.LIBSBML_OPERATION_SUCCESS} when adding"
                f" {annotations['uri'][i]} to {annotations['id'][i]}"
            )

    libsbml.writeSBML(sbml_model.document, save_path)

    return None

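Although add_sbml_annotations is flagged as deprecated above, its expected input is just an id/type/uri table; a sketch with hypothetical ids, paths, and URIs:

```python
import pandas as pd

from napistu.ingestion.sbml import SBML, add_sbml_annotations

annotations = pd.DataFrame(
    [
        {
            "id": "species_00123",  # hypothetical; must match an entity id in the model
            "type": "species",      # one of "species", "compartment", "reaction"
            "uri": "http://identifiers.org/uniprot/P04637",
        }
    ]
)

sbml_model = SBML("data/R-HSA-70171.sbml")  # hypothetical path
add_sbml_annotations(sbml_model, annotations, save_path="annotated_model.sbml")
```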
def _get_gene_product_dict(gp):
    """Read a gene product node from an sbml file."""
    return {
        SMBL_REACTION_SPEC_RSC_ID: gp.getId(),
        SMBL_REACTION_SPEC_SC_ID: gp.getGeneProduct(),
        SMBL_REACTION_SPEC_STOICHIOMETRY: 0,
        SMBL_REACTION_SPEC_SBO_TERM: gp.getSBOTermID(),
    }