napistu 0.3.6__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +28 -13
- napistu/consensus.py +19 -25
- napistu/constants.py +102 -83
- napistu/indices.py +3 -1
- napistu/ingestion/napistu_edgelist.py +4 -4
- napistu/ingestion/sbml.py +298 -295
- napistu/ingestion/string.py +14 -18
- napistu/ingestion/trrust.py +22 -27
- napistu/matching/interactions.py +41 -39
- napistu/matching/species.py +1 -1
- napistu/modify/gaps.py +2 -1
- napistu/network/constants.py +61 -45
- napistu/network/data_handling.py +1 -1
- napistu/network/neighborhoods.py +3 -3
- napistu/network/net_create.py +440 -616
- napistu/network/net_create_utils.py +734 -0
- napistu/network/net_propagation.py +1 -1
- napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
- napistu/network/ng_utils.py +28 -21
- napistu/network/paths.py +4 -4
- napistu/network/precompute.py +35 -74
- napistu/ontologies/genodexito.py +5 -1
- napistu/ontologies/renaming.py +4 -0
- napistu/sbml_dfs_core.py +127 -64
- napistu/sbml_dfs_utils.py +50 -0
- napistu/utils.py +132 -46
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/METADATA +2 -2
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/RECORD +47 -44
- tests/conftest.py +171 -13
- tests/test_consensus.py +74 -5
- tests/test_gaps.py +26 -15
- tests/test_network_data_handling.py +5 -2
- tests/test_network_net_create.py +93 -202
- tests/test_network_net_create_utils.py +538 -0
- tests/test_network_ng_core.py +19 -0
- tests/test_network_ng_utils.py +1 -1
- tests/test_network_precompute.py +5 -4
- tests/test_ontologies_renaming.py +28 -24
- tests/test_rpy2_callr.py +0 -1
- tests/test_rpy2_init.py +0 -1
- tests/test_sbml_dfs_core.py +165 -15
- tests/test_sbml_dfs_utils.py +45 -0
- tests/test_utils.py +45 -2
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/WHEEL +0 -0
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/top_level.txt +0 -0
napistu/ingestion/sbml.py
CHANGED
@@ -3,11 +3,12 @@ from __future__ import annotations
|
|
3
3
|
import logging
|
4
4
|
import os
|
5
5
|
import re
|
6
|
+
from typing import Any
|
6
7
|
|
7
8
|
import libsbml
|
8
9
|
import pandas as pd
|
9
10
|
from fs import open_fs
|
10
|
-
from pydantic import
|
11
|
+
from pydantic import field_validator, RootModel
|
11
12
|
|
12
13
|
from napistu import consensus
|
13
14
|
from napistu import identifiers
|
@@ -17,6 +18,8 @@ from napistu import utils
|
|
17
18
|
from napistu.constants import BQB
|
18
19
|
from napistu.constants import ONTOLOGIES
|
19
20
|
from napistu.constants import SBML_DFS
|
21
|
+
from napistu.constants import SBML_DFS_SCHEMA
|
22
|
+
from napistu.constants import SCHEMA_DEFS
|
20
23
|
from napistu.ingestion.constants import SBML_DEFS
|
21
24
|
from napistu.ingestion.constants import COMPARTMENTS_GO_TERMS
|
22
25
|
from napistu.ingestion.constants import COMPARTMENT_ALIASES
|
@@ -25,8 +28,6 @@ from napistu.ingestion.constants import GENERIC_COMPARTMENT
|
|
25
28
|
|
26
29
|
logger = logging.getLogger(__name__)
|
27
30
|
|
28
|
-
NonEmptyStringList = conlist(str, min_length=1)
|
29
|
-
|
30
31
|
|
31
32
|
class SBML:
|
32
33
|
"""A class for handling Systems Biology Markup Language (SBML) files.
|
@@ -101,35 +102,6 @@ class SBML:
|
|
101
102
|
f"Critical errors were found when reading the sbml file: {critical_errors}"
|
102
103
|
)
|
103
104
|
|
104
|
-
def summary(self) -> pd.DataFrame:
|
105
|
-
"""Generates a styled summary of the SBML model.
|
106
|
-
|
107
|
-
Returns
|
108
|
-
-------
|
109
|
-
pd.io.formats.style.Styler
|
110
|
-
A styled pandas DataFrame containing a summary of the model,
|
111
|
-
including pathway name, ID, and counts of species and reactions.
|
112
|
-
"""
|
113
|
-
model = self.model
|
114
|
-
|
115
|
-
model_summaries = dict()
|
116
|
-
|
117
|
-
model_summaries[SBML_DEFS.SUMMARY_PATHWAY_NAME] = model.getName()
|
118
|
-
model_summaries[SBML_DEFS.SUMMARY_PATHWAY_ID] = model.getId()
|
119
|
-
|
120
|
-
model_summaries[SBML_DEFS.SUMMARY_N_SPECIES] = model.getNumSpecies()
|
121
|
-
model_summaries[SBML_DEFS.SUMMARY_N_REACTIONS] = model.getNumReactions()
|
122
|
-
|
123
|
-
compartments = [
|
124
|
-
model.getCompartment(i).getName() for i in range(model.getNumCompartments())
|
125
|
-
]
|
126
|
-
compartments.sort()
|
127
|
-
model_summaries[SBML_DEFS.SUMMARY_COMPARTMENTS] = ",\n".join(compartments)
|
128
|
-
|
129
|
-
model_summaries_dat = pd.DataFrame(model_summaries, index=[0]).T
|
130
|
-
|
131
|
-
return utils.style_df(model_summaries_dat) # type: ignore
|
132
|
-
|
133
105
|
def sbml_errors(self, reduced_log: bool = True, return_df: bool = False):
|
134
106
|
"""Formats and reports all errors found in the SBML file.
|
135
107
|
|
@@ -199,6 +171,253 @@ class SBML:
|
|
199
171
|
|
200
172
|
return None
|
201
173
|
|
174
|
+
def summary(self) -> pd.DataFrame:
|
175
|
+
"""Generates a styled summary of the SBML model.
|
176
|
+
|
177
|
+
Returns
|
178
|
+
-------
|
179
|
+
pd.io.formats.style.Styler
|
180
|
+
A styled pandas DataFrame containing a summary of the model,
|
181
|
+
including pathway name, ID, and counts of species and reactions.
|
182
|
+
"""
|
183
|
+
model = self.model
|
184
|
+
|
185
|
+
model_summaries = dict()
|
186
|
+
|
187
|
+
model_summaries[SBML_DEFS.SUMMARY_PATHWAY_NAME] = model.getName()
|
188
|
+
model_summaries[SBML_DEFS.SUMMARY_PATHWAY_ID] = model.getId()
|
189
|
+
|
190
|
+
model_summaries[SBML_DEFS.SUMMARY_N_SPECIES] = model.getNumSpecies()
|
191
|
+
model_summaries[SBML_DEFS.SUMMARY_N_REACTIONS] = model.getNumReactions()
|
192
|
+
|
193
|
+
compartments = [
|
194
|
+
model.getCompartment(i).getName() for i in range(model.getNumCompartments())
|
195
|
+
]
|
196
|
+
compartments.sort()
|
197
|
+
model_summaries[SBML_DEFS.SUMMARY_COMPARTMENTS] = ",\n".join(compartments)
|
198
|
+
|
199
|
+
model_summaries_dat = pd.DataFrame(model_summaries, index=[0]).T
|
200
|
+
|
201
|
+
return utils.style_df(model_summaries_dat) # type: ignore
|
202
|
+
|
203
|
+
def _define_compartments(
|
204
|
+
self, compartment_aliases_dict: dict | None = None
|
205
|
+
) -> pd.DataFrame:
|
206
|
+
"""Extracts and defines compartments from the SBML model.
|
207
|
+
|
208
|
+
This function iterates through the compartments in the SBML model,
|
209
|
+
extracting their IDs, names, and identifiers. It also handles cases where
|
210
|
+
CVTerms are missing by mapping compartment names to known GO terms.
|
211
|
+
|
212
|
+
Parameters
|
213
|
+
----------
|
214
|
+
sbml_model : SBML
|
215
|
+
The SBML model to process.
|
216
|
+
compartment_aliases_dict : dict, optional
|
217
|
+
A dictionary to map custom compartment names. If None, the default
|
218
|
+
mapping from `COMPARTMENT_ALIASES` is used.
|
219
|
+
|
220
|
+
Returns
|
221
|
+
-------
|
222
|
+
pd.DataFrame
|
223
|
+
A DataFrame containing information about each compartment, indexed by
|
224
|
+
compartment ID.
|
225
|
+
"""
|
226
|
+
if compartment_aliases_dict is None:
|
227
|
+
aliases = COMPARTMENT_ALIASES
|
228
|
+
else:
|
229
|
+
aliases = CompartmentAliasesValidator.from_dict(compartment_aliases_dict)
|
230
|
+
|
231
|
+
compartments = list()
|
232
|
+
for i in range(self.model.getNumCompartments()):
|
233
|
+
comp = self.model.getCompartment(i)
|
234
|
+
|
235
|
+
if not comp.getCVTerms():
|
236
|
+
logger.warning(
|
237
|
+
f"Compartment {comp.getId()} has empty CVterms, mapping its c_Identifiers from the Compartment dict"
|
238
|
+
)
|
239
|
+
|
240
|
+
compartments.append(_define_compartments_missing_cvterms(comp, aliases))
|
241
|
+
|
242
|
+
else:
|
243
|
+
compartments.append(
|
244
|
+
{
|
245
|
+
SBML_DFS.C_ID: comp.getId(),
|
246
|
+
SBML_DFS.C_NAME: comp.getName(),
|
247
|
+
SBML_DFS.C_IDENTIFIERS: identifiers.cv_to_Identifiers(comp),
|
248
|
+
SBML_DFS.C_SOURCE: source.Source(init=True),
|
249
|
+
}
|
250
|
+
)
|
251
|
+
|
252
|
+
return pd.DataFrame(compartments).set_index(SBML_DFS.C_ID)
|
253
|
+
|
254
|
+
def _define_cspecies(self) -> pd.DataFrame:
|
255
|
+
"""Creates a DataFrame of compartmentalized species from an SBML model.
|
256
|
+
|
257
|
+
This function extracts all species from the model and creates a
|
258
|
+
standardized DataFrame that includes unique IDs for each compartmentalized
|
259
|
+
species (`sc_id`), along with species and compartment IDs, and their
|
260
|
+
corresponding identifiers.
|
261
|
+
|
262
|
+
Returns
|
263
|
+
-------
|
264
|
+
pd.DataFrame
|
265
|
+
A DataFrame containing information about each compartmentalized species.
|
266
|
+
"""
|
267
|
+
comp_species = list()
|
268
|
+
for i in range(self.model.getNumSpecies()):
|
269
|
+
spec = self.model.getSpecies(i)
|
270
|
+
|
271
|
+
spec_dict = {
|
272
|
+
SBML_DFS.SC_ID: spec.getId(),
|
273
|
+
SBML_DFS.SC_NAME: spec.getName(),
|
274
|
+
SBML_DFS.C_ID: spec.getCompartment(),
|
275
|
+
SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(spec),
|
276
|
+
SBML_DFS.SC_SOURCE: source.Source(init=True),
|
277
|
+
}
|
278
|
+
|
279
|
+
comp_species.append(spec_dict)
|
280
|
+
|
281
|
+
# add geneproducts defined using L3 FBC extension
|
282
|
+
fbc_gene_products = self._define_fbc_gene_products()
|
283
|
+
comp_species.extend(fbc_gene_products)
|
284
|
+
|
285
|
+
comp_species_df = pd.DataFrame(comp_species).set_index(SBML_DFS.SC_ID)
|
286
|
+
comp_species_df[SBML_DFS.SC_NAME] = utils.update_pathological_names(
|
287
|
+
comp_species_df[SBML_DFS.SC_NAME], "SC"
|
288
|
+
)
|
289
|
+
|
290
|
+
return comp_species_df
|
291
|
+
|
292
|
+
def _define_fbc_gene_products(self) -> list[dict]:
|
293
|
+
|
294
|
+
mplugin = self.model.getPlugin("fbc")
|
295
|
+
|
296
|
+
fbc_gene_products = list()
|
297
|
+
if mplugin is not None:
|
298
|
+
for i in range(mplugin.getNumGeneProducts()):
|
299
|
+
gene_product = mplugin.getGeneProduct(i)
|
300
|
+
|
301
|
+
gene_dict = {
|
302
|
+
SBML_DFS.SC_ID: gene_product.getId(),
|
303
|
+
SBML_DFS.SC_NAME: (
|
304
|
+
gene_product.getName()
|
305
|
+
if gene_product.isSetName()
|
306
|
+
else gene_product.getLabel()
|
307
|
+
),
|
308
|
+
# use getLabel() to accomendate sbml model (e.g. HumanGEM.xml) with no fbc:name attribute
|
309
|
+
# Recon3D.xml has both fbc:label and fbc:name attributes, with gene name in fbc:nam
|
310
|
+
SBML_DFS.C_ID: None,
|
311
|
+
SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(gene_product),
|
312
|
+
SBML_DFS.SC_SOURCE: source.Source(init=True),
|
313
|
+
}
|
314
|
+
|
315
|
+
fbc_gene_products.append(gene_dict)
|
316
|
+
|
317
|
+
return fbc_gene_products
|
318
|
+
|
319
|
+
def _define_reactions(self) -> tuple[pd.DataFrame, pd.DataFrame]:
|
320
|
+
"""Extracts and defines reactions and their participating species.
|
321
|
+
|
322
|
+
This function iterates through all reactions in the SBML model, creating
|
323
|
+
a DataFrame for reaction attributes and another for all participating
|
324
|
+
species (reactants, products, and modifiers).
|
325
|
+
|
326
|
+
Parameters
|
327
|
+
----------
|
328
|
+
sbml_model : SBML
|
329
|
+
The SBML model to process.
|
330
|
+
|
331
|
+
Returns
|
332
|
+
-------
|
333
|
+
tuple[pd.DataFrame, pd.DataFrame]
|
334
|
+
A tuple containing two DataFrames:
|
335
|
+
- The first DataFrame contains reaction attributes, indexed by reaction ID.
|
336
|
+
- The second DataFrame lists all species participating in reactions.
|
337
|
+
"""
|
338
|
+
reactions_list = []
|
339
|
+
reaction_species_list = []
|
340
|
+
for i in range(self.model.getNumReactions()):
|
341
|
+
rxn = SBML_reaction(self.model.getReaction(i))
|
342
|
+
reactions_list.append(rxn.reaction_dict)
|
343
|
+
|
344
|
+
rxn_specs = rxn.species
|
345
|
+
rxn_specs[SBML_DFS.R_ID] = rxn.reaction_dict[SBML_DFS.R_ID]
|
346
|
+
reaction_species_list.append(rxn_specs)
|
347
|
+
|
348
|
+
reactions = pd.DataFrame(reactions_list).set_index(SBML_DFS.R_ID)
|
349
|
+
|
350
|
+
reaction_species_df = pd.concat(reaction_species_list)
|
351
|
+
# add an index if reaction species didn't have IDs in the .sbml
|
352
|
+
if all([v == "" for v in reaction_species_df.index.tolist()]):
|
353
|
+
reaction_species_df = (
|
354
|
+
reaction_species_df.reset_index(drop=True)
|
355
|
+
.assign(
|
356
|
+
rsc_id=sbml_dfs_utils.id_formatter(
|
357
|
+
range(reaction_species_df.shape[0]), SBML_DFS.RSC_ID
|
358
|
+
)
|
359
|
+
)
|
360
|
+
.set_index(SBML_DFS.RSC_ID)
|
361
|
+
)
|
362
|
+
|
363
|
+
return reactions, reaction_species_df
|
364
|
+
|
365
|
+
def _define_species(self) -> tuple[pd.DataFrame, pd.DataFrame]:
|
366
|
+
"""Extracts and defines species and compartmentalized species.
|
367
|
+
|
368
|
+
This function creates two DataFrames: one for unique molecular species
|
369
|
+
(un-compartmentalized) and another for compartmentalized species, which
|
370
|
+
represent a species within a specific compartment.
|
371
|
+
|
372
|
+
Returns
|
373
|
+
-------
|
374
|
+
tuple[pd.DataFrame, pd.DataFrame]
|
375
|
+
A tuple containing two DataFrames:
|
376
|
+
- The first DataFrame represents unique molecular species.
|
377
|
+
- The second DataFrame represents compartmentalized species.
|
378
|
+
"""
|
379
|
+
|
380
|
+
SPECIES_SCHEMA = SBML_DFS_SCHEMA.SCHEMA[SBML_DFS.SPECIES]
|
381
|
+
CSPECIES_SCHEMA = SBML_DFS_SCHEMA.SCHEMA[SBML_DFS.COMPARTMENTALIZED_SPECIES]
|
382
|
+
SPECIES_VARS = SPECIES_SCHEMA[SCHEMA_DEFS.VARS]
|
383
|
+
CSPECIES_VARS = CSPECIES_SCHEMA[SCHEMA_DEFS.VARS]
|
384
|
+
|
385
|
+
comp_species_df = self._define_cspecies()
|
386
|
+
|
387
|
+
# find unique species and create a table
|
388
|
+
consensus_species_df = comp_species_df.copy()
|
389
|
+
consensus_species_df.index.names = [SBML_DFS.S_ID]
|
390
|
+
consensus_species, species_lookup = consensus.reduce_to_consensus_ids(
|
391
|
+
consensus_species_df,
|
392
|
+
# note that this is an incomplete schema because consensus_species_df isn't a
|
393
|
+
# normal species table
|
394
|
+
{
|
395
|
+
SCHEMA_DEFS.PK: SBML_DFS.S_ID,
|
396
|
+
SCHEMA_DEFS.ID: SBML_DFS.S_IDENTIFIERS,
|
397
|
+
SCHEMA_DEFS.TABLE: SBML_DFS.SPECIES,
|
398
|
+
},
|
399
|
+
)
|
400
|
+
|
401
|
+
# create a table of unique molecular species
|
402
|
+
consensus_species.index.name = SBML_DFS.S_ID
|
403
|
+
consensus_species[SBML_DFS.S_NAME] = [
|
404
|
+
re.sub("\\[.+\\]", "", x).strip()
|
405
|
+
for x in consensus_species[SBML_DFS.SC_NAME]
|
406
|
+
]
|
407
|
+
consensus_species = consensus_species.drop(
|
408
|
+
[SBML_DFS.SC_NAME, SBML_DFS.C_ID], axis=1
|
409
|
+
)
|
410
|
+
consensus_species[SBML_DFS.S_SOURCE] = [
|
411
|
+
source.Source(init=True) for x in range(0, consensus_species.shape[0])
|
412
|
+
]
|
413
|
+
|
414
|
+
species = consensus_species[SPECIES_VARS]
|
415
|
+
compartmentalized_species = comp_species_df.join(species_lookup).rename(
|
416
|
+
columns={"new_id": SBML_DFS.S_ID}
|
417
|
+
)[CSPECIES_VARS]
|
418
|
+
|
419
|
+
return species, compartmentalized_species
|
420
|
+
|
202
421
|
|
203
422
|
class CompartmentAliasesValidator(RootModel):
|
204
423
|
"""
|
@@ -375,288 +594,72 @@ def sbml_dfs_from_sbml(self, sbml_model: SBML, compartment_aliases: dict | None
|
|
375
594
|
compartments, species, compartmentalized_species, reactions, and reaction_species
|
376
595
|
"""
|
377
596
|
# 1. Process compartments from the SBML model
|
378
|
-
self.compartments = _define_compartments(
|
597
|
+
self.compartments = sbml_model._define_compartments(compartment_aliases)
|
379
598
|
|
380
599
|
# 2. Process species and compartmentalized species
|
381
|
-
self.species, self.compartmentalized_species = _define_species(
|
382
|
-
sbml_model, self.schema
|
383
|
-
)
|
600
|
+
self.species, self.compartmentalized_species = sbml_model._define_species()
|
384
601
|
|
385
602
|
# 3. Process reactions and their participating species
|
386
|
-
self.reactions, self.reaction_species = _define_reactions(
|
603
|
+
self.reactions, self.reaction_species = sbml_model._define_reactions()
|
387
604
|
|
388
605
|
return self
|
389
606
|
|
390
607
|
|
391
|
-
def
|
392
|
-
|
393
|
-
) ->
|
394
|
-
"""Extracts and defines compartments from the SBML model.
|
608
|
+
def _define_compartments_missing_cvterms(
|
609
|
+
comp: libsbml.Compartment, aliases: dict
|
610
|
+
) -> dict[str, Any]:
|
395
611
|
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
Parameters
|
401
|
-
----------
|
402
|
-
sbml_model : SBML
|
403
|
-
The SBML model to process.
|
404
|
-
compartment_aliases_dict : dict, optional
|
405
|
-
A dictionary to map custom compartment names. If None, the default
|
406
|
-
mapping from `COMPARTMENT_ALIASES` is used.
|
612
|
+
comp_name = comp.getName()
|
613
|
+
mapped_compartment_key = [
|
614
|
+
compkey for compkey, mappednames in aliases.items() if comp_name in mappednames
|
615
|
+
]
|
407
616
|
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
compartment ID.
|
413
|
-
"""
|
414
|
-
if compartment_aliases_dict is None:
|
415
|
-
aliases = COMPARTMENT_ALIASES
|
416
|
-
else:
|
417
|
-
aliases = CompartmentAliasesValidator.from_dict(compartment_aliases_dict)
|
617
|
+
if len(mapped_compartment_key) == 0:
|
618
|
+
logger.warning(
|
619
|
+
f"No GO compartment for {comp_name} is mapped, use the generic cellular_component's GO id"
|
620
|
+
)
|
418
621
|
|
419
|
-
|
420
|
-
|
421
|
-
|
622
|
+
compartment_entry = {
|
623
|
+
SBML_DFS.C_ID: comp.getId(),
|
624
|
+
SBML_DFS.C_NAME: comp.getName(),
|
625
|
+
SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
|
626
|
+
[
|
627
|
+
identifiers.format_uri(
|
628
|
+
uri=identifiers.create_uri_url(
|
629
|
+
ontology=ONTOLOGIES.GO,
|
630
|
+
identifier=COMPARTMENTS_GO_TERMS[GENERIC_COMPARTMENT],
|
631
|
+
),
|
632
|
+
biological_qualifier_type=BQB.BQB_IS,
|
633
|
+
)
|
634
|
+
]
|
635
|
+
),
|
636
|
+
SBML_DFS.C_SOURCE: source.Source(init=True),
|
637
|
+
}
|
422
638
|
|
423
|
-
|
639
|
+
if len(mapped_compartment_key) > 0:
|
640
|
+
if len(mapped_compartment_key) > 1:
|
424
641
|
logger.warning(
|
425
|
-
f"
|
642
|
+
f"More than one GO compartments for {comp_name} are mapped, using the first one"
|
426
643
|
)
|
427
644
|
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
f"No GO compartment for {comp_name} is mapped, use the generic cellular_component's GO id"
|
438
|
-
)
|
439
|
-
compartments.append(
|
440
|
-
{
|
441
|
-
SBML_DFS.C_ID: comp.getId(),
|
442
|
-
SBML_DFS.C_NAME: comp.getName(),
|
443
|
-
SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
|
444
|
-
[
|
445
|
-
identifiers.format_uri(
|
446
|
-
uri=identifiers.create_uri_url(
|
447
|
-
ontology=ONTOLOGIES.GO,
|
448
|
-
identifier=COMPARTMENTS_GO_TERMS[
|
449
|
-
GENERIC_COMPARTMENT
|
450
|
-
],
|
451
|
-
),
|
452
|
-
biological_qualifier_type=BQB.BQB_IS,
|
453
|
-
)
|
454
|
-
]
|
645
|
+
compartment_entry = {
|
646
|
+
SBML_DFS.C_ID: comp.getId(),
|
647
|
+
SBML_DFS.C_NAME: comp.getName(),
|
648
|
+
SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
|
649
|
+
[
|
650
|
+
identifiers.format_uri(
|
651
|
+
uri=identifiers.create_uri_url(
|
652
|
+
ontology=ONTOLOGIES.GO,
|
653
|
+
identifier=COMPARTMENTS_GO_TERMS[mapped_compartment_key[0]],
|
455
654
|
),
|
456
|
-
|
457
|
-
}
|
458
|
-
)
|
459
|
-
|
460
|
-
if len(mapped_compartment_key) > 0:
|
461
|
-
if len(mapped_compartment_key) > 1:
|
462
|
-
logger.warning(
|
463
|
-
f"More than one GO compartments for {comp_name} are mapped, using the first one"
|
655
|
+
biological_qualifier_type=BQB.IS,
|
464
656
|
)
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
SBML_DFS.C_NAME: comp.getName(),
|
469
|
-
SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
|
470
|
-
[
|
471
|
-
identifiers.format_uri(
|
472
|
-
uri=identifiers.create_uri_url(
|
473
|
-
ontology=ONTOLOGIES.GO,
|
474
|
-
identifier=COMPARTMENTS_GO_TERMS[
|
475
|
-
mapped_compartment_key[0]
|
476
|
-
],
|
477
|
-
),
|
478
|
-
biological_qualifier_type=BQB.IS,
|
479
|
-
)
|
480
|
-
]
|
481
|
-
),
|
482
|
-
SBML_DFS.C_SOURCE: source.Source(init=True),
|
483
|
-
}
|
484
|
-
)
|
485
|
-
|
486
|
-
else:
|
487
|
-
compartments.append(
|
488
|
-
{
|
489
|
-
SBML_DFS.C_ID: comp.getId(),
|
490
|
-
SBML_DFS.C_NAME: comp.getName(),
|
491
|
-
SBML_DFS.C_IDENTIFIERS: identifiers.cv_to_Identifiers(comp),
|
492
|
-
SBML_DFS.C_SOURCE: source.Source(init=True),
|
493
|
-
}
|
494
|
-
)
|
495
|
-
|
496
|
-
return pd.DataFrame(compartments).set_index(SBML_DFS.C_ID)
|
497
|
-
|
498
|
-
|
499
|
-
def _define_species(
|
500
|
-
sbml_model: SBML, schema: dict
|
501
|
-
) -> tuple[pd.DataFrame, pd.DataFrame]:
|
502
|
-
"""Extracts and defines species and compartmentalized species.
|
503
|
-
|
504
|
-
This function creates two DataFrames: one for unique molecular species
|
505
|
-
(un-compartmentalized) and another for compartmentalized species, which
|
506
|
-
represent a species within a specific compartment.
|
507
|
-
|
508
|
-
Parameters
|
509
|
-
----------
|
510
|
-
sbml_model : SBML
|
511
|
-
The SBML model to process.
|
512
|
-
schema : dict
|
513
|
-
A dictionary defining the data schema for species and compartmentalized
|
514
|
-
species tables.
|
515
|
-
|
516
|
-
Returns
|
517
|
-
-------
|
518
|
-
tuple[pd.DataFrame, pd.DataFrame]
|
519
|
-
A tuple containing two DataFrames:
|
520
|
-
- The first DataFrame represents unique molecular species.
|
521
|
-
- The second DataFrame represents compartmentalized species.
|
522
|
-
"""
|
523
|
-
|
524
|
-
SPECIES_VARS = schema["species"]["vars"]
|
525
|
-
CSPECIES_VARS = schema["compartmentalized_species"]["vars"]
|
526
|
-
|
527
|
-
comp_species_df = setup_cspecies(sbml_model)
|
528
|
-
|
529
|
-
# find unique species and create a table
|
530
|
-
consensus_species_df = comp_species_df.copy()
|
531
|
-
consensus_species_df.index.names = [SBML_DFS.S_ID]
|
532
|
-
consensus_species, species_lookup = consensus.reduce_to_consensus_ids(
|
533
|
-
consensus_species_df,
|
534
|
-
{"pk": SBML_DFS.S_ID, "id": SBML_DFS.S_IDENTIFIERS},
|
535
|
-
)
|
536
|
-
|
537
|
-
# create a table of unique molecular species
|
538
|
-
consensus_species.index.name = SBML_DFS.S_ID
|
539
|
-
consensus_species[SBML_DFS.S_NAME] = [
|
540
|
-
re.sub("\\[.+\\]", "", x).strip() for x in consensus_species[SBML_DFS.SC_NAME]
|
541
|
-
]
|
542
|
-
consensus_species = consensus_species.drop(
|
543
|
-
[SBML_DFS.SC_NAME, SBML_DFS.C_ID], axis=1
|
544
|
-
)
|
545
|
-
consensus_species["s_Source"] = [
|
546
|
-
source.Source(init=True) for x in range(0, consensus_species.shape[0])
|
547
|
-
]
|
548
|
-
|
549
|
-
species = consensus_species[SPECIES_VARS]
|
550
|
-
compartmentalized_species = comp_species_df.join(species_lookup).rename(
|
551
|
-
columns={"new_id": SBML_DFS.S_ID}
|
552
|
-
)[CSPECIES_VARS]
|
553
|
-
|
554
|
-
return species, compartmentalized_species
|
555
|
-
|
556
|
-
|
557
|
-
def _define_reactions(sbml_model: SBML) -> tuple[pd.DataFrame, pd.DataFrame]:
|
558
|
-
"""Extracts and defines reactions and their participating species.
|
559
|
-
|
560
|
-
This function iterates through all reactions in the SBML model, creating
|
561
|
-
a DataFrame for reaction attributes and another for all participating
|
562
|
-
species (reactants, products, and modifiers).
|
563
|
-
|
564
|
-
Parameters
|
565
|
-
----------
|
566
|
-
sbml_model : SBML
|
567
|
-
The SBML model to process.
|
568
|
-
|
569
|
-
Returns
|
570
|
-
-------
|
571
|
-
tuple[pd.DataFrame, pd.DataFrame]
|
572
|
-
A tuple containing two DataFrames:
|
573
|
-
- The first DataFrame contains reaction attributes, indexed by reaction ID.
|
574
|
-
- The second DataFrame lists all species participating in reactions.
|
575
|
-
"""
|
576
|
-
reactions_list = []
|
577
|
-
reaction_species_list = []
|
578
|
-
for i in range(sbml_model.model.getNumReactions()):
|
579
|
-
rxn = SBML_reaction(sbml_model.model.getReaction(i))
|
580
|
-
reactions_list.append(rxn.reaction_dict)
|
581
|
-
|
582
|
-
rxn_specs = rxn.species
|
583
|
-
rxn_specs[SBML_DFS.R_ID] = rxn.reaction_dict[SBML_DFS.R_ID]
|
584
|
-
reaction_species_list.append(rxn_specs)
|
585
|
-
|
586
|
-
reactions = pd.DataFrame(reactions_list).set_index(SBML_DFS.R_ID)
|
587
|
-
|
588
|
-
reaction_species_df = pd.concat(reaction_species_list)
|
589
|
-
# add an index if reaction species didn't have IDs in the .sbml
|
590
|
-
if all([v == "" for v in reaction_species_df.index.tolist()]):
|
591
|
-
reaction_species_df = (
|
592
|
-
reaction_species_df.reset_index(drop=True)
|
593
|
-
.assign(
|
594
|
-
rsc_id=sbml_dfs_utils.id_formatter(
|
595
|
-
range(reaction_species_df.shape[0]), SBML_DFS.RSC_ID
|
596
|
-
)
|
597
|
-
)
|
598
|
-
.set_index(SBML_DFS.RSC_ID)
|
599
|
-
)
|
600
|
-
|
601
|
-
return reactions, reaction_species_df
|
602
|
-
|
603
|
-
|
604
|
-
def setup_cspecies(sbml_model: SBML) -> pd.DataFrame:
|
605
|
-
"""Creates a DataFrame of compartmentalized species from an SBML model.
|
606
|
-
|
607
|
-
This function extracts all species from the model and creates a
|
608
|
-
standardized DataFrame that includes unique IDs for each compartmentalized
|
609
|
-
species (`sc_id`), along with species and compartment IDs, and their
|
610
|
-
corresponding identifiers.
|
611
|
-
|
612
|
-
Parameters
|
613
|
-
----------
|
614
|
-
sbml_model : SBML
|
615
|
-
The SBML model to process.
|
616
|
-
|
617
|
-
Returns
|
618
|
-
-------
|
619
|
-
pd.DataFrame
|
620
|
-
A DataFrame containing information about each compartmentalized species.
|
621
|
-
"""
|
622
|
-
comp_species = list()
|
623
|
-
for i in range(sbml_model.model.getNumSpecies()):
|
624
|
-
spec = sbml_model.model.getSpecies(i)
|
625
|
-
|
626
|
-
spec_dict = {
|
627
|
-
SBML_DFS.SC_ID: spec.getId(),
|
628
|
-
SBML_DFS.SC_NAME: spec.getName(),
|
629
|
-
SBML_DFS.C_ID: spec.getCompartment(),
|
630
|
-
SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(spec),
|
631
|
-
SBML_DFS.SC_SOURCE: source.Source(init=True),
|
657
|
+
]
|
658
|
+
),
|
659
|
+
SBML_DFS.C_SOURCE: source.Source(init=True),
|
632
660
|
}
|
633
661
|
|
634
|
-
|
635
|
-
|
636
|
-
mplugin = sbml_model.model.getPlugin("fbc")
|
637
|
-
|
638
|
-
# add geneproducts defined using L3 FBC extension
|
639
|
-
if mplugin is not None:
|
640
|
-
for i in range(mplugin.getNumGeneProducts()):
|
641
|
-
gene_product = mplugin.getGeneProduct(i)
|
642
|
-
|
643
|
-
gene_dict = {
|
644
|
-
SBML_DFS.SC_ID: gene_product.getId(),
|
645
|
-
SBML_DFS.SC_NAME: (
|
646
|
-
gene_product.getName()
|
647
|
-
if gene_product.isSetName()
|
648
|
-
else gene_product.getLabel()
|
649
|
-
),
|
650
|
-
# use getLabel() to accomendate sbml model (e.g. HumanGEM.xml) with no fbc:name attribute
|
651
|
-
# Recon3D.xml has both fbc:label and fbc:name attributes, with gene name in fbc:nam
|
652
|
-
SBML_DFS.C_ID: None,
|
653
|
-
SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(gene_product),
|
654
|
-
SBML_DFS.SC_SOURCE: source.Source(init=True),
|
655
|
-
}
|
656
|
-
|
657
|
-
comp_species.append(gene_dict)
|
658
|
-
|
659
|
-
return pd.DataFrame(comp_species).set_index(SBML_DFS.SC_ID)
|
662
|
+
return compartment_entry
|
660
663
|
|
661
664
|
|
662
665
|
def _get_gene_product_dict(gp):
|