napistu 0.3.1.dev1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +1 -1
- napistu/constants.py +0 -80
- napistu/ingestion/constants.py +106 -37
- napistu/ingestion/sbml.py +392 -221
- napistu/ingestion/string.py +2 -2
- napistu/modify/gaps.py +3 -3
- napistu/network/precompute.py +64 -16
- napistu/sbml_dfs_core.py +19 -20
- {napistu-0.3.1.dev1.dist-info → napistu-0.3.3.dist-info}/METADATA +1 -1
- {napistu-0.3.1.dev1.dist-info → napistu-0.3.3.dist-info}/RECORD +17 -17
- tests/test_network_precompute.py +58 -8
- tests/test_sbml.py +38 -7
- {napistu-0.3.1.dev1.dist-info → napistu-0.3.3.dist-info}/WHEEL +0 -0
- {napistu-0.3.1.dev1.dist-info → napistu-0.3.3.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.1.dev1.dist-info → napistu-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.1.dev1.dist-info → napistu-0.3.3.dist-info}/top_level.txt +0 -0
- /tests/{test_net_propagation.py → test_network_net_propagation.py} +0 -0
napistu/ingestion/sbml.py
CHANGED
@@ -6,60 +6,46 @@ import re
|
|
6
6
|
|
7
7
|
import libsbml
|
8
8
|
import pandas as pd
|
9
|
+
from fs import open_fs
|
10
|
+
from pydantic import conlist, field_validator, RootModel
|
11
|
+
|
9
12
|
from napistu import consensus
|
10
|
-
from napistu import constants
|
11
13
|
from napistu import identifiers
|
12
14
|
from napistu import sbml_dfs_utils
|
13
15
|
from napistu import source
|
14
16
|
from napistu import utils
|
15
|
-
|
16
17
|
from napistu.constants import BQB
|
17
|
-
|
18
|
-
from napistu.
|
19
|
-
from napistu.ingestion.constants import
|
20
|
-
from napistu.ingestion.constants import
|
21
|
-
from napistu.ingestion.constants import
|
22
|
-
from napistu.ingestion.constants import
|
23
|
-
from napistu.ingestion.constants import
|
24
|
-
from napistu.ingestion.constants import SBML_REACTION_ATTR_GET_GENE_PRODUCT
|
25
|
-
from napistu.ingestion.constants import SBML_SPECIES_DICT_ID
|
26
|
-
from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
|
27
|
-
from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
|
28
|
-
from napistu.ingestion.constants import SMBL_ERROR_CATEGORY
|
29
|
-
from napistu.ingestion.constants import SMBL_ERROR_DESCRIPTION
|
30
|
-
from napistu.ingestion.constants import SMBL_ERROR_MESSAGE
|
31
|
-
from napistu.ingestion.constants import SMBL_ERROR_NUMBER
|
32
|
-
from napistu.ingestion.constants import SMBL_ERROR_SEVERITY
|
33
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_ID
|
34
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
|
35
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
|
36
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
|
37
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_SOURCE
|
38
|
-
from napistu.ingestion.constants import SMBL_REACTION_SPEC_RSC_ID
|
39
|
-
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
|
40
|
-
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SC_ID
|
41
|
-
from napistu.ingestion.constants import SMBL_REACTION_SPEC_STOICHIOMETRY
|
42
|
-
from napistu.ingestion.constants import SMBL_SUMMARY_COMPARTMENTS
|
43
|
-
from napistu.ingestion.constants import SMBL_SUMMARY_N_REACTIONS
|
44
|
-
from napistu.ingestion.constants import SMBL_SUMMARY_N_SPECIES
|
45
|
-
from napistu.ingestion.constants import SMBL_SUMMARY_PATHWAY_ID
|
46
|
-
from napistu.ingestion.constants import SMBL_SUMMARY_PATHWAY_NAME
|
47
|
-
|
48
|
-
from fs import open_fs
|
18
|
+
from napistu.constants import ONTOLOGIES
|
19
|
+
from napistu.constants import SBML_DFS
|
20
|
+
from napistu.ingestion.constants import SBML_DEFS
|
21
|
+
from napistu.ingestion.constants import COMPARTMENTS_GO_TERMS
|
22
|
+
from napistu.ingestion.constants import COMPARTMENT_ALIASES
|
23
|
+
from napistu.ingestion.constants import VALID_COMPARTMENTS
|
24
|
+
from napistu.ingestion.constants import GENERIC_COMPARTMENT
|
49
25
|
|
50
26
|
logger = logging.getLogger(__name__)
|
51
27
|
|
28
|
+
NonEmptyStringList = conlist(str, min_length=1)
|
29
|
+
|
52
30
|
|
53
31
|
class SBML:
|
54
|
-
"""
|
55
|
-
|
32
|
+
"""A class for handling Systems Biology Markup Language (SBML) files.
|
33
|
+
|
34
|
+
This class provides an interface to read and parse SBML files, offering
|
35
|
+
methods to access the model, summarize its contents, and report any errors
|
36
|
+
encountered during parsing.
|
37
|
+
|
38
|
+
Parameters
|
39
|
+
----------
|
40
|
+
sbml_path : str
|
41
|
+
The file path to an SBML model. Supports local paths and GCS URIs.
|
56
42
|
|
57
43
|
Attributes
|
58
44
|
----------
|
59
|
-
document
|
60
|
-
|
61
|
-
model
|
62
|
-
|
45
|
+
document : libsbml.SBMLDocument
|
46
|
+
The raw SBML document object from libsbml.
|
47
|
+
model : libsbml.Model
|
48
|
+
The parsed SBML model object from libsbml.
|
63
49
|
|
64
50
|
Methods
|
65
51
|
-------
|
@@ -68,25 +54,18 @@ class SBML:
|
|
68
54
|
sbml_errors(reduced_log, return_df)
|
69
55
|
Print a summary of all errors in the SBML file
|
70
56
|
|
57
|
+
Raises
|
58
|
+
------
|
59
|
+
ValueError
|
60
|
+
If the SBML model is not Level 3, or if critical, unknown errors are
|
61
|
+
found during parsing.
|
71
62
|
"""
|
72
63
|
|
73
64
|
def __init__(
|
74
65
|
self,
|
75
66
|
sbml_path: str,
|
76
67
|
) -> None:
|
77
|
-
"""
|
78
|
-
Connects to an SBML file
|
79
|
-
|
80
|
-
Parameters
|
81
|
-
----------
|
82
|
-
sbml_path : str
|
83
|
-
path to a .sbml file.
|
84
|
-
|
85
|
-
Returns
|
86
|
-
-------
|
87
|
-
None.
|
88
|
-
"""
|
89
|
-
|
68
|
+
"""Initializes the SBML object by reading and validating an SBML file."""
|
90
69
|
reader = libsbml.SBMLReader()
|
91
70
|
if os.path.exists(sbml_path):
|
92
71
|
self.document = reader.readSBML(sbml_path)
|
@@ -105,8 +84,8 @@ class SBML:
|
|
105
84
|
# check for critical sbml errors
|
106
85
|
errors = self.sbml_errors(reduced_log=False, return_df=True)
|
107
86
|
if errors is not None:
|
108
|
-
critical_errors = errors[errors[
|
109
|
-
critical_errors = set(critical_errors[
|
87
|
+
critical_errors = errors[errors[SBML_DEFS.ERROR_SEVERITY] >= 2]
|
88
|
+
critical_errors = set(critical_errors[SBML_DEFS.ERROR_DESCRIPTION].unique())
|
110
89
|
known_errors = {"<layout> must have 'id' and may have 'name'"}
|
111
90
|
|
112
91
|
found_known_errors = known_errors.intersection(critical_errors)
|
@@ -123,41 +102,50 @@ class SBML:
|
|
123
102
|
)
|
124
103
|
|
125
104
|
def summary(self) -> pd.DataFrame:
|
126
|
-
"""
|
105
|
+
"""Generates a styled summary of the SBML model.
|
106
|
+
|
107
|
+
Returns
|
108
|
+
-------
|
109
|
+
pd.io.formats.style.Styler
|
110
|
+
A styled pandas DataFrame containing a summary of the model,
|
111
|
+
including pathway name, ID, and counts of species and reactions.
|
112
|
+
"""
|
127
113
|
model = self.model
|
128
114
|
|
129
115
|
model_summaries = dict()
|
130
116
|
|
131
|
-
model_summaries[
|
132
|
-
model_summaries[
|
117
|
+
model_summaries[SBML_DEFS.SUMMARY_PATHWAY_NAME] = model.getName()
|
118
|
+
model_summaries[SBML_DEFS.SUMMARY_PATHWAY_ID] = model.getId()
|
133
119
|
|
134
|
-
model_summaries[
|
135
|
-
model_summaries[
|
120
|
+
model_summaries[SBML_DEFS.SUMMARY_N_SPECIES] = model.getNumSpecies()
|
121
|
+
model_summaries[SBML_DEFS.SUMMARY_N_REACTIONS] = model.getNumReactions()
|
136
122
|
|
137
123
|
compartments = [
|
138
124
|
model.getCompartment(i).getName() for i in range(model.getNumCompartments())
|
139
125
|
]
|
140
126
|
compartments.sort()
|
141
|
-
model_summaries[
|
127
|
+
model_summaries[SBML_DEFS.SUMMARY_COMPARTMENTS] = ",\n".join(compartments)
|
142
128
|
|
143
129
|
model_summaries_dat = pd.DataFrame(model_summaries, index=[0]).T
|
144
130
|
|
145
131
|
return utils.style_df(model_summaries_dat) # type: ignore
|
146
132
|
|
147
133
|
def sbml_errors(self, reduced_log: bool = True, return_df: bool = False):
|
148
|
-
"""
|
149
|
-
Format and print all SBML errors
|
134
|
+
"""Formats and reports all errors found in the SBML file.
|
150
135
|
|
151
136
|
Parameters
|
152
137
|
----------
|
153
|
-
reduced_log : bool
|
154
|
-
|
155
|
-
return_df: bool
|
156
|
-
If
|
138
|
+
reduced_log : bool, optional
|
139
|
+
If True, aggregates errors by category and severity. Defaults to True.
|
140
|
+
return_df : bool, optional
|
141
|
+
If True, returns a DataFrame of the errors. Otherwise, prints a
|
142
|
+
styled summary. Defaults to False.
|
157
143
|
|
158
144
|
Returns
|
159
145
|
-------
|
160
|
-
|
146
|
+
pd.DataFrame or None
|
147
|
+
A DataFrame containing the error log if `return_df` is True and
|
148
|
+
errors are present, otherwise None.
|
161
149
|
"""
|
162
150
|
n_errors = self.document.getNumErrors()
|
163
151
|
if n_errors == 0:
|
@@ -168,11 +156,11 @@ class SBML:
|
|
168
156
|
e = self.document.getError(i)
|
169
157
|
|
170
158
|
error_entry = {
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
159
|
+
SBML_DEFS.ERROR_NUMBER: i,
|
160
|
+
SBML_DEFS.ERROR_CATEGORY: e.getCategoryAsString(),
|
161
|
+
SBML_DEFS.ERROR_SEVERITY: e.getSeverity(),
|
162
|
+
SBML_DEFS.ERROR_DESCRIPTION: e.getShortMessage(),
|
163
|
+
SBML_DEFS.ERROR_MESSAGE: e.getMessage(),
|
176
164
|
}
|
177
165
|
|
178
166
|
error_log.append(error_entry)
|
@@ -181,9 +169,13 @@ class SBML:
|
|
181
169
|
if reduced_log:
|
182
170
|
error_log = (
|
183
171
|
error_log[
|
184
|
-
[
|
172
|
+
[
|
173
|
+
SBML_DEFS.ERROR_CATEGORY,
|
174
|
+
SBML_DEFS.ERROR_SEVERITY,
|
175
|
+
SBML_DEFS.ERROR_MESSAGE,
|
176
|
+
]
|
185
177
|
]
|
186
|
-
.groupby([
|
178
|
+
.groupby([SBML_DEFS.ERROR_CATEGORY, SBML_DEFS.ERROR_SEVERITY])
|
187
179
|
.count()
|
188
180
|
)
|
189
181
|
|
@@ -191,12 +183,15 @@ class SBML:
|
|
191
183
|
return error_log
|
192
184
|
else:
|
193
185
|
if reduced_log:
|
194
|
-
headers = [
|
186
|
+
headers = [
|
187
|
+
f"{SBML_DEFS.ERROR_CATEGORY}, {SBML_DEFS.ERROR_SEVERITY}",
|
188
|
+
"count",
|
189
|
+
]
|
195
190
|
else:
|
196
191
|
headers = [
|
197
|
-
|
198
|
-
|
199
|
-
|
192
|
+
SBML_DEFS.ERROR_CATEGORY,
|
193
|
+
SBML_DEFS.ERROR_SEVERITY,
|
194
|
+
SBML_DEFS.ERROR_DESCRIPTION,
|
200
195
|
]
|
201
196
|
error_log = error_log[headers]
|
202
197
|
|
@@ -205,34 +200,103 @@ class SBML:
|
|
205
200
|
return None
|
206
201
|
|
207
202
|
|
208
|
-
class
|
203
|
+
class CompartmentAliasesValidator(RootModel):
|
209
204
|
"""
|
210
|
-
|
205
|
+
A Pydantic model for validating compartment alias dictionaries.
|
206
|
+
|
207
|
+
This model ensures that the compartment alias dictionary is a mapping
|
208
|
+
from a string (the canonical compartment name) to a list of strings
|
209
|
+
(the aliases for that compartment). It also validates that the keys
|
210
|
+
of the dictionary are valid compartment names.
|
211
211
|
|
212
212
|
Attributes
|
213
213
|
----------
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
214
|
+
root : dict[str, list[str]]
|
215
|
+
The root of the model is a dictionary where keys are strings and
|
216
|
+
values are lists of strings.
|
217
|
+
"""
|
218
|
+
|
219
|
+
root: dict[str, list[str]]
|
220
|
+
|
221
|
+
@field_validator("root")
|
222
|
+
def validate_aliases(cls, values: dict[str, list[str]]):
|
223
|
+
"""Validate the compartment alias dictionary."""
|
224
|
+
for key, alias_list in values.items():
|
225
|
+
if not key:
|
226
|
+
raise ValueError("Compartment keys must be non-empty.")
|
227
|
+
if key not in VALID_COMPARTMENTS:
|
228
|
+
raise ValueError(
|
229
|
+
f"Invalid compartment key: {key}. "
|
230
|
+
f"Must be one of {VALID_COMPARTMENTS}"
|
231
|
+
)
|
232
|
+
if not alias_list:
|
233
|
+
raise ValueError(f"Alias list for '{key}' cannot be empty.")
|
234
|
+
return values
|
235
|
+
|
236
|
+
@classmethod
|
237
|
+
def from_dict(cls, data: dict[str, list[str]]) -> "CompartmentAliasesValidator":
|
238
|
+
"""
|
239
|
+
Create a CompartmentAliasesValidator from a dictionary.
|
240
|
+
|
241
|
+
Parameters
|
242
|
+
----------
|
243
|
+
data : dict[str, list[str]]
|
244
|
+
A dictionary mapping canonical compartment names to their aliases.
|
245
|
+
|
246
|
+
Returns
|
247
|
+
-------
|
248
|
+
CompartmentAliasesValidator
|
249
|
+
A validated instance of the model.
|
250
|
+
"""
|
251
|
+
return cls.model_validate(data)
|
252
|
+
|
253
|
+
def __getitem__(self, key: str) -> list[str]:
|
254
|
+
return self.root[key]
|
255
|
+
|
256
|
+
def items(self):
|
257
|
+
return self.root.items()
|
258
|
+
|
259
|
+
def __iter__(self):
|
260
|
+
return iter(self.root)
|
261
|
+
|
262
|
+
def __len__(self):
|
263
|
+
return len(self.root)
|
264
|
+
|
265
|
+
|
266
|
+
class SBML_reaction:
|
267
|
+
"""A convenience class for processing individual SBML reactions.
|
268
|
+
|
269
|
+
This class extracts and organizes key information about an SBML reaction,
|
270
|
+
including its attributes and participating species (substrates, products,
|
271
|
+
and modifiers).
|
272
|
+
|
273
|
+
Parameters
|
274
|
+
----------
|
275
|
+
sbml_reaction : libsbml.Reaction
|
276
|
+
A libsbml Reaction object to be processed.
|
218
277
|
|
278
|
+
Attributes
|
279
|
+
----------
|
280
|
+
reaction_dict : dict
|
281
|
+
A dictionary of reaction-level attributes, including its ID, name,
|
282
|
+
reversibility, identifiers, and source information.
|
283
|
+
species : pd.DataFrame
|
284
|
+
A DataFrame listing all species participating in the reaction,
|
285
|
+
including their roles (substrate, product, modifier), stoichiometry,
|
286
|
+
and SBO terms.
|
219
287
|
"""
|
220
288
|
|
221
289
|
def __init__(
|
222
290
|
self,
|
223
291
|
sbml_reaction: libsbml.Reaction,
|
224
292
|
) -> None:
|
225
|
-
"""
|
226
|
-
Convenience class for working with sbml reactions
|
227
|
-
"""
|
293
|
+
"""Initializes the SBML_reaction object by parsing a libsbml Reaction."""
|
228
294
|
reaction_dict = {
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
),
|
234
|
-
SMBL_REACTION_DICT_SOURCE: source.Source(init=True),
|
235
|
-
SMBL_REACTION_DICT_IS_REVERSIBLE: sbml_reaction.getReversible(),
|
295
|
+
SBML_DFS.R_ID: sbml_reaction.getId(),
|
296
|
+
SBML_DFS.R_NAME: sbml_reaction.getName(),
|
297
|
+
SBML_DFS.R_IDENTIFIERS: identifiers.cv_to_Identifiers(sbml_reaction),
|
298
|
+
SBML_DFS.R_SOURCE: source.Source(init=True),
|
299
|
+
SBML_DFS.R_ISREVERSIBLE: sbml_reaction.getReversible(),
|
236
300
|
}
|
237
301
|
|
238
302
|
self.reaction_dict = reaction_dict
|
@@ -243,80 +307,114 @@ class SBML_reaction:
|
|
243
307
|
for i in range(sbml_reaction.getNumModifiers()):
|
244
308
|
spec = sbml_reaction.getModifier(i)
|
245
309
|
spec_dict = {
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
310
|
+
SBML_DFS.RSC_ID: spec.getId(),
|
311
|
+
SBML_DFS.SC_ID: spec.getSpecies(),
|
312
|
+
SBML_DFS.STOICHIOMETRY: 0,
|
313
|
+
SBML_DFS.SBO_TERM: spec.getSBOTermID(),
|
250
314
|
}
|
251
315
|
reaction_species.append(spec_dict)
|
252
316
|
|
317
|
+
# find gene products defined using the fbc plugin
|
253
318
|
rxn_fbc = sbml_reaction.getPlugin("fbc")
|
254
|
-
|
255
|
-
if rxn_fbc is not None:
|
256
|
-
gene_products = list()
|
319
|
+
if rxn_fbc:
|
257
320
|
gpa = rxn_fbc.getGeneProductAssociation()
|
258
|
-
if gpa
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
if hasattr(gpaaa, SBML_REACTION_ATTR_GET_GENE_PRODUCT):
|
266
|
-
gene_products.append(_get_gene_product_dict(gpaaa))
|
267
|
-
else:
|
268
|
-
for i in range(gpaaa.getNumAssociations()):
|
269
|
-
gpaaaa = gpaaa.getAssociation(i)
|
270
|
-
if hasattr(gpaaaa, SBML_REACTION_ATTR_GET_GENE_PRODUCT):
|
271
|
-
gene_products.append(_get_gene_product_dict(gpaaaa))
|
272
|
-
else:
|
273
|
-
for i in range(gpaa.getNumAssociations()):
|
274
|
-
gpaaaaa = gpaaaa.getAssociation(i)
|
275
|
-
if hasattr(
|
276
|
-
gpaaaaa, SBML_REACTION_ATTR_GET_GENE_PRODUCT
|
277
|
-
):
|
278
|
-
gene_products.append(
|
279
|
-
_get_gene_product_dict(gpaaaaa)
|
280
|
-
)
|
281
|
-
else:
|
282
|
-
logger.warning(
|
283
|
-
"gene annotations nested deeper than 4 levels, ignoring"
|
284
|
-
)
|
285
|
-
continue
|
286
|
-
# de-duplicate
|
287
|
-
gene_products = list(
|
288
|
-
{d[SMBL_REACTION_SPEC_SC_ID]: d for d in gene_products}.values()
|
289
|
-
)
|
290
|
-
reaction_species = reaction_species + gene_products
|
321
|
+
if gpa:
|
322
|
+
gene_products = _extract_gene_products(gpa.getAssociation())
|
323
|
+
# de-duplicate
|
324
|
+
gene_products = list(
|
325
|
+
{d[SBML_DFS.SC_ID]: d for d in gene_products}.values()
|
326
|
+
)
|
327
|
+
reaction_species.extend(gene_products)
|
291
328
|
|
292
329
|
# save reactants
|
293
330
|
for i in range(sbml_reaction.getNumReactants()):
|
294
331
|
spec = sbml_reaction.getReactant(i)
|
295
332
|
spec_dict = {
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
333
|
+
SBML_DFS.RSC_ID: spec.getId(),
|
334
|
+
SBML_DFS.SC_ID: spec.getSpecies(),
|
335
|
+
SBML_DFS.STOICHIOMETRY: -1 * spec.getStoichiometry(),
|
336
|
+
SBML_DFS.SBO_TERM: spec.getSBOTermID(),
|
300
337
|
}
|
301
338
|
reaction_species.append(spec_dict)
|
302
339
|
# save products
|
303
340
|
for i in range(sbml_reaction.getNumProducts()):
|
304
341
|
spec = sbml_reaction.getProduct(i)
|
305
342
|
spec_dict = {
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
343
|
+
SBML_DFS.RSC_ID: spec.getId(),
|
344
|
+
SBML_DFS.SC_ID: spec.getSpecies(),
|
345
|
+
SBML_DFS.STOICHIOMETRY: spec.getStoichiometry(),
|
346
|
+
SBML_DFS.SBO_TERM: spec.getSBOTermID(),
|
310
347
|
}
|
311
348
|
reaction_species.append(spec_dict)
|
312
349
|
|
313
|
-
self.species = pd.DataFrame(reaction_species).set_index(
|
314
|
-
|
315
|
-
|
350
|
+
self.species = pd.DataFrame(reaction_species).set_index(SBML_DFS.RSC_ID)
|
351
|
+
|
352
|
+
|
353
|
+
def sbml_dfs_from_sbml(self, sbml_model: SBML, compartment_aliases: dict | None = None):
|
354
|
+
"""Parses an SBML model into a set of standardized DataFrames.
|
355
|
+
|
356
|
+
This function serves as the main entry point for converting an SBML model
|
357
|
+
into the internal DataFrame-based representation used by napistu. It
|
358
|
+
orchestrates the processing of compartments, species, and reactions.
|
359
|
+
|
360
|
+
Parameters
|
361
|
+
----------
|
362
|
+
self : object
|
363
|
+
The instance of the calling class, expected to have a `schema` attribute.
|
364
|
+
sbml_model : SBML
|
365
|
+
The SBML model to be parsed.
|
366
|
+
compartment_aliases : dict, optional
|
367
|
+
A dictionary to map custom compartment names to the napistu controlled
|
368
|
+
vocabulary. If None, the default mapping (COMPARTMENT_ALIASES) is used.
|
369
|
+
Defaults to None.
|
370
|
+
|
371
|
+
Returns
|
372
|
+
-------
|
373
|
+
object
|
374
|
+
The calling class instance, now populated with DataFrames for
|
375
|
+
compartments, species, compartmentalized_species, reactions, and reaction_species
|
376
|
+
"""
|
377
|
+
# 1. Process compartments from the SBML model
|
378
|
+
self.compartments = _define_compartments(sbml_model, compartment_aliases)
|
379
|
+
|
380
|
+
# 2. Process species and compartmentalized species
|
381
|
+
self.species, self.compartmentalized_species = _define_species(
|
382
|
+
sbml_model, self.schema
|
383
|
+
)
|
384
|
+
|
385
|
+
# 3. Process reactions and their participating species
|
386
|
+
self.reactions, self.reaction_species = _define_reactions(sbml_model)
|
316
387
|
|
388
|
+
return self
|
389
|
+
|
390
|
+
|
391
|
+
def _define_compartments(
|
392
|
+
sbml_model: SBML, compartment_aliases_dict: dict | None = None
|
393
|
+
) -> pd.DataFrame:
|
394
|
+
"""Extracts and defines compartments from the SBML model.
|
395
|
+
|
396
|
+
This function iterates through the compartments in the SBML model,
|
397
|
+
extracting their IDs, names, and identifiers. It also handles cases where
|
398
|
+
CVTerms are missing by mapping compartment names to known GO terms.
|
317
399
|
|
318
|
-
|
319
|
-
|
400
|
+
Parameters
|
401
|
+
----------
|
402
|
+
sbml_model : SBML
|
403
|
+
The SBML model to process.
|
404
|
+
compartment_aliases_dict : dict, optional
|
405
|
+
A dictionary to map custom compartment names. If None, the default
|
406
|
+
mapping from `COMPARTMENT_ALIASES` is used.
|
407
|
+
|
408
|
+
Returns
|
409
|
+
-------
|
410
|
+
pd.DataFrame
|
411
|
+
A DataFrame containing information about each compartment, indexed by
|
412
|
+
compartment ID.
|
413
|
+
"""
|
414
|
+
if compartment_aliases_dict is None:
|
415
|
+
aliases = COMPARTMENT_ALIASES
|
416
|
+
else:
|
417
|
+
aliases = CompartmentAliasesValidator.from_dict(compartment_aliases_dict)
|
320
418
|
|
321
419
|
compartments = list()
|
322
420
|
for i in range(sbml_model.model.getNumCompartments()):
|
@@ -330,7 +428,7 @@ def sbml_df_from_sbml(self, sbml_model: SBML):
|
|
330
428
|
comp_name = comp.getName()
|
331
429
|
mapped_compartment_key = [
|
332
430
|
compkey
|
333
|
-
for compkey, mappednames in
|
431
|
+
for compkey, mappednames in aliases.items()
|
334
432
|
if comp_name in mappednames
|
335
433
|
]
|
336
434
|
|
@@ -340,22 +438,22 @@ def sbml_df_from_sbml(self, sbml_model: SBML):
|
|
340
438
|
)
|
341
439
|
compartments.append(
|
342
440
|
{
|
343
|
-
|
344
|
-
|
345
|
-
|
441
|
+
SBML_DFS.C_ID: comp.getId(),
|
442
|
+
SBML_DFS.C_NAME: comp.getName(),
|
443
|
+
SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
|
346
444
|
[
|
347
445
|
identifiers.format_uri(
|
348
446
|
uri=identifiers.create_uri_url(
|
349
|
-
ontology=
|
350
|
-
identifier=
|
351
|
-
|
447
|
+
ontology=ONTOLOGIES.GO,
|
448
|
+
identifier=COMPARTMENTS_GO_TERMS[
|
449
|
+
GENERIC_COMPARTMENT
|
352
450
|
],
|
353
451
|
),
|
354
452
|
biological_qualifier_type=BQB.BQB_IS,
|
355
453
|
)
|
356
454
|
]
|
357
455
|
),
|
358
|
-
|
456
|
+
SBML_DFS.C_SOURCE: source.Source(init=True),
|
359
457
|
}
|
360
458
|
)
|
361
459
|
|
@@ -366,14 +464,14 @@ def sbml_df_from_sbml(self, sbml_model: SBML):
|
|
366
464
|
)
|
367
465
|
compartments.append(
|
368
466
|
{
|
369
|
-
|
370
|
-
|
371
|
-
|
467
|
+
SBML_DFS.C_ID: comp.getId(),
|
468
|
+
SBML_DFS.C_NAME: comp.getName(),
|
469
|
+
SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
|
372
470
|
[
|
373
471
|
identifiers.format_uri(
|
374
472
|
uri=identifiers.create_uri_url(
|
375
|
-
ontology=
|
376
|
-
identifier=
|
473
|
+
ontology=ONTOLOGIES.GO,
|
474
|
+
identifier=COMPARTMENTS_GO_TERMS[
|
377
475
|
mapped_compartment_key[0]
|
378
476
|
],
|
379
477
|
),
|
@@ -381,107 +479,156 @@ def sbml_df_from_sbml(self, sbml_model: SBML):
|
|
381
479
|
)
|
382
480
|
]
|
383
481
|
),
|
384
|
-
|
482
|
+
SBML_DFS.C_SOURCE: source.Source(init=True),
|
385
483
|
}
|
386
484
|
)
|
387
485
|
|
388
486
|
else:
|
389
487
|
compartments.append(
|
390
488
|
{
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
),
|
396
|
-
SBML_COMPARTMENT_DICT_SOURCE: source.Source(init=True),
|
489
|
+
SBML_DFS.C_ID: comp.getId(),
|
490
|
+
SBML_DFS.C_NAME: comp.getName(),
|
491
|
+
SBML_DFS.C_IDENTIFIERS: identifiers.cv_to_Identifiers(comp),
|
492
|
+
SBML_DFS.C_SOURCE: source.Source(init=True),
|
397
493
|
}
|
398
494
|
)
|
399
495
|
|
400
|
-
|
496
|
+
return pd.DataFrame(compartments).set_index(SBML_DFS.C_ID)
|
497
|
+
|
498
|
+
|
499
|
+
def _define_species(
|
500
|
+
sbml_model: SBML, schema: dict
|
501
|
+
) -> tuple[pd.DataFrame, pd.DataFrame]:
|
502
|
+
"""Extracts and defines species and compartmentalized species.
|
503
|
+
|
504
|
+
This function creates two DataFrames: one for unique molecular species
|
505
|
+
(un-compartmentalized) and another for compartmentalized species, which
|
506
|
+
represent a species within a specific compartment.
|
507
|
+
|
508
|
+
Parameters
|
509
|
+
----------
|
510
|
+
sbml_model : SBML
|
511
|
+
The SBML model to process.
|
512
|
+
schema : dict
|
513
|
+
A dictionary defining the data schema for species and compartmentalized
|
514
|
+
species tables.
|
515
|
+
|
516
|
+
Returns
|
517
|
+
-------
|
518
|
+
tuple[pd.DataFrame, pd.DataFrame]
|
519
|
+
A tuple containing two DataFrames:
|
520
|
+
- The first DataFrame represents unique molecular species.
|
521
|
+
- The second DataFrame represents compartmentalized species.
|
522
|
+
"""
|
523
|
+
|
524
|
+
SPECIES_VARS = schema["species"]["vars"]
|
525
|
+
CSPECIES_VARS = schema["compartmentalized_species"]["vars"]
|
401
526
|
|
402
|
-
# create a species df
|
403
527
|
comp_species_df = setup_cspecies(sbml_model)
|
404
528
|
|
405
529
|
# find unique species and create a table
|
406
530
|
consensus_species_df = comp_species_df.copy()
|
407
|
-
consensus_species_df.index.names = [
|
531
|
+
consensus_species_df.index.names = [SBML_DFS.S_ID]
|
408
532
|
consensus_species, species_lookup = consensus.reduce_to_consensus_ids(
|
409
533
|
consensus_species_df,
|
410
|
-
{"pk":
|
534
|
+
{"pk": SBML_DFS.S_ID, "id": SBML_DFS.S_IDENTIFIERS},
|
411
535
|
)
|
412
536
|
|
413
537
|
# create a table of unique molecular species
|
414
|
-
consensus_species.index.name =
|
415
|
-
consensus_species[
|
416
|
-
re.sub("\\[.+\\]", "", x).strip()
|
417
|
-
for x in consensus_species[SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME]
|
538
|
+
consensus_species.index.name = SBML_DFS.S_ID
|
539
|
+
consensus_species[SBML_DFS.S_NAME] = [
|
540
|
+
re.sub("\\[.+\\]", "", x).strip() for x in consensus_species[SBML_DFS.SC_NAME]
|
418
541
|
]
|
419
542
|
consensus_species = consensus_species.drop(
|
420
|
-
[
|
543
|
+
[SBML_DFS.SC_NAME, SBML_DFS.C_ID], axis=1
|
421
544
|
)
|
422
545
|
consensus_species["s_Source"] = [
|
423
546
|
source.Source(init=True) for x in range(0, consensus_species.shape[0])
|
424
547
|
]
|
425
548
|
|
426
|
-
|
549
|
+
species = consensus_species[SPECIES_VARS]
|
550
|
+
compartmentalized_species = comp_species_df.join(species_lookup).rename(
|
551
|
+
columns={"new_id": SBML_DFS.S_ID}
|
552
|
+
)[CSPECIES_VARS]
|
553
|
+
|
554
|
+
return species, compartmentalized_species
|
427
555
|
|
428
|
-
self.compartmentalized_species = comp_species_df.join(species_lookup).rename(
|
429
|
-
columns={"new_id": SBML_SPECIES_DICT_ID}
|
430
|
-
)[self.schema["compartmentalized_species"]["vars"]]
|
431
556
|
|
432
|
-
|
557
|
+
def _define_reactions(sbml_model: SBML) -> tuple[pd.DataFrame, pd.DataFrame]:
|
558
|
+
"""Extracts and defines reactions and their participating species.
|
559
|
+
|
560
|
+
This function iterates through all reactions in the SBML model, creating
|
561
|
+
a DataFrame for reaction attributes and another for all participating
|
562
|
+
species (reactants, products, and modifiers).
|
563
|
+
|
564
|
+
Parameters
|
565
|
+
----------
|
566
|
+
sbml_model : SBML
|
567
|
+
The SBML model to process.
|
433
568
|
|
434
|
-
|
435
|
-
|
569
|
+
Returns
|
570
|
+
-------
|
571
|
+
tuple[pd.DataFrame, pd.DataFrame]
|
572
|
+
A tuple containing two DataFrames:
|
573
|
+
- The first DataFrame contains reaction attributes, indexed by reaction ID.
|
574
|
+
- The second DataFrame lists all species participating in reactions.
|
575
|
+
"""
|
576
|
+
reactions_list = []
|
577
|
+
reaction_species_list = []
|
436
578
|
for i in range(sbml_model.model.getNumReactions()):
|
437
579
|
rxn = SBML_reaction(sbml_model.model.getReaction(i))
|
438
|
-
|
580
|
+
reactions_list.append(rxn.reaction_dict)
|
439
581
|
|
440
582
|
rxn_specs = rxn.species
|
441
|
-
rxn_specs[
|
442
|
-
|
583
|
+
rxn_specs[SBML_DFS.R_ID] = rxn.reaction_dict[SBML_DFS.R_ID]
|
584
|
+
reaction_species_list.append(rxn_specs)
|
443
585
|
|
444
|
-
|
586
|
+
reactions = pd.DataFrame(reactions_list).set_index(SBML_DFS.R_ID)
|
445
587
|
|
446
|
-
reaction_species_df = pd.concat(
|
588
|
+
reaction_species_df = pd.concat(reaction_species_list)
|
447
589
|
# add an index if reaction species didn't have IDs in the .sbml
|
448
590
|
if all([v == "" for v in reaction_species_df.index.tolist()]):
|
449
591
|
reaction_species_df = (
|
450
592
|
reaction_species_df.reset_index(drop=True)
|
451
593
|
.assign(
|
452
594
|
rsc_id=sbml_dfs_utils.id_formatter(
|
453
|
-
range(reaction_species_df.shape[0]),
|
595
|
+
range(reaction_species_df.shape[0]), SBML_DFS.RSC_ID
|
454
596
|
)
|
455
597
|
)
|
456
|
-
.set_index(
|
598
|
+
.set_index(SBML_DFS.RSC_ID)
|
457
599
|
)
|
458
600
|
|
459
|
-
|
460
|
-
|
461
|
-
return self
|
601
|
+
return reactions, reaction_species_df
|
462
602
|
|
463
603
|
|
464
604
|
def setup_cspecies(sbml_model: SBML) -> pd.DataFrame:
|
465
|
-
"""
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
and
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
605
|
+
"""Creates a DataFrame of compartmentalized species from an SBML model.
|
606
|
+
|
607
|
+
This function extracts all species from the model and creates a
|
608
|
+
standardized DataFrame that includes unique IDs for each compartmentalized
|
609
|
+
species (`sc_id`), along with species and compartment IDs, and their
|
610
|
+
corresponding identifiers.
|
611
|
+
|
612
|
+
Parameters
|
613
|
+
----------
|
614
|
+
sbml_model : SBML
|
615
|
+
The SBML model to process.
|
616
|
+
|
617
|
+
Returns
|
618
|
+
-------
|
619
|
+
pd.DataFrame
|
620
|
+
A DataFrame containing information about each compartmentalized species.
|
474
621
|
"""
|
475
622
|
comp_species = list()
|
476
623
|
for i in range(sbml_model.model.getNumSpecies()):
|
477
624
|
spec = sbml_model.model.getSpecies(i)
|
478
625
|
|
479
626
|
spec_dict = {
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
627
|
+
SBML_DFS.SC_ID: spec.getId(),
|
628
|
+
SBML_DFS.SC_NAME: spec.getName(),
|
629
|
+
SBML_DFS.C_ID: spec.getCompartment(),
|
630
|
+
SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(spec),
|
631
|
+
SBML_DFS.SC_SOURCE: source.Source(init=True),
|
485
632
|
}
|
486
633
|
|
487
634
|
comp_species.append(spec_dict)
|
@@ -494,31 +641,55 @@ def setup_cspecies(sbml_model: SBML) -> pd.DataFrame:
|
|
494
641
|
gene_product = mplugin.getGeneProduct(i)
|
495
642
|
|
496
643
|
gene_dict = {
|
497
|
-
|
498
|
-
|
644
|
+
SBML_DFS.SC_ID: gene_product.getId(),
|
645
|
+
SBML_DFS.SC_NAME: (
|
499
646
|
gene_product.getName()
|
500
647
|
if gene_product.isSetName()
|
501
648
|
else gene_product.getLabel()
|
502
649
|
),
|
503
650
|
# use getLabel() to accommodate sbml model (e.g. HumanGEM.xml) with no fbc:name attribute
|
504
651
|
# Recon3D.xml has both fbc:label and fbc:name attributes, with gene name in fbc:name
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
),
|
509
|
-
SBML_COMPARTMENTALIZED_SPECIES_DICT_SOURCE: source.Source(init=True),
|
652
|
+
SBML_DFS.C_ID: None,
|
653
|
+
SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(gene_product),
|
654
|
+
SBML_DFS.SC_SOURCE: source.Source(init=True),
|
510
655
|
}
|
511
656
|
|
512
657
|
comp_species.append(gene_dict)
|
513
658
|
|
514
|
-
return pd.DataFrame(comp_species).set_index(
|
659
|
+
return pd.DataFrame(comp_species).set_index(SBML_DFS.SC_ID)
|
515
660
|
|
516
661
|
|
517
662
|
def _get_gene_product_dict(gp):
|
518
|
-
"""
|
663
|
+
"""Extracts attributes of a gene product from an SBML reaction object.
|
664
|
+
|
665
|
+
Parameters
|
666
|
+
----------
|
667
|
+
gp : libsbml.GeneProduct
|
668
|
+
A libsbml GeneProduct object.
|
669
|
+
|
670
|
+
Returns
|
671
|
+
-------
|
672
|
+
dict
|
673
|
+
A dictionary containing the gene product's ID, name, and identifiers.
|
674
|
+
"""
|
519
675
|
return {
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
676
|
+
SBML_DFS.RSC_ID: gp.getId(),
|
677
|
+
SBML_DFS.SC_ID: gp.getGeneProduct(),
|
678
|
+
SBML_DFS.STOICHIOMETRY: 0,
|
679
|
+
SBML_DFS.SBO_TERM: gp.getSBOTermID(),
|
524
680
|
}
|
681
|
+
|
682
|
+
|
683
|
+
def _extract_gene_products(association: libsbml.Association) -> list[dict]:
|
684
|
+
"""Recursively extracts gene products from an association tree."""
|
685
|
+
gene_products = []
|
686
|
+
|
687
|
+
def _recursive_helper(assoc: libsbml.Association):
|
688
|
+
if hasattr(assoc, SBML_DEFS.REACTION_ATTR_GET_GENE_PRODUCT):
|
689
|
+
gene_products.append(_get_gene_product_dict(assoc))
|
690
|
+
elif hasattr(assoc, "getNumAssociations"):
|
691
|
+
for i in range(assoc.getNumAssociations()):
|
692
|
+
_recursive_helper(assoc.getAssociation(i))
|
693
|
+
|
694
|
+
_recursive_helper(association)
|
695
|
+
return gene_products
|