napistu 0.3.6__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. napistu/__main__.py +28 -13
  2. napistu/consensus.py +19 -25
  3. napistu/constants.py +102 -83
  4. napistu/indices.py +3 -1
  5. napistu/ingestion/napistu_edgelist.py +4 -4
  6. napistu/ingestion/sbml.py +298 -295
  7. napistu/ingestion/string.py +14 -18
  8. napistu/ingestion/trrust.py +22 -27
  9. napistu/matching/interactions.py +41 -39
  10. napistu/matching/species.py +1 -1
  11. napistu/modify/gaps.py +2 -1
  12. napistu/network/constants.py +61 -45
  13. napistu/network/data_handling.py +1 -1
  14. napistu/network/neighborhoods.py +3 -3
  15. napistu/network/net_create.py +440 -616
  16. napistu/network/net_create_utils.py +734 -0
  17. napistu/network/net_propagation.py +1 -1
  18. napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
  19. napistu/network/ng_utils.py +28 -21
  20. napistu/network/paths.py +4 -4
  21. napistu/network/precompute.py +35 -74
  22. napistu/ontologies/genodexito.py +5 -1
  23. napistu/ontologies/renaming.py +4 -0
  24. napistu/sbml_dfs_core.py +127 -64
  25. napistu/sbml_dfs_utils.py +50 -0
  26. napistu/utils.py +132 -46
  27. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/METADATA +2 -2
  28. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/RECORD +47 -44
  29. tests/conftest.py +171 -13
  30. tests/test_consensus.py +74 -5
  31. tests/test_gaps.py +26 -15
  32. tests/test_network_data_handling.py +5 -2
  33. tests/test_network_net_create.py +93 -202
  34. tests/test_network_net_create_utils.py +538 -0
  35. tests/test_network_ng_core.py +19 -0
  36. tests/test_network_ng_utils.py +1 -1
  37. tests/test_network_precompute.py +5 -4
  38. tests/test_ontologies_renaming.py +28 -24
  39. tests/test_rpy2_callr.py +0 -1
  40. tests/test_rpy2_init.py +0 -1
  41. tests/test_sbml_dfs_core.py +165 -15
  42. tests/test_sbml_dfs_utils.py +45 -0
  43. tests/test_utils.py +45 -2
  44. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/WHEEL +0 -0
  45. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/entry_points.txt +0 -0
  46. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/licenses/LICENSE +0 -0
  47. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/top_level.txt +0 -0
napistu/ingestion/sbml.py CHANGED
@@ -3,11 +3,12 @@ from __future__ import annotations
3
3
  import logging
4
4
  import os
5
5
  import re
6
+ from typing import Any
6
7
 
7
8
  import libsbml
8
9
  import pandas as pd
9
10
  from fs import open_fs
10
- from pydantic import conlist, field_validator, RootModel
11
+ from pydantic import field_validator, RootModel
11
12
 
12
13
  from napistu import consensus
13
14
  from napistu import identifiers
@@ -17,6 +18,8 @@ from napistu import utils
17
18
  from napistu.constants import BQB
18
19
  from napistu.constants import ONTOLOGIES
19
20
  from napistu.constants import SBML_DFS
21
+ from napistu.constants import SBML_DFS_SCHEMA
22
+ from napistu.constants import SCHEMA_DEFS
20
23
  from napistu.ingestion.constants import SBML_DEFS
21
24
  from napistu.ingestion.constants import COMPARTMENTS_GO_TERMS
22
25
  from napistu.ingestion.constants import COMPARTMENT_ALIASES
@@ -25,8 +28,6 @@ from napistu.ingestion.constants import GENERIC_COMPARTMENT
25
28
 
26
29
  logger = logging.getLogger(__name__)
27
30
 
28
- NonEmptyStringList = conlist(str, min_length=1)
29
-
30
31
 
31
32
  class SBML:
32
33
  """A class for handling Systems Biology Markup Language (SBML) files.
@@ -101,35 +102,6 @@ class SBML:
101
102
  f"Critical errors were found when reading the sbml file: {critical_errors}"
102
103
  )
103
104
 
104
- def summary(self) -> pd.DataFrame:
105
- """Generates a styled summary of the SBML model.
106
-
107
- Returns
108
- -------
109
- pd.io.formats.style.Styler
110
- A styled pandas DataFrame containing a summary of the model,
111
- including pathway name, ID, and counts of species and reactions.
112
- """
113
- model = self.model
114
-
115
- model_summaries = dict()
116
-
117
- model_summaries[SBML_DEFS.SUMMARY_PATHWAY_NAME] = model.getName()
118
- model_summaries[SBML_DEFS.SUMMARY_PATHWAY_ID] = model.getId()
119
-
120
- model_summaries[SBML_DEFS.SUMMARY_N_SPECIES] = model.getNumSpecies()
121
- model_summaries[SBML_DEFS.SUMMARY_N_REACTIONS] = model.getNumReactions()
122
-
123
- compartments = [
124
- model.getCompartment(i).getName() for i in range(model.getNumCompartments())
125
- ]
126
- compartments.sort()
127
- model_summaries[SBML_DEFS.SUMMARY_COMPARTMENTS] = ",\n".join(compartments)
128
-
129
- model_summaries_dat = pd.DataFrame(model_summaries, index=[0]).T
130
-
131
- return utils.style_df(model_summaries_dat) # type: ignore
132
-
133
105
  def sbml_errors(self, reduced_log: bool = True, return_df: bool = False):
134
106
  """Formats and reports all errors found in the SBML file.
135
107
 
@@ -199,6 +171,253 @@ class SBML:
199
171
 
200
172
  return None
201
173
 
174
+ def summary(self) -> pd.DataFrame:
175
+ """Generates a styled summary of the SBML model.
176
+
177
+ Returns
178
+ -------
179
+ pd.io.formats.style.Styler
180
+ A styled pandas DataFrame containing a summary of the model,
181
+ including pathway name, ID, and counts of species and reactions.
182
+ """
183
+ model = self.model
184
+
185
+ model_summaries = dict()
186
+
187
+ model_summaries[SBML_DEFS.SUMMARY_PATHWAY_NAME] = model.getName()
188
+ model_summaries[SBML_DEFS.SUMMARY_PATHWAY_ID] = model.getId()
189
+
190
+ model_summaries[SBML_DEFS.SUMMARY_N_SPECIES] = model.getNumSpecies()
191
+ model_summaries[SBML_DEFS.SUMMARY_N_REACTIONS] = model.getNumReactions()
192
+
193
+ compartments = [
194
+ model.getCompartment(i).getName() for i in range(model.getNumCompartments())
195
+ ]
196
+ compartments.sort()
197
+ model_summaries[SBML_DEFS.SUMMARY_COMPARTMENTS] = ",\n".join(compartments)
198
+
199
+ model_summaries_dat = pd.DataFrame(model_summaries, index=[0]).T
200
+
201
+ return utils.style_df(model_summaries_dat) # type: ignore
202
+
203
+ def _define_compartments(
204
+ self, compartment_aliases_dict: dict | None = None
205
+ ) -> pd.DataFrame:
206
+ """Extracts and defines compartments from the SBML model.
207
+
208
+ This function iterates through the compartments in the SBML model,
209
+ extracting their IDs, names, and identifiers. It also handles cases where
210
+ CVTerms are missing by mapping compartment names to known GO terms.
211
+
212
+ Parameters
213
+ ----------
214
+ sbml_model : SBML
215
+ The SBML model to process.
216
+ compartment_aliases_dict : dict, optional
217
+ A dictionary to map custom compartment names. If None, the default
218
+ mapping from `COMPARTMENT_ALIASES` is used.
219
+
220
+ Returns
221
+ -------
222
+ pd.DataFrame
223
+ A DataFrame containing information about each compartment, indexed by
224
+ compartment ID.
225
+ """
226
+ if compartment_aliases_dict is None:
227
+ aliases = COMPARTMENT_ALIASES
228
+ else:
229
+ aliases = CompartmentAliasesValidator.from_dict(compartment_aliases_dict)
230
+
231
+ compartments = list()
232
+ for i in range(self.model.getNumCompartments()):
233
+ comp = self.model.getCompartment(i)
234
+
235
+ if not comp.getCVTerms():
236
+ logger.warning(
237
+ f"Compartment {comp.getId()} has empty CVterms, mapping its c_Identifiers from the Compartment dict"
238
+ )
239
+
240
+ compartments.append(_define_compartments_missing_cvterms(comp, aliases))
241
+
242
+ else:
243
+ compartments.append(
244
+ {
245
+ SBML_DFS.C_ID: comp.getId(),
246
+ SBML_DFS.C_NAME: comp.getName(),
247
+ SBML_DFS.C_IDENTIFIERS: identifiers.cv_to_Identifiers(comp),
248
+ SBML_DFS.C_SOURCE: source.Source(init=True),
249
+ }
250
+ )
251
+
252
+ return pd.DataFrame(compartments).set_index(SBML_DFS.C_ID)
253
+
254
+ def _define_cspecies(self) -> pd.DataFrame:
255
+ """Creates a DataFrame of compartmentalized species from an SBML model.
256
+
257
+ This function extracts all species from the model and creates a
258
+ standardized DataFrame that includes unique IDs for each compartmentalized
259
+ species (`sc_id`), along with species and compartment IDs, and their
260
+ corresponding identifiers.
261
+
262
+ Returns
263
+ -------
264
+ pd.DataFrame
265
+ A DataFrame containing information about each compartmentalized species.
266
+ """
267
+ comp_species = list()
268
+ for i in range(self.model.getNumSpecies()):
269
+ spec = self.model.getSpecies(i)
270
+
271
+ spec_dict = {
272
+ SBML_DFS.SC_ID: spec.getId(),
273
+ SBML_DFS.SC_NAME: spec.getName(),
274
+ SBML_DFS.C_ID: spec.getCompartment(),
275
+ SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(spec),
276
+ SBML_DFS.SC_SOURCE: source.Source(init=True),
277
+ }
278
+
279
+ comp_species.append(spec_dict)
280
+
281
+ # add geneproducts defined using L3 FBC extension
282
+ fbc_gene_products = self._define_fbc_gene_products()
283
+ comp_species.extend(fbc_gene_products)
284
+
285
+ comp_species_df = pd.DataFrame(comp_species).set_index(SBML_DFS.SC_ID)
286
+ comp_species_df[SBML_DFS.SC_NAME] = utils.update_pathological_names(
287
+ comp_species_df[SBML_DFS.SC_NAME], "SC"
288
+ )
289
+
290
+ return comp_species_df
291
+
292
+ def _define_fbc_gene_products(self) -> list[dict]:
293
+
294
+ mplugin = self.model.getPlugin("fbc")
295
+
296
+ fbc_gene_products = list()
297
+ if mplugin is not None:
298
+ for i in range(mplugin.getNumGeneProducts()):
299
+ gene_product = mplugin.getGeneProduct(i)
300
+
301
+ gene_dict = {
302
+ SBML_DFS.SC_ID: gene_product.getId(),
303
+ SBML_DFS.SC_NAME: (
304
+ gene_product.getName()
305
+ if gene_product.isSetName()
306
+ else gene_product.getLabel()
307
+ ),
308
+ # use getLabel() to accommodate SBML models (e.g. HumanGEM.xml) with no fbc:name attribute
309
+ # Recon3D.xml has both fbc:label and fbc:name attributes, with gene name in fbc:name
310
+ SBML_DFS.C_ID: None,
311
+ SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(gene_product),
312
+ SBML_DFS.SC_SOURCE: source.Source(init=True),
313
+ }
314
+
315
+ fbc_gene_products.append(gene_dict)
316
+
317
+ return fbc_gene_products
318
+
319
+ def _define_reactions(self) -> tuple[pd.DataFrame, pd.DataFrame]:
320
+ """Extracts and defines reactions and their participating species.
321
+
322
+ This function iterates through all reactions in the SBML model, creating
323
+ a DataFrame for reaction attributes and another for all participating
324
+ species (reactants, products, and modifiers).
325
+
326
+ Parameters
327
+ ----------
328
+ sbml_model : SBML
329
+ The SBML model to process.
330
+
331
+ Returns
332
+ -------
333
+ tuple[pd.DataFrame, pd.DataFrame]
334
+ A tuple containing two DataFrames:
335
+ - The first DataFrame contains reaction attributes, indexed by reaction ID.
336
+ - The second DataFrame lists all species participating in reactions.
337
+ """
338
+ reactions_list = []
339
+ reaction_species_list = []
340
+ for i in range(self.model.getNumReactions()):
341
+ rxn = SBML_reaction(self.model.getReaction(i))
342
+ reactions_list.append(rxn.reaction_dict)
343
+
344
+ rxn_specs = rxn.species
345
+ rxn_specs[SBML_DFS.R_ID] = rxn.reaction_dict[SBML_DFS.R_ID]
346
+ reaction_species_list.append(rxn_specs)
347
+
348
+ reactions = pd.DataFrame(reactions_list).set_index(SBML_DFS.R_ID)
349
+
350
+ reaction_species_df = pd.concat(reaction_species_list)
351
+ # add an index if reaction species didn't have IDs in the .sbml
352
+ if all([v == "" for v in reaction_species_df.index.tolist()]):
353
+ reaction_species_df = (
354
+ reaction_species_df.reset_index(drop=True)
355
+ .assign(
356
+ rsc_id=sbml_dfs_utils.id_formatter(
357
+ range(reaction_species_df.shape[0]), SBML_DFS.RSC_ID
358
+ )
359
+ )
360
+ .set_index(SBML_DFS.RSC_ID)
361
+ )
362
+
363
+ return reactions, reaction_species_df
364
+
365
+ def _define_species(self) -> tuple[pd.DataFrame, pd.DataFrame]:
366
+ """Extracts and defines species and compartmentalized species.
367
+
368
+ This function creates two DataFrames: one for unique molecular species
369
+ (un-compartmentalized) and another for compartmentalized species, which
370
+ represent a species within a specific compartment.
371
+
372
+ Returns
373
+ -------
374
+ tuple[pd.DataFrame, pd.DataFrame]
375
+ A tuple containing two DataFrames:
376
+ - The first DataFrame represents unique molecular species.
377
+ - The second DataFrame represents compartmentalized species.
378
+ """
379
+
380
+ SPECIES_SCHEMA = SBML_DFS_SCHEMA.SCHEMA[SBML_DFS.SPECIES]
381
+ CSPECIES_SCHEMA = SBML_DFS_SCHEMA.SCHEMA[SBML_DFS.COMPARTMENTALIZED_SPECIES]
382
+ SPECIES_VARS = SPECIES_SCHEMA[SCHEMA_DEFS.VARS]
383
+ CSPECIES_VARS = CSPECIES_SCHEMA[SCHEMA_DEFS.VARS]
384
+
385
+ comp_species_df = self._define_cspecies()
386
+
387
+ # find unique species and create a table
388
+ consensus_species_df = comp_species_df.copy()
389
+ consensus_species_df.index.names = [SBML_DFS.S_ID]
390
+ consensus_species, species_lookup = consensus.reduce_to_consensus_ids(
391
+ consensus_species_df,
392
+ # note that this is an incomplete schema because consensus_species_df isn't a
393
+ # normal species table
394
+ {
395
+ SCHEMA_DEFS.PK: SBML_DFS.S_ID,
396
+ SCHEMA_DEFS.ID: SBML_DFS.S_IDENTIFIERS,
397
+ SCHEMA_DEFS.TABLE: SBML_DFS.SPECIES,
398
+ },
399
+ )
400
+
401
+ # create a table of unique molecular species
402
+ consensus_species.index.name = SBML_DFS.S_ID
403
+ consensus_species[SBML_DFS.S_NAME] = [
404
+ re.sub("\\[.+\\]", "", x).strip()
405
+ for x in consensus_species[SBML_DFS.SC_NAME]
406
+ ]
407
+ consensus_species = consensus_species.drop(
408
+ [SBML_DFS.SC_NAME, SBML_DFS.C_ID], axis=1
409
+ )
410
+ consensus_species[SBML_DFS.S_SOURCE] = [
411
+ source.Source(init=True) for x in range(0, consensus_species.shape[0])
412
+ ]
413
+
414
+ species = consensus_species[SPECIES_VARS]
415
+ compartmentalized_species = comp_species_df.join(species_lookup).rename(
416
+ columns={"new_id": SBML_DFS.S_ID}
417
+ )[CSPECIES_VARS]
418
+
419
+ return species, compartmentalized_species
420
+
202
421
 
203
422
  class CompartmentAliasesValidator(RootModel):
204
423
  """
@@ -375,288 +594,72 @@ def sbml_dfs_from_sbml(self, sbml_model: SBML, compartment_aliases: dict | None
375
594
  compartments, species, compartmentalized_species, reactions, and reaction_species
376
595
  """
377
596
  # 1. Process compartments from the SBML model
378
- self.compartments = _define_compartments(sbml_model, compartment_aliases)
597
+ self.compartments = sbml_model._define_compartments(compartment_aliases)
379
598
 
380
599
  # 2. Process species and compartmentalized species
381
- self.species, self.compartmentalized_species = _define_species(
382
- sbml_model, self.schema
383
- )
600
+ self.species, self.compartmentalized_species = sbml_model._define_species()
384
601
 
385
602
  # 3. Process reactions and their participating species
386
- self.reactions, self.reaction_species = _define_reactions(sbml_model)
603
+ self.reactions, self.reaction_species = sbml_model._define_reactions()
387
604
 
388
605
  return self
389
606
 
390
607
 
391
- def _define_compartments(
392
- sbml_model: SBML, compartment_aliases_dict: dict | None = None
393
- ) -> pd.DataFrame:
394
- """Extracts and defines compartments from the SBML model.
608
+ def _define_compartments_missing_cvterms(
609
+ comp: libsbml.Compartment, aliases: dict
610
+ ) -> dict[str, Any]:
395
611
 
396
- This function iterates through the compartments in the SBML model,
397
- extracting their IDs, names, and identifiers. It also handles cases where
398
- CVTerms are missing by mapping compartment names to known GO terms.
399
-
400
- Parameters
401
- ----------
402
- sbml_model : SBML
403
- The SBML model to process.
404
- compartment_aliases_dict : dict, optional
405
- A dictionary to map custom compartment names. If None, the default
406
- mapping from `COMPARTMENT_ALIASES` is used.
612
+ comp_name = comp.getName()
613
+ mapped_compartment_key = [
614
+ compkey for compkey, mappednames in aliases.items() if comp_name in mappednames
615
+ ]
407
616
 
408
- Returns
409
- -------
410
- pd.DataFrame
411
- A DataFrame containing information about each compartment, indexed by
412
- compartment ID.
413
- """
414
- if compartment_aliases_dict is None:
415
- aliases = COMPARTMENT_ALIASES
416
- else:
417
- aliases = CompartmentAliasesValidator.from_dict(compartment_aliases_dict)
617
+ if len(mapped_compartment_key) == 0:
618
+ logger.warning(
619
+ f"No GO compartment for {comp_name} is mapped, use the generic cellular_component's GO id"
620
+ )
418
621
 
419
- compartments = list()
420
- for i in range(sbml_model.model.getNumCompartments()):
421
- comp = sbml_model.model.getCompartment(i)
622
+ compartment_entry = {
623
+ SBML_DFS.C_ID: comp.getId(),
624
+ SBML_DFS.C_NAME: comp.getName(),
625
+ SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
626
+ [
627
+ identifiers.format_uri(
628
+ uri=identifiers.create_uri_url(
629
+ ontology=ONTOLOGIES.GO,
630
+ identifier=COMPARTMENTS_GO_TERMS[GENERIC_COMPARTMENT],
631
+ ),
632
+ biological_qualifier_type=BQB.BQB_IS,
633
+ )
634
+ ]
635
+ ),
636
+ SBML_DFS.C_SOURCE: source.Source(init=True),
637
+ }
422
638
 
423
- if not comp.getCVTerms():
639
+ if len(mapped_compartment_key) > 0:
640
+ if len(mapped_compartment_key) > 1:
424
641
  logger.warning(
425
- f"Compartment {comp.getId()} has empty CVterms, mapping its c_Identifiers from the Compartment dict"
642
+ f"More than one GO compartments for {comp_name} are mapped, using the first one"
426
643
  )
427
644
 
428
- comp_name = comp.getName()
429
- mapped_compartment_key = [
430
- compkey
431
- for compkey, mappednames in aliases.items()
432
- if comp_name in mappednames
433
- ]
434
-
435
- if len(mapped_compartment_key) == 0:
436
- logger.warning(
437
- f"No GO compartment for {comp_name} is mapped, use the generic cellular_component's GO id"
438
- )
439
- compartments.append(
440
- {
441
- SBML_DFS.C_ID: comp.getId(),
442
- SBML_DFS.C_NAME: comp.getName(),
443
- SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
444
- [
445
- identifiers.format_uri(
446
- uri=identifiers.create_uri_url(
447
- ontology=ONTOLOGIES.GO,
448
- identifier=COMPARTMENTS_GO_TERMS[
449
- GENERIC_COMPARTMENT
450
- ],
451
- ),
452
- biological_qualifier_type=BQB.BQB_IS,
453
- )
454
- ]
645
+ compartment_entry = {
646
+ SBML_DFS.C_ID: comp.getId(),
647
+ SBML_DFS.C_NAME: comp.getName(),
648
+ SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
649
+ [
650
+ identifiers.format_uri(
651
+ uri=identifiers.create_uri_url(
652
+ ontology=ONTOLOGIES.GO,
653
+ identifier=COMPARTMENTS_GO_TERMS[mapped_compartment_key[0]],
455
654
  ),
456
- SBML_DFS.C_SOURCE: source.Source(init=True),
457
- }
458
- )
459
-
460
- if len(mapped_compartment_key) > 0:
461
- if len(mapped_compartment_key) > 1:
462
- logger.warning(
463
- f"More than one GO compartments for {comp_name} are mapped, using the first one"
655
+ biological_qualifier_type=BQB.IS,
464
656
  )
465
- compartments.append(
466
- {
467
- SBML_DFS.C_ID: comp.getId(),
468
- SBML_DFS.C_NAME: comp.getName(),
469
- SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
470
- [
471
- identifiers.format_uri(
472
- uri=identifiers.create_uri_url(
473
- ontology=ONTOLOGIES.GO,
474
- identifier=COMPARTMENTS_GO_TERMS[
475
- mapped_compartment_key[0]
476
- ],
477
- ),
478
- biological_qualifier_type=BQB.IS,
479
- )
480
- ]
481
- ),
482
- SBML_DFS.C_SOURCE: source.Source(init=True),
483
- }
484
- )
485
-
486
- else:
487
- compartments.append(
488
- {
489
- SBML_DFS.C_ID: comp.getId(),
490
- SBML_DFS.C_NAME: comp.getName(),
491
- SBML_DFS.C_IDENTIFIERS: identifiers.cv_to_Identifiers(comp),
492
- SBML_DFS.C_SOURCE: source.Source(init=True),
493
- }
494
- )
495
-
496
- return pd.DataFrame(compartments).set_index(SBML_DFS.C_ID)
497
-
498
-
499
- def _define_species(
500
- sbml_model: SBML, schema: dict
501
- ) -> tuple[pd.DataFrame, pd.DataFrame]:
502
- """Extracts and defines species and compartmentalized species.
503
-
504
- This function creates two DataFrames: one for unique molecular species
505
- (un-compartmentalized) and another for compartmentalized species, which
506
- represent a species within a specific compartment.
507
-
508
- Parameters
509
- ----------
510
- sbml_model : SBML
511
- The SBML model to process.
512
- schema : dict
513
- A dictionary defining the data schema for species and compartmentalized
514
- species tables.
515
-
516
- Returns
517
- -------
518
- tuple[pd.DataFrame, pd.DataFrame]
519
- A tuple containing two DataFrames:
520
- - The first DataFrame represents unique molecular species.
521
- - The second DataFrame represents compartmentalized species.
522
- """
523
-
524
- SPECIES_VARS = schema["species"]["vars"]
525
- CSPECIES_VARS = schema["compartmentalized_species"]["vars"]
526
-
527
- comp_species_df = setup_cspecies(sbml_model)
528
-
529
- # find unique species and create a table
530
- consensus_species_df = comp_species_df.copy()
531
- consensus_species_df.index.names = [SBML_DFS.S_ID]
532
- consensus_species, species_lookup = consensus.reduce_to_consensus_ids(
533
- consensus_species_df,
534
- {"pk": SBML_DFS.S_ID, "id": SBML_DFS.S_IDENTIFIERS},
535
- )
536
-
537
- # create a table of unique molecular species
538
- consensus_species.index.name = SBML_DFS.S_ID
539
- consensus_species[SBML_DFS.S_NAME] = [
540
- re.sub("\\[.+\\]", "", x).strip() for x in consensus_species[SBML_DFS.SC_NAME]
541
- ]
542
- consensus_species = consensus_species.drop(
543
- [SBML_DFS.SC_NAME, SBML_DFS.C_ID], axis=1
544
- )
545
- consensus_species["s_Source"] = [
546
- source.Source(init=True) for x in range(0, consensus_species.shape[0])
547
- ]
548
-
549
- species = consensus_species[SPECIES_VARS]
550
- compartmentalized_species = comp_species_df.join(species_lookup).rename(
551
- columns={"new_id": SBML_DFS.S_ID}
552
- )[CSPECIES_VARS]
553
-
554
- return species, compartmentalized_species
555
-
556
-
557
- def _define_reactions(sbml_model: SBML) -> tuple[pd.DataFrame, pd.DataFrame]:
558
- """Extracts and defines reactions and their participating species.
559
-
560
- This function iterates through all reactions in the SBML model, creating
561
- a DataFrame for reaction attributes and another for all participating
562
- species (reactants, products, and modifiers).
563
-
564
- Parameters
565
- ----------
566
- sbml_model : SBML
567
- The SBML model to process.
568
-
569
- Returns
570
- -------
571
- tuple[pd.DataFrame, pd.DataFrame]
572
- A tuple containing two DataFrames:
573
- - The first DataFrame contains reaction attributes, indexed by reaction ID.
574
- - The second DataFrame lists all species participating in reactions.
575
- """
576
- reactions_list = []
577
- reaction_species_list = []
578
- for i in range(sbml_model.model.getNumReactions()):
579
- rxn = SBML_reaction(sbml_model.model.getReaction(i))
580
- reactions_list.append(rxn.reaction_dict)
581
-
582
- rxn_specs = rxn.species
583
- rxn_specs[SBML_DFS.R_ID] = rxn.reaction_dict[SBML_DFS.R_ID]
584
- reaction_species_list.append(rxn_specs)
585
-
586
- reactions = pd.DataFrame(reactions_list).set_index(SBML_DFS.R_ID)
587
-
588
- reaction_species_df = pd.concat(reaction_species_list)
589
- # add an index if reaction species didn't have IDs in the .sbml
590
- if all([v == "" for v in reaction_species_df.index.tolist()]):
591
- reaction_species_df = (
592
- reaction_species_df.reset_index(drop=True)
593
- .assign(
594
- rsc_id=sbml_dfs_utils.id_formatter(
595
- range(reaction_species_df.shape[0]), SBML_DFS.RSC_ID
596
- )
597
- )
598
- .set_index(SBML_DFS.RSC_ID)
599
- )
600
-
601
- return reactions, reaction_species_df
602
-
603
-
604
- def setup_cspecies(sbml_model: SBML) -> pd.DataFrame:
605
- """Creates a DataFrame of compartmentalized species from an SBML model.
606
-
607
- This function extracts all species from the model and creates a
608
- standardized DataFrame that includes unique IDs for each compartmentalized
609
- species (`sc_id`), along with species and compartment IDs, and their
610
- corresponding identifiers.
611
-
612
- Parameters
613
- ----------
614
- sbml_model : SBML
615
- The SBML model to process.
616
-
617
- Returns
618
- -------
619
- pd.DataFrame
620
- A DataFrame containing information about each compartmentalized species.
621
- """
622
- comp_species = list()
623
- for i in range(sbml_model.model.getNumSpecies()):
624
- spec = sbml_model.model.getSpecies(i)
625
-
626
- spec_dict = {
627
- SBML_DFS.SC_ID: spec.getId(),
628
- SBML_DFS.SC_NAME: spec.getName(),
629
- SBML_DFS.C_ID: spec.getCompartment(),
630
- SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(spec),
631
- SBML_DFS.SC_SOURCE: source.Source(init=True),
657
+ ]
658
+ ),
659
+ SBML_DFS.C_SOURCE: source.Source(init=True),
632
660
  }
633
661
 
634
- comp_species.append(spec_dict)
635
-
636
- mplugin = sbml_model.model.getPlugin("fbc")
637
-
638
- # add geneproducts defined using L3 FBC extension
639
- if mplugin is not None:
640
- for i in range(mplugin.getNumGeneProducts()):
641
- gene_product = mplugin.getGeneProduct(i)
642
-
643
- gene_dict = {
644
- SBML_DFS.SC_ID: gene_product.getId(),
645
- SBML_DFS.SC_NAME: (
646
- gene_product.getName()
647
- if gene_product.isSetName()
648
- else gene_product.getLabel()
649
- ),
650
- # use getLabel() to accomendate sbml model (e.g. HumanGEM.xml) with no fbc:name attribute
651
- # Recon3D.xml has both fbc:label and fbc:name attributes, with gene name in fbc:nam
652
- SBML_DFS.C_ID: None,
653
- SBML_DFS.S_IDENTIFIERS: identifiers.cv_to_Identifiers(gene_product),
654
- SBML_DFS.SC_SOURCE: source.Source(init=True),
655
- }
656
-
657
- comp_species.append(gene_dict)
658
-
659
- return pd.DataFrame(comp_species).set_index(SBML_DFS.SC_ID)
662
+ return compartment_entry
660
663
 
661
664
 
662
665
  def _get_gene_product_dict(gp):