napistu 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +18 -18
- napistu/consensus.py +3 -2
- napistu/constants.py +1 -1
- napistu/context/filtering.py +2 -1
- napistu/identifiers.py +3 -6
- napistu/ingestion/bigg.py +6 -6
- napistu/ingestion/string.py +2 -1
- napistu/ingestion/yeast.py +2 -1
- napistu/matching/interactions.py +4 -4
- napistu/modify/uncompartmentalize.py +1 -1
- napistu/network/net_create.py +1 -1
- napistu/network/paths.py +1 -1
- napistu/ontologies/dogma.py +2 -1
- napistu/sbml_dfs_core.py +1282 -2169
- napistu/sbml_dfs_utils.py +1082 -143
- {napistu-0.3.5.dist-info → napistu-0.3.6.dist-info}/METADATA +2 -2
- {napistu-0.3.5.dist-info → napistu-0.3.6.dist-info}/RECORD +28 -28
- tests/conftest.py +43 -0
- tests/test_consensus.py +88 -0
- tests/test_context_filtering.py +2 -2
- tests/test_ontologies_genodexito.py +3 -0
- tests/test_ontologies_mygene.py +3 -0
- tests/test_sbml_dfs_core.py +102 -203
- tests/test_sbml_dfs_utils.py +194 -36
- {napistu-0.3.5.dist-info → napistu-0.3.6.dist-info}/WHEEL +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.6.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.6.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.6.dist-info}/top_level.txt +0 -0
tests/test_sbml_dfs_core.py
CHANGED
@@ -9,18 +9,16 @@ from napistu import sbml_dfs_core
|
|
9
9
|
from napistu.source import Source
|
10
10
|
from napistu.ingestion import sbml
|
11
11
|
from napistu.modify import pathwayannot
|
12
|
-
from napistu.sbml_dfs_utils import _stub_ids
|
13
12
|
|
14
13
|
from napistu import identifiers as napistu_identifiers
|
15
14
|
from napistu.constants import (
|
16
15
|
SBML_DFS,
|
17
|
-
SBOTERM_NAMES,
|
18
16
|
BQB_DEFINING_ATTRS,
|
19
17
|
BQB_DEFINING_ATTRS_LOOSE,
|
20
18
|
BQB,
|
21
|
-
IDENTIFIERS,
|
22
19
|
)
|
23
20
|
from napistu.sbml_dfs_core import SBML_dfs
|
21
|
+
from unittest.mock import patch
|
24
22
|
|
25
23
|
|
26
24
|
@pytest.fixture
|
@@ -30,8 +28,8 @@ def test_data():
|
|
30
28
|
# Test compartments
|
31
29
|
compartments_df = pd.DataFrame(
|
32
30
|
[
|
33
|
-
{"c_name": "nucleus", "c_Identifiers":
|
34
|
-
{"c_name": "cytoplasm", "c_Identifiers":
|
31
|
+
{"c_name": "nucleus", "c_Identifiers": None},
|
32
|
+
{"c_name": "cytoplasm", "c_Identifiers": None},
|
35
33
|
]
|
36
34
|
)
|
37
35
|
|
@@ -40,13 +38,13 @@ def test_data():
|
|
40
38
|
[
|
41
39
|
{
|
42
40
|
"s_name": "TP53",
|
43
|
-
"s_Identifiers":
|
41
|
+
"s_Identifiers": None,
|
44
42
|
"gene_type": "tumor_suppressor",
|
45
43
|
},
|
46
|
-
{"s_name": "MDM2", "s_Identifiers":
|
44
|
+
{"s_name": "MDM2", "s_Identifiers": None, "gene_type": "oncogene"},
|
47
45
|
{
|
48
46
|
"s_name": "CDKN1A",
|
49
|
-
"s_Identifiers":
|
47
|
+
"s_Identifiers": None,
|
50
48
|
"gene_type": "cell_cycle",
|
51
49
|
},
|
52
50
|
]
|
@@ -62,7 +60,7 @@ def test_data():
|
|
62
60
|
"downstream_compartment": "nucleus",
|
63
61
|
"r_name": "TP53_activates_CDKN1A",
|
64
62
|
"sbo_term": "SBO:0000459",
|
65
|
-
"r_Identifiers":
|
63
|
+
"r_Identifiers": None,
|
66
64
|
"r_isreversible": False,
|
67
65
|
"confidence": 0.95,
|
68
66
|
},
|
@@ -73,7 +71,7 @@ def test_data():
|
|
73
71
|
"downstream_compartment": "nucleus",
|
74
72
|
"r_name": "MDM2_inhibits_TP53",
|
75
73
|
"sbo_term": "SBO:0000020",
|
76
|
-
"r_Identifiers":
|
74
|
+
"r_Identifiers": None,
|
77
75
|
"r_isreversible": False,
|
78
76
|
"confidence": 0.87,
|
79
77
|
},
|
@@ -274,26 +272,6 @@ def test_sbml_dfs_remove_reactions_check_species(sbml_dfs):
|
|
274
272
|
sbml_dfs.validate()
|
275
273
|
|
276
274
|
|
277
|
-
def test_formula(sbml_dfs):
|
278
|
-
# create a formula string
|
279
|
-
|
280
|
-
an_r_id = sbml_dfs.reactions.index[0]
|
281
|
-
|
282
|
-
reaction_species_df = sbml_dfs.reaction_species[
|
283
|
-
sbml_dfs.reaction_species["r_id"] == an_r_id
|
284
|
-
].merge(sbml_dfs.compartmentalized_species, left_on="sc_id", right_index=True)
|
285
|
-
|
286
|
-
formula_str = sbml_dfs_core.construct_formula_string(
|
287
|
-
reaction_species_df, sbml_dfs.reactions, name_var="sc_name"
|
288
|
-
)
|
289
|
-
|
290
|
-
assert isinstance(formula_str, str)
|
291
|
-
assert (
|
292
|
-
formula_str
|
293
|
-
== "CO2 [extracellular region] -> CO2 [cytosol] ---- modifiers: AQP1 tetramer [plasma membrane]]"
|
294
|
-
)
|
295
|
-
|
296
|
-
|
297
275
|
def test_read_sbml_with_invalid_ids():
|
298
276
|
SBML_W_BAD_IDS = "R-HSA-166658.sbml"
|
299
277
|
test_path = os.path.abspath(os.path.join(__file__, os.pardir))
|
@@ -305,17 +283,6 @@ def test_read_sbml_with_invalid_ids():
|
|
305
283
|
assert isinstance(sbml_dfs_core.SBML_dfs(sbml_w_bad_ids), sbml_dfs_core.SBML_dfs)
|
306
284
|
|
307
285
|
|
308
|
-
def test_stubbed_compartment():
|
309
|
-
compartment = sbml_dfs_core._stub_compartments()
|
310
|
-
|
311
|
-
assert compartment["c_Identifiers"].iloc[0].ids[0] == {
|
312
|
-
"ontology": "go",
|
313
|
-
"identifier": "GO:0005575",
|
314
|
-
"url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
|
315
|
-
"bqb": "BQB_IS",
|
316
|
-
}
|
317
|
-
|
318
|
-
|
319
286
|
def test_get_table(sbml_dfs):
|
320
287
|
assert isinstance(sbml_dfs.get_table("species"), pd.DataFrame)
|
321
288
|
assert isinstance(sbml_dfs.get_table("species", {"id"}), pd.DataFrame)
|
@@ -366,10 +333,20 @@ def test_species_status(sbml_dfs):
|
|
366
333
|
select_species = species[species["s_name"] == "OxyHbA"]
|
367
334
|
assert select_species.shape[0] == 1
|
368
335
|
|
369
|
-
status =
|
336
|
+
status = sbml_dfs.species_status(select_species.index[0])
|
337
|
+
|
338
|
+
# expected columns
|
339
|
+
expected_columns = [
|
340
|
+
SBML_DFS.SC_NAME,
|
341
|
+
SBML_DFS.STOICHIOMETRY,
|
342
|
+
SBML_DFS.R_NAME,
|
343
|
+
"r_formula_str",
|
344
|
+
]
|
345
|
+
assert all(col in status.columns for col in expected_columns)
|
346
|
+
|
370
347
|
assert (
|
371
348
|
status["r_formula_str"][0]
|
372
|
-
== "4.0
|
349
|
+
== "cytosol: 4.0 CO2 + 4.0 H+ + OxyHbA -> 4.0 O2 + Protonated Carbamino DeoxyHbA"
|
373
350
|
)
|
374
351
|
|
375
352
|
|
@@ -436,91 +413,6 @@ def test_get_identifiers_handles_missing_values():
|
|
436
413
|
), "Only Identifiers objects should be returned."
|
437
414
|
|
438
415
|
|
439
|
-
def test_find_underspecified_reactions():
|
440
|
-
|
441
|
-
reaction_w_regulators = pd.DataFrame(
|
442
|
-
{
|
443
|
-
SBML_DFS.SC_ID: ["A", "B", "C", "D", "E", "F", "G"],
|
444
|
-
SBML_DFS.STOICHIOMETRY: [-1, -1, 1, 1, 0, 0, 0],
|
445
|
-
SBML_DFS.SBO_TERM: [
|
446
|
-
SBOTERM_NAMES.REACTANT,
|
447
|
-
SBOTERM_NAMES.REACTANT,
|
448
|
-
SBOTERM_NAMES.PRODUCT,
|
449
|
-
SBOTERM_NAMES.PRODUCT,
|
450
|
-
SBOTERM_NAMES.CATALYST,
|
451
|
-
SBOTERM_NAMES.CATALYST,
|
452
|
-
SBOTERM_NAMES.STIMULATOR,
|
453
|
-
],
|
454
|
-
}
|
455
|
-
).assign(r_id="bar")
|
456
|
-
reaction_w_regulators[SBML_DFS.RSC_ID] = [
|
457
|
-
f"rsc_{i}" for i in range(len(reaction_w_regulators))
|
458
|
-
]
|
459
|
-
reaction_w_regulators.set_index(SBML_DFS.RSC_ID, inplace=True)
|
460
|
-
reaction_w_regulators = sbml_dfs_core.add_sbo_role(reaction_w_regulators)
|
461
|
-
|
462
|
-
reaction_w_interactors = pd.DataFrame(
|
463
|
-
{
|
464
|
-
SBML_DFS.SC_ID: ["A", "B"],
|
465
|
-
SBML_DFS.STOICHIOMETRY: [-1, 1],
|
466
|
-
SBML_DFS.SBO_TERM: [SBOTERM_NAMES.REACTANT, SBOTERM_NAMES.REACTANT],
|
467
|
-
}
|
468
|
-
).assign(r_id="baz")
|
469
|
-
reaction_w_interactors[SBML_DFS.RSC_ID] = [
|
470
|
-
f"rsc_{i}" for i in range(len(reaction_w_interactors))
|
471
|
-
]
|
472
|
-
reaction_w_interactors.set_index(SBML_DFS.RSC_ID, inplace=True)
|
473
|
-
reaction_w_interactors = sbml_dfs_core.add_sbo_role(reaction_w_interactors)
|
474
|
-
|
475
|
-
working_reactions = reaction_w_regulators.copy()
|
476
|
-
working_reactions["new"] = True
|
477
|
-
working_reactions.loc["rsc_0", "new"] = False
|
478
|
-
working_reactions
|
479
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
480
|
-
assert result == {"bar"}
|
481
|
-
|
482
|
-
# missing one enzyme -> operable
|
483
|
-
working_reactions = reaction_w_regulators.copy()
|
484
|
-
working_reactions["new"] = True
|
485
|
-
working_reactions.loc["rsc_4", "new"] = False
|
486
|
-
working_reactions
|
487
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
488
|
-
assert result == set()
|
489
|
-
|
490
|
-
# missing one product -> inoperable
|
491
|
-
working_reactions = reaction_w_regulators.copy()
|
492
|
-
working_reactions["new"] = True
|
493
|
-
working_reactions.loc["rsc_2", "new"] = False
|
494
|
-
working_reactions
|
495
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
496
|
-
assert result == {"bar"}
|
497
|
-
|
498
|
-
# missing all enzymes -> inoperable
|
499
|
-
working_reactions = reaction_w_regulators.copy()
|
500
|
-
working_reactions["new"] = True
|
501
|
-
working_reactions.loc["rsc_4", "new"] = False
|
502
|
-
working_reactions.loc["rsc_5", "new"] = False
|
503
|
-
working_reactions
|
504
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
505
|
-
assert result == {"bar"}
|
506
|
-
|
507
|
-
# missing regulators -> operable
|
508
|
-
working_reactions = reaction_w_regulators.copy()
|
509
|
-
working_reactions["new"] = True
|
510
|
-
working_reactions.loc["rsc_6", "new"] = False
|
511
|
-
working_reactions
|
512
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
513
|
-
assert result == set()
|
514
|
-
|
515
|
-
# remove an interactor
|
516
|
-
working_reactions = reaction_w_interactors.copy()
|
517
|
-
working_reactions["new"] = True
|
518
|
-
working_reactions.loc["rsc_0", "new"] = False
|
519
|
-
working_reactions
|
520
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
521
|
-
assert result == {"baz"}
|
522
|
-
|
523
|
-
|
524
416
|
def test_remove_entity_data_success(sbml_dfs_w_data):
|
525
417
|
"""Test successful removal of entity data."""
|
526
418
|
# Get initial data
|
@@ -564,85 +456,92 @@ def test_remove_entity_data_nonexistent(sbml_dfs_w_data, caplog):
|
|
564
456
|
sbml_dfs_w_data.validate()
|
565
457
|
|
566
458
|
|
567
|
-
def
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
"
|
576
|
-
"
|
577
|
-
"
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
+ ["HGNC"] * 5,
|
586
|
-
IDENTIFIERS.IDENTIFIER: [
|
587
|
-
"CPX-BIG",
|
588
|
-
"mem1",
|
589
|
-
"mem2",
|
590
|
-
"mem3",
|
591
|
-
"mem4",
|
592
|
-
"mem5",
|
593
|
-
"part1",
|
594
|
-
"part2",
|
595
|
-
"GO:1",
|
596
|
-
"GO:2",
|
597
|
-
"dna_seq",
|
598
|
-
"protein_seq",
|
599
|
-
"my_cool_pub",
|
600
|
-
]
|
601
|
-
+ ["promiscuous_complex"] * 5,
|
602
|
-
IDENTIFIERS.BQB: [BQB.IS]
|
603
|
-
+ [BQB.HAS_PART] * 7
|
604
|
-
+ [BQB.IS] * 2
|
605
|
-
+ [
|
606
|
-
# these are retained if BQB_DEFINING_ATTRS_LOOSE is used
|
607
|
-
BQB.ENCODES,
|
608
|
-
BQB.IS_ENCODED_BY,
|
609
|
-
# this should always be removed
|
610
|
-
BQB.IS_DESCRIBED_BY,
|
611
|
-
]
|
612
|
-
+ [BQB.HAS_PART] * 5,
|
613
|
-
}
|
614
|
-
|
615
|
-
species_ids = pd.DataFrame(species_ids_dict)
|
616
|
-
|
617
|
-
characteristic_ids_narrow = sbml_dfs_core.filter_to_characteristic_species_ids(
|
618
|
-
species_ids,
|
619
|
-
defining_biological_qualifiers=BQB_DEFINING_ATTRS,
|
620
|
-
max_complex_size=4,
|
621
|
-
max_promiscuity=4,
|
459
|
+
def test_get_characteristic_species_ids():
|
460
|
+
"""
|
461
|
+
Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
|
462
|
+
"""
|
463
|
+
# Create mock species identifiers data
|
464
|
+
mock_species_ids = pd.DataFrame(
|
465
|
+
{
|
466
|
+
"s_id": ["s1", "s2", "s3", "s4", "s5"],
|
467
|
+
"identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
|
468
|
+
"ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
|
469
|
+
"bqb": [
|
470
|
+
"BQB_IS",
|
471
|
+
"BQB_IS",
|
472
|
+
"BQB_HAS_PART",
|
473
|
+
"BQB_HAS_VERSION",
|
474
|
+
"BQB_ENCODES",
|
475
|
+
],
|
476
|
+
}
|
622
477
|
)
|
623
478
|
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
characteristic_ids_loose = sbml_dfs_core.filter_to_characteristic_species_ids(
|
628
|
-
species_ids,
|
629
|
-
# include encodes and is_encoded_by as equivalent to is
|
630
|
-
defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
|
631
|
-
max_complex_size=4,
|
632
|
-
# expand promiscuity to default value
|
633
|
-
max_promiscuity=20,
|
479
|
+
# Create minimal required tables for SBML_dfs
|
480
|
+
compartments = pd.DataFrame(
|
481
|
+
{"c_name": ["cytosol"], "c_Identifiers": [None]}, index=["C1"]
|
634
482
|
)
|
483
|
+
compartments.index.name = "c_id"
|
484
|
+
species = pd.DataFrame(
|
485
|
+
{"s_name": ["A"], "s_Identifiers": [None], "s_source": [None]}, index=["s1"]
|
486
|
+
)
|
487
|
+
species.index.name = "s_id"
|
488
|
+
compartmentalized_species = pd.DataFrame(
|
489
|
+
{
|
490
|
+
"sc_name": ["A [cytosol]"],
|
491
|
+
"s_id": ["s1"],
|
492
|
+
"c_id": ["C1"],
|
493
|
+
"sc_source": [None],
|
494
|
+
},
|
495
|
+
index=["SC1"],
|
496
|
+
)
|
497
|
+
compartmentalized_species.index.name = "sc_id"
|
498
|
+
reactions = pd.DataFrame(
|
499
|
+
{
|
500
|
+
"r_name": ["rxn1"],
|
501
|
+
"r_Identifiers": [None],
|
502
|
+
"r_source": [None],
|
503
|
+
"r_isreversible": [False],
|
504
|
+
},
|
505
|
+
index=["R1"],
|
506
|
+
)
|
507
|
+
reactions.index.name = "r_id"
|
508
|
+
reaction_species = pd.DataFrame(
|
509
|
+
{
|
510
|
+
"r_id": ["R1"],
|
511
|
+
"sc_id": ["SC1"],
|
512
|
+
"stoichiometry": [1],
|
513
|
+
"sbo_term": ["SBO:0000459"],
|
514
|
+
},
|
515
|
+
index=["RSC1"],
|
516
|
+
)
|
517
|
+
reaction_species.index.name = "rsc_id"
|
635
518
|
|
636
|
-
|
637
|
-
"
|
638
|
-
"
|
639
|
-
"
|
640
|
-
"
|
641
|
-
"
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
519
|
+
sbml_dict = {
|
520
|
+
"compartments": compartments,
|
521
|
+
"species": species,
|
522
|
+
"compartmentalized_species": compartmentalized_species,
|
523
|
+
"reactions": reactions,
|
524
|
+
"reaction_species": reaction_species,
|
525
|
+
}
|
526
|
+
sbml_dfs = SBML_dfs(sbml_dict, validate=False, resolve=False)
|
527
|
+
|
528
|
+
# Test dogmatic case (default)
|
529
|
+
expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART] # noqa: F841
|
530
|
+
with patch.object(sbml_dfs, "get_identifiers", return_value=mock_species_ids):
|
531
|
+
dogmatic_result = sbml_dfs.get_characteristic_species_ids()
|
532
|
+
expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
533
|
+
pd.testing.assert_frame_equal(
|
534
|
+
dogmatic_result, expected_dogmatic, check_like=True
|
535
|
+
)
|
536
|
+
|
537
|
+
# Test non-dogmatic case
|
538
|
+
expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART] # noqa: F841
|
539
|
+
with patch.object(sbml_dfs, "get_identifiers", return_value=mock_species_ids):
|
540
|
+
non_dogmatic_result = sbml_dfs.get_characteristic_species_ids(dogmatic=False)
|
541
|
+
expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
542
|
+
pd.testing.assert_frame_equal(
|
543
|
+
non_dogmatic_result, expected_non_dogmatic, check_like=True
|
544
|
+
)
|
646
545
|
|
647
546
|
|
648
547
|
def test_sbml_basic_functionality(test_data):
|
tests/test_sbml_dfs_utils.py
CHANGED
@@ -3,7 +3,14 @@ from __future__ import annotations
|
|
3
3
|
import pandas as pd
|
4
4
|
|
5
5
|
from napistu import sbml_dfs_utils
|
6
|
-
from napistu.constants import
|
6
|
+
from napistu.constants import (
|
7
|
+
BQB,
|
8
|
+
BQB_DEFINING_ATTRS,
|
9
|
+
BQB_DEFINING_ATTRS_LOOSE,
|
10
|
+
SBML_DFS,
|
11
|
+
IDENTIFIERS,
|
12
|
+
SBOTERM_NAMES,
|
13
|
+
)
|
7
14
|
|
8
15
|
|
9
16
|
def test_id_formatter():
|
@@ -17,47 +24,198 @@ def test_id_formatter():
|
|
17
24
|
assert list(input_vals) == inv_ids
|
18
25
|
|
19
26
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
"
|
29
|
-
"
|
30
|
-
"
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
27
|
+
def test_filter_to_characteristic_species_ids():
|
28
|
+
|
29
|
+
species_ids_dict = {
|
30
|
+
SBML_DFS.S_ID: ["large_complex"] * 6
|
31
|
+
+ ["small_complex"] * 2
|
32
|
+
+ ["proteinA", "proteinB"]
|
33
|
+
+ ["proteinC"] * 3
|
34
|
+
+ [
|
35
|
+
"promiscuous_complexA",
|
36
|
+
"promiscuous_complexB",
|
37
|
+
"promiscuous_complexC",
|
38
|
+
"promiscuous_complexD",
|
39
|
+
"promiscuous_complexE",
|
40
|
+
],
|
41
|
+
IDENTIFIERS.ONTOLOGY: ["complexportal"]
|
42
|
+
+ ["HGNC"] * 7
|
43
|
+
+ ["GO"] * 2
|
44
|
+
+ ["ENSG", "ENSP", "pubmed"]
|
45
|
+
+ ["HGNC"] * 5,
|
46
|
+
IDENTIFIERS.IDENTIFIER: [
|
47
|
+
"CPX-BIG",
|
48
|
+
"mem1",
|
49
|
+
"mem2",
|
50
|
+
"mem3",
|
51
|
+
"mem4",
|
52
|
+
"mem5",
|
53
|
+
"part1",
|
54
|
+
"part2",
|
55
|
+
"GO:1",
|
56
|
+
"GO:2",
|
57
|
+
"dna_seq",
|
58
|
+
"protein_seq",
|
59
|
+
"my_cool_pub",
|
60
|
+
]
|
61
|
+
+ ["promiscuous_complex"] * 5,
|
62
|
+
IDENTIFIERS.BQB: [BQB.IS]
|
63
|
+
+ [BQB.HAS_PART] * 7
|
64
|
+
+ [BQB.IS] * 2
|
65
|
+
+ [
|
66
|
+
# these are retained if BQB_DEFINING_ATTRS_LOOSE is used
|
67
|
+
BQB.ENCODES,
|
68
|
+
BQB.IS_ENCODED_BY,
|
69
|
+
# this should always be removed
|
70
|
+
BQB.IS_DESCRIBED_BY,
|
71
|
+
]
|
72
|
+
+ [BQB.HAS_PART] * 5,
|
73
|
+
}
|
74
|
+
|
75
|
+
species_ids = pd.DataFrame(species_ids_dict)
|
76
|
+
|
77
|
+
characteristic_ids_narrow = sbml_dfs_utils.filter_to_characteristic_species_ids(
|
78
|
+
species_ids,
|
79
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS,
|
80
|
+
max_complex_size=4,
|
81
|
+
max_promiscuity=4,
|
82
|
+
)
|
83
|
+
|
84
|
+
EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
|
85
|
+
assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
86
|
+
|
87
|
+
characteristic_ids_loose = sbml_dfs_utils.filter_to_characteristic_species_ids(
|
88
|
+
species_ids,
|
89
|
+
# include encodes and is_encoded_by as equivalent to is
|
90
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
|
91
|
+
max_complex_size=4,
|
92
|
+
# expand promiscuity to default value
|
93
|
+
max_promiscuity=20,
|
38
94
|
)
|
39
95
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
96
|
+
EXPECTED_IDS = [
|
97
|
+
"CPX-BIG",
|
98
|
+
"GO:1",
|
99
|
+
"GO:2",
|
100
|
+
"dna_seq",
|
101
|
+
"protein_seq",
|
102
|
+
"part1",
|
103
|
+
"part2",
|
104
|
+
] + ["promiscuous_complex"] * 5
|
105
|
+
assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
44
106
|
|
45
|
-
mock_sbml = MockSBML_dfs()
|
46
107
|
|
47
|
-
|
48
|
-
|
49
|
-
dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(mock_sbml)
|
50
|
-
expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
108
|
+
def test_formula(sbml_dfs):
|
109
|
+
# create a formula string
|
51
110
|
|
52
|
-
|
111
|
+
an_r_id = sbml_dfs.reactions.index[0]
|
53
112
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
113
|
+
reaction_species_df = sbml_dfs.reaction_species[
|
114
|
+
sbml_dfs.reaction_species["r_id"] == an_r_id
|
115
|
+
].merge(sbml_dfs.compartmentalized_species, left_on="sc_id", right_index=True)
|
116
|
+
|
117
|
+
formula_str = sbml_dfs_utils.construct_formula_string(
|
118
|
+
reaction_species_df, sbml_dfs.reactions, name_var="sc_name"
|
58
119
|
)
|
59
|
-
expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
60
120
|
|
61
|
-
|
62
|
-
|
121
|
+
assert isinstance(formula_str, str)
|
122
|
+
assert (
|
123
|
+
formula_str
|
124
|
+
== "CO2 [extracellular region] -> CO2 [cytosol] ---- modifiers: AQP1 tetramer [plasma membrane]]"
|
63
125
|
)
|
126
|
+
|
127
|
+
|
128
|
+
def test_find_underspecified_reactions():
|
129
|
+
|
130
|
+
reaction_w_regulators = pd.DataFrame(
|
131
|
+
{
|
132
|
+
SBML_DFS.SC_ID: ["A", "B", "C", "D", "E", "F", "G"],
|
133
|
+
SBML_DFS.STOICHIOMETRY: [-1, -1, 1, 1, 0, 0, 0],
|
134
|
+
SBML_DFS.SBO_TERM: [
|
135
|
+
SBOTERM_NAMES.REACTANT,
|
136
|
+
SBOTERM_NAMES.REACTANT,
|
137
|
+
SBOTERM_NAMES.PRODUCT,
|
138
|
+
SBOTERM_NAMES.PRODUCT,
|
139
|
+
SBOTERM_NAMES.CATALYST,
|
140
|
+
SBOTERM_NAMES.CATALYST,
|
141
|
+
SBOTERM_NAMES.STIMULATOR,
|
142
|
+
],
|
143
|
+
}
|
144
|
+
).assign(r_id="bar")
|
145
|
+
reaction_w_regulators[SBML_DFS.RSC_ID] = [
|
146
|
+
f"rsc_{i}" for i in range(len(reaction_w_regulators))
|
147
|
+
]
|
148
|
+
reaction_w_regulators.set_index(SBML_DFS.RSC_ID, inplace=True)
|
149
|
+
reaction_w_regulators = sbml_dfs_utils.add_sbo_role(reaction_w_regulators)
|
150
|
+
|
151
|
+
reaction_w_interactors = pd.DataFrame(
|
152
|
+
{
|
153
|
+
SBML_DFS.SC_ID: ["A", "B"],
|
154
|
+
SBML_DFS.STOICHIOMETRY: [-1, 1],
|
155
|
+
SBML_DFS.SBO_TERM: [SBOTERM_NAMES.REACTANT, SBOTERM_NAMES.REACTANT],
|
156
|
+
}
|
157
|
+
).assign(r_id="baz")
|
158
|
+
reaction_w_interactors[SBML_DFS.RSC_ID] = [
|
159
|
+
f"rsc_{i}" for i in range(len(reaction_w_interactors))
|
160
|
+
]
|
161
|
+
reaction_w_interactors.set_index(SBML_DFS.RSC_ID, inplace=True)
|
162
|
+
reaction_w_interactors = sbml_dfs_utils.add_sbo_role(reaction_w_interactors)
|
163
|
+
|
164
|
+
working_reactions = reaction_w_regulators.copy()
|
165
|
+
working_reactions["new"] = True
|
166
|
+
working_reactions.loc["rsc_0", "new"] = False
|
167
|
+
working_reactions
|
168
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
169
|
+
assert result == {"bar"}
|
170
|
+
|
171
|
+
# missing one enzyme -> operable
|
172
|
+
working_reactions = reaction_w_regulators.copy()
|
173
|
+
working_reactions["new"] = True
|
174
|
+
working_reactions.loc["rsc_4", "new"] = False
|
175
|
+
working_reactions
|
176
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
177
|
+
assert result == set()
|
178
|
+
|
179
|
+
# missing one product -> inoperable
|
180
|
+
working_reactions = reaction_w_regulators.copy()
|
181
|
+
working_reactions["new"] = True
|
182
|
+
working_reactions.loc["rsc_2", "new"] = False
|
183
|
+
working_reactions
|
184
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
185
|
+
assert result == {"bar"}
|
186
|
+
|
187
|
+
# missing all enzymes -> inoperable
|
188
|
+
working_reactions = reaction_w_regulators.copy()
|
189
|
+
working_reactions["new"] = True
|
190
|
+
working_reactions.loc["rsc_4", "new"] = False
|
191
|
+
working_reactions.loc["rsc_5", "new"] = False
|
192
|
+
working_reactions
|
193
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
194
|
+
assert result == {"bar"}
|
195
|
+
|
196
|
+
# missing regulators -> operable
|
197
|
+
working_reactions = reaction_w_regulators.copy()
|
198
|
+
working_reactions["new"] = True
|
199
|
+
working_reactions.loc["rsc_6", "new"] = False
|
200
|
+
working_reactions
|
201
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
202
|
+
assert result == set()
|
203
|
+
|
204
|
+
# remove an interactor
|
205
|
+
working_reactions = reaction_w_interactors.copy()
|
206
|
+
working_reactions["new"] = True
|
207
|
+
working_reactions.loc["rsc_0", "new"] = False
|
208
|
+
working_reactions
|
209
|
+
result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
|
210
|
+
assert result == {"baz"}
|
211
|
+
|
212
|
+
|
213
|
+
def test_stubbed_compartment():
|
214
|
+
compartment = sbml_dfs_utils.stub_compartments()
|
215
|
+
|
216
|
+
assert compartment["c_Identifiers"].iloc[0].ids[0] == {
|
217
|
+
"ontology": "go",
|
218
|
+
"identifier": "GO:0005575",
|
219
|
+
"url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
|
220
|
+
"bqb": "BQB_IS",
|
221
|
+
}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|