napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +38 -27
- napistu/consensus.py +22 -27
- napistu/constants.py +91 -65
- napistu/context/filtering.py +2 -1
- napistu/identifiers.py +3 -6
- napistu/indices.py +3 -1
- napistu/ingestion/bigg.py +6 -6
- napistu/ingestion/sbml.py +298 -295
- napistu/ingestion/string.py +16 -19
- napistu/ingestion/trrust.py +22 -27
- napistu/ingestion/yeast.py +2 -1
- napistu/matching/interactions.py +4 -4
- napistu/matching/species.py +1 -1
- napistu/modify/uncompartmentalize.py +1 -1
- napistu/network/net_create.py +1 -1
- napistu/network/paths.py +1 -1
- napistu/ontologies/dogma.py +2 -1
- napistu/ontologies/genodexito.py +5 -1
- napistu/ontologies/renaming.py +4 -0
- napistu/sbml_dfs_core.py +1343 -2167
- napistu/sbml_dfs_utils.py +1086 -143
- napistu/utils.py +52 -41
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/METADATA +2 -2
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/RECORD +40 -40
- tests/conftest.py +113 -13
- tests/test_consensus.py +161 -4
- tests/test_context_filtering.py +2 -2
- tests/test_gaps.py +26 -15
- tests/test_network_net_create.py +1 -1
- tests/test_network_precompute.py +1 -1
- tests/test_ontologies_genodexito.py +3 -0
- tests/test_ontologies_mygene.py +3 -0
- tests/test_ontologies_renaming.py +28 -24
- tests/test_sbml_dfs_core.py +260 -211
- tests/test_sbml_dfs_utils.py +194 -36
- tests/test_utils.py +19 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/WHEEL +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/top_level.txt +0 -0
tests/test_sbml_dfs_core.py
CHANGED
@@ -5,33 +5,34 @@ import os
|
|
5
5
|
import numpy as np
|
6
6
|
import pandas as pd
|
7
7
|
import pytest
|
8
|
+
from napistu import identifiers
|
8
9
|
from napistu import sbml_dfs_core
|
9
10
|
from napistu.source import Source
|
10
11
|
from napistu.ingestion import sbml
|
11
12
|
from napistu.modify import pathwayannot
|
12
|
-
from napistu.sbml_dfs_utils import _stub_ids
|
13
13
|
|
14
14
|
from napistu import identifiers as napistu_identifiers
|
15
15
|
from napistu.constants import (
|
16
16
|
SBML_DFS,
|
17
|
-
SBOTERM_NAMES,
|
18
17
|
BQB_DEFINING_ATTRS,
|
19
18
|
BQB_DEFINING_ATTRS_LOOSE,
|
20
19
|
BQB,
|
21
|
-
IDENTIFIERS,
|
22
20
|
)
|
23
21
|
from napistu.sbml_dfs_core import SBML_dfs
|
22
|
+
from unittest.mock import patch
|
24
23
|
|
25
24
|
|
26
25
|
@pytest.fixture
|
27
26
|
def test_data():
|
28
27
|
"""Create test data for SBML integration tests."""
|
29
28
|
|
29
|
+
blank_id = identifiers.Identifiers([])
|
30
|
+
|
30
31
|
# Test compartments
|
31
32
|
compartments_df = pd.DataFrame(
|
32
33
|
[
|
33
|
-
{
|
34
|
-
{
|
34
|
+
{SBML_DFS.C_NAME: "nucleus", SBML_DFS.C_IDENTIFIERS: blank_id},
|
35
|
+
{SBML_DFS.C_NAME: "cytoplasm", SBML_DFS.C_IDENTIFIERS: blank_id},
|
35
36
|
]
|
36
37
|
)
|
37
38
|
|
@@ -39,14 +40,18 @@ def test_data():
|
|
39
40
|
species_df = pd.DataFrame(
|
40
41
|
[
|
41
42
|
{
|
42
|
-
|
43
|
-
|
43
|
+
SBML_DFS.S_NAME: "TP53",
|
44
|
+
SBML_DFS.S_IDENTIFIERS: blank_id,
|
44
45
|
"gene_type": "tumor_suppressor",
|
45
46
|
},
|
46
|
-
{"s_name": "MDM2", "s_Identifiers": _stub_ids([]), "gene_type": "oncogene"},
|
47
47
|
{
|
48
|
-
|
49
|
-
|
48
|
+
SBML_DFS.S_NAME: "MDM2",
|
49
|
+
SBML_DFS.S_IDENTIFIERS: blank_id,
|
50
|
+
"gene_type": "oncogene",
|
51
|
+
},
|
52
|
+
{
|
53
|
+
SBML_DFS.S_NAME: "CDKN1A",
|
54
|
+
SBML_DFS.S_IDENTIFIERS: blank_id,
|
50
55
|
"gene_type": "cell_cycle",
|
51
56
|
},
|
52
57
|
]
|
@@ -60,10 +65,10 @@ def test_data():
|
|
60
65
|
"downstream_name": "CDKN1A",
|
61
66
|
"upstream_compartment": "nucleus",
|
62
67
|
"downstream_compartment": "nucleus",
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
68
|
+
SBML_DFS.R_NAME: "TP53_activates_CDKN1A",
|
69
|
+
SBML_DFS.SBO_TERM: "SBO:0000459",
|
70
|
+
SBML_DFS.R_IDENTIFIERS: blank_id,
|
71
|
+
SBML_DFS.R_ISREVERSIBLE: False,
|
67
72
|
"confidence": 0.95,
|
68
73
|
},
|
69
74
|
{
|
@@ -71,10 +76,10 @@ def test_data():
|
|
71
76
|
"downstream_name": "TP53",
|
72
77
|
"upstream_compartment": "cytoplasm",
|
73
78
|
"downstream_compartment": "nucleus",
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
79
|
+
SBML_DFS.R_NAME: "MDM2_inhibits_TP53",
|
80
|
+
SBML_DFS.SBO_TERM: "SBO:0000020",
|
81
|
+
SBML_DFS.R_IDENTIFIERS: blank_id,
|
82
|
+
SBML_DFS.R_ISREVERSIBLE: False,
|
78
83
|
"confidence": 0.87,
|
79
84
|
},
|
80
85
|
]
|
@@ -274,26 +279,6 @@ def test_sbml_dfs_remove_reactions_check_species(sbml_dfs):
|
|
274
279
|
sbml_dfs.validate()
|
275
280
|
|
276
281
|
|
277
|
-
def test_formula(sbml_dfs):
|
278
|
-
# create a formula string
|
279
|
-
|
280
|
-
an_r_id = sbml_dfs.reactions.index[0]
|
281
|
-
|
282
|
-
reaction_species_df = sbml_dfs.reaction_species[
|
283
|
-
sbml_dfs.reaction_species["r_id"] == an_r_id
|
284
|
-
].merge(sbml_dfs.compartmentalized_species, left_on="sc_id", right_index=True)
|
285
|
-
|
286
|
-
formula_str = sbml_dfs_core.construct_formula_string(
|
287
|
-
reaction_species_df, sbml_dfs.reactions, name_var="sc_name"
|
288
|
-
)
|
289
|
-
|
290
|
-
assert isinstance(formula_str, str)
|
291
|
-
assert (
|
292
|
-
formula_str
|
293
|
-
== "CO2 [extracellular region] -> CO2 [cytosol] ---- modifiers: AQP1 tetramer [plasma membrane]]"
|
294
|
-
)
|
295
|
-
|
296
|
-
|
297
282
|
def test_read_sbml_with_invalid_ids():
|
298
283
|
SBML_W_BAD_IDS = "R-HSA-166658.sbml"
|
299
284
|
test_path = os.path.abspath(os.path.join(__file__, os.pardir))
|
@@ -305,17 +290,6 @@ def test_read_sbml_with_invalid_ids():
|
|
305
290
|
assert isinstance(sbml_dfs_core.SBML_dfs(sbml_w_bad_ids), sbml_dfs_core.SBML_dfs)
|
306
291
|
|
307
292
|
|
308
|
-
def test_stubbed_compartment():
|
309
|
-
compartment = sbml_dfs_core._stub_compartments()
|
310
|
-
|
311
|
-
assert compartment["c_Identifiers"].iloc[0].ids[0] == {
|
312
|
-
"ontology": "go",
|
313
|
-
"identifier": "GO:0005575",
|
314
|
-
"url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
|
315
|
-
"bqb": "BQB_IS",
|
316
|
-
}
|
317
|
-
|
318
|
-
|
319
293
|
def test_get_table(sbml_dfs):
|
320
294
|
assert isinstance(sbml_dfs.get_table("species"), pd.DataFrame)
|
321
295
|
assert isinstance(sbml_dfs.get_table("species", {"id"}), pd.DataFrame)
|
@@ -366,10 +340,20 @@ def test_species_status(sbml_dfs):
|
|
366
340
|
select_species = species[species["s_name"] == "OxyHbA"]
|
367
341
|
assert select_species.shape[0] == 1
|
368
342
|
|
369
|
-
status =
|
343
|
+
status = sbml_dfs.species_status(select_species.index[0])
|
344
|
+
|
345
|
+
# expected columns
|
346
|
+
expected_columns = [
|
347
|
+
SBML_DFS.SC_NAME,
|
348
|
+
SBML_DFS.STOICHIOMETRY,
|
349
|
+
SBML_DFS.R_NAME,
|
350
|
+
"r_formula_str",
|
351
|
+
]
|
352
|
+
assert all(col in status.columns for col in expected_columns)
|
353
|
+
|
370
354
|
assert (
|
371
355
|
status["r_formula_str"][0]
|
372
|
-
== "4.0
|
356
|
+
== "cytosol: 4.0 CO2 + 4.0 H+ + OxyHbA -> 4.0 O2 + Protonated Carbamino DeoxyHbA"
|
373
357
|
)
|
374
358
|
|
375
359
|
|
@@ -436,91 +420,6 @@ def test_get_identifiers_handles_missing_values():
|
|
436
420
|
), "Only Identifiers objects should be returned."
|
437
421
|
|
438
422
|
|
439
|
-
def test_find_underspecified_reactions():
|
440
|
-
|
441
|
-
reaction_w_regulators = pd.DataFrame(
|
442
|
-
{
|
443
|
-
SBML_DFS.SC_ID: ["A", "B", "C", "D", "E", "F", "G"],
|
444
|
-
SBML_DFS.STOICHIOMETRY: [-1, -1, 1, 1, 0, 0, 0],
|
445
|
-
SBML_DFS.SBO_TERM: [
|
446
|
-
SBOTERM_NAMES.REACTANT,
|
447
|
-
SBOTERM_NAMES.REACTANT,
|
448
|
-
SBOTERM_NAMES.PRODUCT,
|
449
|
-
SBOTERM_NAMES.PRODUCT,
|
450
|
-
SBOTERM_NAMES.CATALYST,
|
451
|
-
SBOTERM_NAMES.CATALYST,
|
452
|
-
SBOTERM_NAMES.STIMULATOR,
|
453
|
-
],
|
454
|
-
}
|
455
|
-
).assign(r_id="bar")
|
456
|
-
reaction_w_regulators[SBML_DFS.RSC_ID] = [
|
457
|
-
f"rsc_{i}" for i in range(len(reaction_w_regulators))
|
458
|
-
]
|
459
|
-
reaction_w_regulators.set_index(SBML_DFS.RSC_ID, inplace=True)
|
460
|
-
reaction_w_regulators = sbml_dfs_core.add_sbo_role(reaction_w_regulators)
|
461
|
-
|
462
|
-
reaction_w_interactors = pd.DataFrame(
|
463
|
-
{
|
464
|
-
SBML_DFS.SC_ID: ["A", "B"],
|
465
|
-
SBML_DFS.STOICHIOMETRY: [-1, 1],
|
466
|
-
SBML_DFS.SBO_TERM: [SBOTERM_NAMES.REACTANT, SBOTERM_NAMES.REACTANT],
|
467
|
-
}
|
468
|
-
).assign(r_id="baz")
|
469
|
-
reaction_w_interactors[SBML_DFS.RSC_ID] = [
|
470
|
-
f"rsc_{i}" for i in range(len(reaction_w_interactors))
|
471
|
-
]
|
472
|
-
reaction_w_interactors.set_index(SBML_DFS.RSC_ID, inplace=True)
|
473
|
-
reaction_w_interactors = sbml_dfs_core.add_sbo_role(reaction_w_interactors)
|
474
|
-
|
475
|
-
working_reactions = reaction_w_regulators.copy()
|
476
|
-
working_reactions["new"] = True
|
477
|
-
working_reactions.loc["rsc_0", "new"] = False
|
478
|
-
working_reactions
|
479
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
480
|
-
assert result == {"bar"}
|
481
|
-
|
482
|
-
# missing one enzyme -> operable
|
483
|
-
working_reactions = reaction_w_regulators.copy()
|
484
|
-
working_reactions["new"] = True
|
485
|
-
working_reactions.loc["rsc_4", "new"] = False
|
486
|
-
working_reactions
|
487
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
488
|
-
assert result == set()
|
489
|
-
|
490
|
-
# missing one product -> inoperable
|
491
|
-
working_reactions = reaction_w_regulators.copy()
|
492
|
-
working_reactions["new"] = True
|
493
|
-
working_reactions.loc["rsc_2", "new"] = False
|
494
|
-
working_reactions
|
495
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
496
|
-
assert result == {"bar"}
|
497
|
-
|
498
|
-
# missing all enzymes -> inoperable
|
499
|
-
working_reactions = reaction_w_regulators.copy()
|
500
|
-
working_reactions["new"] = True
|
501
|
-
working_reactions.loc["rsc_4", "new"] = False
|
502
|
-
working_reactions.loc["rsc_5", "new"] = False
|
503
|
-
working_reactions
|
504
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
505
|
-
assert result == {"bar"}
|
506
|
-
|
507
|
-
# missing regulators -> operable
|
508
|
-
working_reactions = reaction_w_regulators.copy()
|
509
|
-
working_reactions["new"] = True
|
510
|
-
working_reactions.loc["rsc_6", "new"] = False
|
511
|
-
working_reactions
|
512
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
513
|
-
assert result == set()
|
514
|
-
|
515
|
-
# remove an interactor
|
516
|
-
working_reactions = reaction_w_interactors.copy()
|
517
|
-
working_reactions["new"] = True
|
518
|
-
working_reactions.loc["rsc_0", "new"] = False
|
519
|
-
working_reactions
|
520
|
-
result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
|
521
|
-
assert result == {"baz"}
|
522
|
-
|
523
|
-
|
524
423
|
def test_remove_entity_data_success(sbml_dfs_w_data):
|
525
424
|
"""Test successful removal of entity data."""
|
526
425
|
# Get initial data
|
@@ -564,85 +463,92 @@ def test_remove_entity_data_nonexistent(sbml_dfs_w_data, caplog):
|
|
564
463
|
sbml_dfs_w_data.validate()
|
565
464
|
|
566
465
|
|
567
|
-
def
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
"
|
576
|
-
"
|
577
|
-
"
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
+ ["HGNC"] * 5,
|
586
|
-
IDENTIFIERS.IDENTIFIER: [
|
587
|
-
"CPX-BIG",
|
588
|
-
"mem1",
|
589
|
-
"mem2",
|
590
|
-
"mem3",
|
591
|
-
"mem4",
|
592
|
-
"mem5",
|
593
|
-
"part1",
|
594
|
-
"part2",
|
595
|
-
"GO:1",
|
596
|
-
"GO:2",
|
597
|
-
"dna_seq",
|
598
|
-
"protein_seq",
|
599
|
-
"my_cool_pub",
|
600
|
-
]
|
601
|
-
+ ["promiscuous_complex"] * 5,
|
602
|
-
IDENTIFIERS.BQB: [BQB.IS]
|
603
|
-
+ [BQB.HAS_PART] * 7
|
604
|
-
+ [BQB.IS] * 2
|
605
|
-
+ [
|
606
|
-
# these are retained if BQB_DEFINING_ATTRS_LOOSE is used
|
607
|
-
BQB.ENCODES,
|
608
|
-
BQB.IS_ENCODED_BY,
|
609
|
-
# this should always be removed
|
610
|
-
BQB.IS_DESCRIBED_BY,
|
611
|
-
]
|
612
|
-
+ [BQB.HAS_PART] * 5,
|
613
|
-
}
|
614
|
-
|
615
|
-
species_ids = pd.DataFrame(species_ids_dict)
|
616
|
-
|
617
|
-
characteristic_ids_narrow = sbml_dfs_core.filter_to_characteristic_species_ids(
|
618
|
-
species_ids,
|
619
|
-
defining_biological_qualifiers=BQB_DEFINING_ATTRS,
|
620
|
-
max_complex_size=4,
|
621
|
-
max_promiscuity=4,
|
466
|
+
def test_get_characteristic_species_ids():
|
467
|
+
"""
|
468
|
+
Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
|
469
|
+
"""
|
470
|
+
# Create mock species identifiers data
|
471
|
+
mock_species_ids = pd.DataFrame(
|
472
|
+
{
|
473
|
+
"s_id": ["s1", "s2", "s3", "s4", "s5"],
|
474
|
+
"identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
|
475
|
+
"ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
|
476
|
+
"bqb": [
|
477
|
+
"BQB_IS",
|
478
|
+
"BQB_IS",
|
479
|
+
"BQB_HAS_PART",
|
480
|
+
"BQB_HAS_VERSION",
|
481
|
+
"BQB_ENCODES",
|
482
|
+
],
|
483
|
+
}
|
622
484
|
)
|
623
485
|
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
486
|
+
# Create minimal required tables for SBML_dfs
|
487
|
+
compartments = pd.DataFrame(
|
488
|
+
{"c_name": ["cytosol"], "c_Identifiers": [None]}, index=["C1"]
|
489
|
+
)
|
490
|
+
compartments.index.name = "c_id"
|
491
|
+
species = pd.DataFrame(
|
492
|
+
{"s_name": ["A"], "s_Identifiers": [None], "s_source": [None]}, index=["s1"]
|
493
|
+
)
|
494
|
+
species.index.name = "s_id"
|
495
|
+
compartmentalized_species = pd.DataFrame(
|
496
|
+
{
|
497
|
+
"sc_name": ["A [cytosol]"],
|
498
|
+
"s_id": ["s1"],
|
499
|
+
"c_id": ["C1"],
|
500
|
+
"sc_source": [None],
|
501
|
+
},
|
502
|
+
index=["SC1"],
|
503
|
+
)
|
504
|
+
compartmentalized_species.index.name = "sc_id"
|
505
|
+
reactions = pd.DataFrame(
|
506
|
+
{
|
507
|
+
"r_name": ["rxn1"],
|
508
|
+
"r_Identifiers": [None],
|
509
|
+
"r_source": [None],
|
510
|
+
"r_isreversible": [False],
|
511
|
+
},
|
512
|
+
index=["R1"],
|
513
|
+
)
|
514
|
+
reactions.index.name = "r_id"
|
515
|
+
reaction_species = pd.DataFrame(
|
516
|
+
{
|
517
|
+
"r_id": ["R1"],
|
518
|
+
"sc_id": ["SC1"],
|
519
|
+
"stoichiometry": [1],
|
520
|
+
"sbo_term": ["SBO:0000459"],
|
521
|
+
},
|
522
|
+
index=["RSC1"],
|
634
523
|
)
|
524
|
+
reaction_species.index.name = "rsc_id"
|
635
525
|
|
636
|
-
|
637
|
-
"
|
638
|
-
"
|
639
|
-
"
|
640
|
-
"
|
641
|
-
"
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
526
|
+
sbml_dict = {
|
527
|
+
"compartments": compartments,
|
528
|
+
"species": species,
|
529
|
+
"compartmentalized_species": compartmentalized_species,
|
530
|
+
"reactions": reactions,
|
531
|
+
"reaction_species": reaction_species,
|
532
|
+
}
|
533
|
+
sbml_dfs = SBML_dfs(sbml_dict, validate=False, resolve=False)
|
534
|
+
|
535
|
+
# Test dogmatic case (default)
|
536
|
+
expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART] # noqa: F841
|
537
|
+
with patch.object(sbml_dfs, "get_identifiers", return_value=mock_species_ids):
|
538
|
+
dogmatic_result = sbml_dfs.get_characteristic_species_ids()
|
539
|
+
expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
540
|
+
pd.testing.assert_frame_equal(
|
541
|
+
dogmatic_result, expected_dogmatic, check_like=True
|
542
|
+
)
|
543
|
+
|
544
|
+
# Test non-dogmatic case
|
545
|
+
expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART] # noqa: F841
|
546
|
+
with patch.object(sbml_dfs, "get_identifiers", return_value=mock_species_ids):
|
547
|
+
non_dogmatic_result = sbml_dfs.get_characteristic_species_ids(dogmatic=False)
|
548
|
+
expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
549
|
+
pd.testing.assert_frame_equal(
|
550
|
+
non_dogmatic_result, expected_non_dogmatic, check_like=True
|
551
|
+
)
|
646
552
|
|
647
553
|
|
648
554
|
def test_sbml_basic_functionality(test_data):
|
@@ -712,3 +618,146 @@ def test_sbml_custom_stoichiometry(test_data):
|
|
712
618
|
stoichiometries = result.reaction_species["stoichiometry"].unique()
|
713
619
|
assert 2 in stoichiometries # upstream
|
714
620
|
assert 3 in stoichiometries # downstream
|
621
|
+
|
622
|
+
|
623
|
+
def test_validate_schema_missing(minimal_valid_sbml_dfs):
|
624
|
+
"""Test validation fails when schema is missing."""
|
625
|
+
delattr(minimal_valid_sbml_dfs, "schema")
|
626
|
+
with pytest.raises(ValueError, match="No schema found"):
|
627
|
+
minimal_valid_sbml_dfs.validate()
|
628
|
+
|
629
|
+
|
630
|
+
def test_validate_table(minimal_valid_sbml_dfs):
|
631
|
+
"""Test _validate_table fails for various table structure issues."""
|
632
|
+
# Wrong index name
|
633
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
634
|
+
sbml_dfs.species.index.name = "wrong_name"
|
635
|
+
with pytest.raises(ValueError, match="the index name for species was not the pk"):
|
636
|
+
sbml_dfs.validate()
|
637
|
+
|
638
|
+
# Duplicate primary keys
|
639
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
640
|
+
duplicate_species = pd.DataFrame(
|
641
|
+
{
|
642
|
+
SBML_DFS.S_NAME: ["ATP", "ADP"],
|
643
|
+
SBML_DFS.S_IDENTIFIERS: [
|
644
|
+
identifiers.Identifiers([]),
|
645
|
+
identifiers.Identifiers([]),
|
646
|
+
],
|
647
|
+
SBML_DFS.S_SOURCE: [Source(init=True), Source(init=True)],
|
648
|
+
},
|
649
|
+
index=pd.Index(["S00001", "S00001"], name=SBML_DFS.S_ID),
|
650
|
+
)
|
651
|
+
sbml_dfs.species = duplicate_species
|
652
|
+
with pytest.raises(ValueError, match="primary keys were duplicated"):
|
653
|
+
sbml_dfs.validate()
|
654
|
+
|
655
|
+
# Missing required variables
|
656
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
657
|
+
sbml_dfs.species = sbml_dfs.species.drop(columns=[SBML_DFS.S_NAME])
|
658
|
+
with pytest.raises(ValueError, match="Missing .+ required variables for species"):
|
659
|
+
sbml_dfs.validate()
|
660
|
+
|
661
|
+
# Empty table
|
662
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
663
|
+
sbml_dfs.species = pd.DataFrame(
|
664
|
+
{
|
665
|
+
SBML_DFS.S_NAME: [],
|
666
|
+
SBML_DFS.S_IDENTIFIERS: [],
|
667
|
+
SBML_DFS.S_SOURCE: [],
|
668
|
+
},
|
669
|
+
index=pd.Index([], name=SBML_DFS.S_ID),
|
670
|
+
)
|
671
|
+
with pytest.raises(ValueError, match="species contained no entries"):
|
672
|
+
sbml_dfs.validate()
|
673
|
+
|
674
|
+
|
675
|
+
def test_check_pk_fk_correspondence(minimal_valid_sbml_dfs):
|
676
|
+
"""Test _check_pk_fk_correspondence fails for various foreign key issues."""
|
677
|
+
# Missing species reference
|
678
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
679
|
+
sbml_dfs.compartmentalized_species[SBML_DFS.S_ID] = ["S99999"]
|
680
|
+
with pytest.raises(
|
681
|
+
ValueError,
|
682
|
+
match="s_id values were found in compartmentalized_species but missing from species",
|
683
|
+
):
|
684
|
+
sbml_dfs.validate()
|
685
|
+
|
686
|
+
# Missing compartment reference
|
687
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
688
|
+
sbml_dfs.compartmentalized_species[SBML_DFS.C_ID] = ["C99999"]
|
689
|
+
with pytest.raises(
|
690
|
+
ValueError,
|
691
|
+
match="c_id values were found in compartmentalized_species but missing from compartments",
|
692
|
+
):
|
693
|
+
sbml_dfs.validate()
|
694
|
+
|
695
|
+
# Null foreign keys
|
696
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
697
|
+
sbml_dfs.compartmentalized_species[SBML_DFS.S_ID] = [None]
|
698
|
+
with pytest.raises(
|
699
|
+
ValueError, match="compartmentalized_species included missing s_id values"
|
700
|
+
):
|
701
|
+
sbml_dfs.validate()
|
702
|
+
|
703
|
+
|
704
|
+
def test_validate_reaction_species(minimal_valid_sbml_dfs):
|
705
|
+
"""Test _validate_reaction_species fails for various reaction species issues."""
|
706
|
+
# Null stoichiometry
|
707
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
708
|
+
sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] = [None]
|
709
|
+
with pytest.raises(ValueError, match="All reaction_species.* must be not null"):
|
710
|
+
sbml_dfs.validate()
|
711
|
+
|
712
|
+
# Null SBO terms
|
713
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
714
|
+
sbml_dfs.reaction_species[SBML_DFS.SBO_TERM] = [None]
|
715
|
+
with pytest.raises(
|
716
|
+
ValueError, match="sbo_terms were None; all terms should be defined"
|
717
|
+
):
|
718
|
+
sbml_dfs.validate()
|
719
|
+
|
720
|
+
# Invalid SBO terms
|
721
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
722
|
+
sbml_dfs.reaction_species[SBML_DFS.SBO_TERM] = ["INVALID_SBO_TERM"]
|
723
|
+
with pytest.raises(ValueError, match="sbo_terms were not defined"):
|
724
|
+
sbml_dfs.validate()
|
725
|
+
|
726
|
+
|
727
|
+
def test_validate_identifiers(minimal_valid_sbml_dfs):
|
728
|
+
"""Test _validate_identifiers fails when identifiers are missing."""
|
729
|
+
minimal_valid_sbml_dfs.species[SBML_DFS.S_IDENTIFIERS] = [None]
|
730
|
+
with pytest.raises(ValueError, match="species has .+ missing ids"):
|
731
|
+
minimal_valid_sbml_dfs.validate()
|
732
|
+
|
733
|
+
|
734
|
+
def test_validate_sources(minimal_valid_sbml_dfs):
|
735
|
+
"""Test _validate_sources fails when sources are missing."""
|
736
|
+
minimal_valid_sbml_dfs.species[SBML_DFS.S_SOURCE] = [None]
|
737
|
+
with pytest.raises(ValueError, match="species has .+ missing sources"):
|
738
|
+
minimal_valid_sbml_dfs.validate()
|
739
|
+
|
740
|
+
|
741
|
+
def test_validate_species_data(minimal_valid_sbml_dfs):
|
742
|
+
"""Test _validate_species_data fails when species_data has invalid structure."""
|
743
|
+
invalid_data = pd.DataFrame(
|
744
|
+
{"extra_info": ["test"]}, index=pd.Index(["S99999"], name=SBML_DFS.S_ID)
|
745
|
+
) # Non-existent species
|
746
|
+
minimal_valid_sbml_dfs.species_data["invalid"] = invalid_data
|
747
|
+
with pytest.raises(ValueError, match="species data invalid was invalid"):
|
748
|
+
minimal_valid_sbml_dfs.validate()
|
749
|
+
|
750
|
+
|
751
|
+
def test_validate_reactions_data(minimal_valid_sbml_dfs):
|
752
|
+
"""Test _validate_reactions_data fails when reactions_data has invalid structure."""
|
753
|
+
invalid_data = pd.DataFrame(
|
754
|
+
{"extra_info": ["test"]}, index=pd.Index(["R99999"], name=SBML_DFS.R_ID)
|
755
|
+
) # Non-existent reaction
|
756
|
+
minimal_valid_sbml_dfs.reactions_data["invalid"] = invalid_data
|
757
|
+
with pytest.raises(ValueError, match="reactions data invalid was invalid"):
|
758
|
+
minimal_valid_sbml_dfs.validate()
|
759
|
+
|
760
|
+
|
761
|
+
def test_validate_passes_with_valid_data(minimal_valid_sbml_dfs):
|
762
|
+
"""Test that validation passes with completely valid data."""
|
763
|
+
minimal_valid_sbml_dfs.validate() # Should not raise any exceptions
|