napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. napistu/__main__.py +38 -27
  2. napistu/consensus.py +22 -27
  3. napistu/constants.py +91 -65
  4. napistu/context/filtering.py +2 -1
  5. napistu/identifiers.py +3 -6
  6. napistu/indices.py +3 -1
  7. napistu/ingestion/bigg.py +6 -6
  8. napistu/ingestion/sbml.py +298 -295
  9. napistu/ingestion/string.py +16 -19
  10. napistu/ingestion/trrust.py +22 -27
  11. napistu/ingestion/yeast.py +2 -1
  12. napistu/matching/interactions.py +4 -4
  13. napistu/matching/species.py +1 -1
  14. napistu/modify/uncompartmentalize.py +1 -1
  15. napistu/network/net_create.py +1 -1
  16. napistu/network/paths.py +1 -1
  17. napistu/ontologies/dogma.py +2 -1
  18. napistu/ontologies/genodexito.py +5 -1
  19. napistu/ontologies/renaming.py +4 -0
  20. napistu/sbml_dfs_core.py +1343 -2167
  21. napistu/sbml_dfs_utils.py +1086 -143
  22. napistu/utils.py +52 -41
  23. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/METADATA +2 -2
  24. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/RECORD +40 -40
  25. tests/conftest.py +113 -13
  26. tests/test_consensus.py +161 -4
  27. tests/test_context_filtering.py +2 -2
  28. tests/test_gaps.py +26 -15
  29. tests/test_network_net_create.py +1 -1
  30. tests/test_network_precompute.py +1 -1
  31. tests/test_ontologies_genodexito.py +3 -0
  32. tests/test_ontologies_mygene.py +3 -0
  33. tests/test_ontologies_renaming.py +28 -24
  34. tests/test_sbml_dfs_core.py +260 -211
  35. tests/test_sbml_dfs_utils.py +194 -36
  36. tests/test_utils.py +19 -0
  37. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/WHEEL +0 -0
  38. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/entry_points.txt +0 -0
  39. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/licenses/LICENSE +0 -0
  40. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/top_level.txt +0 -0
@@ -5,33 +5,34 @@ import os
5
5
  import numpy as np
6
6
  import pandas as pd
7
7
  import pytest
8
+ from napistu import identifiers
8
9
  from napistu import sbml_dfs_core
9
10
  from napistu.source import Source
10
11
  from napistu.ingestion import sbml
11
12
  from napistu.modify import pathwayannot
12
- from napistu.sbml_dfs_utils import _stub_ids
13
13
 
14
14
  from napistu import identifiers as napistu_identifiers
15
15
  from napistu.constants import (
16
16
  SBML_DFS,
17
- SBOTERM_NAMES,
18
17
  BQB_DEFINING_ATTRS,
19
18
  BQB_DEFINING_ATTRS_LOOSE,
20
19
  BQB,
21
- IDENTIFIERS,
22
20
  )
23
21
  from napistu.sbml_dfs_core import SBML_dfs
22
+ from unittest.mock import patch
24
23
 
25
24
 
26
25
  @pytest.fixture
27
26
  def test_data():
28
27
  """Create test data for SBML integration tests."""
29
28
 
29
+ blank_id = identifiers.Identifiers([])
30
+
30
31
  # Test compartments
31
32
  compartments_df = pd.DataFrame(
32
33
  [
33
- {"c_name": "nucleus", "c_Identifiers": _stub_ids([])},
34
- {"c_name": "cytoplasm", "c_Identifiers": _stub_ids([])},
34
+ {SBML_DFS.C_NAME: "nucleus", SBML_DFS.C_IDENTIFIERS: blank_id},
35
+ {SBML_DFS.C_NAME: "cytoplasm", SBML_DFS.C_IDENTIFIERS: blank_id},
35
36
  ]
36
37
  )
37
38
 
@@ -39,14 +40,18 @@ def test_data():
39
40
  species_df = pd.DataFrame(
40
41
  [
41
42
  {
42
- "s_name": "TP53",
43
- "s_Identifiers": _stub_ids([]),
43
+ SBML_DFS.S_NAME: "TP53",
44
+ SBML_DFS.S_IDENTIFIERS: blank_id,
44
45
  "gene_type": "tumor_suppressor",
45
46
  },
46
- {"s_name": "MDM2", "s_Identifiers": _stub_ids([]), "gene_type": "oncogene"},
47
47
  {
48
- "s_name": "CDKN1A",
49
- "s_Identifiers": _stub_ids([]),
48
+ SBML_DFS.S_NAME: "MDM2",
49
+ SBML_DFS.S_IDENTIFIERS: blank_id,
50
+ "gene_type": "oncogene",
51
+ },
52
+ {
53
+ SBML_DFS.S_NAME: "CDKN1A",
54
+ SBML_DFS.S_IDENTIFIERS: blank_id,
50
55
  "gene_type": "cell_cycle",
51
56
  },
52
57
  ]
@@ -60,10 +65,10 @@ def test_data():
60
65
  "downstream_name": "CDKN1A",
61
66
  "upstream_compartment": "nucleus",
62
67
  "downstream_compartment": "nucleus",
63
- "r_name": "TP53_activates_CDKN1A",
64
- "sbo_term": "SBO:0000459",
65
- "r_Identifiers": _stub_ids([]),
66
- "r_isreversible": False,
68
+ SBML_DFS.R_NAME: "TP53_activates_CDKN1A",
69
+ SBML_DFS.SBO_TERM: "SBO:0000459",
70
+ SBML_DFS.R_IDENTIFIERS: blank_id,
71
+ SBML_DFS.R_ISREVERSIBLE: False,
67
72
  "confidence": 0.95,
68
73
  },
69
74
  {
@@ -71,10 +76,10 @@ def test_data():
71
76
  "downstream_name": "TP53",
72
77
  "upstream_compartment": "cytoplasm",
73
78
  "downstream_compartment": "nucleus",
74
- "r_name": "MDM2_inhibits_TP53",
75
- "sbo_term": "SBO:0000020",
76
- "r_Identifiers": _stub_ids([]),
77
- "r_isreversible": False,
79
+ SBML_DFS.R_NAME: "MDM2_inhibits_TP53",
80
+ SBML_DFS.SBO_TERM: "SBO:0000020",
81
+ SBML_DFS.R_IDENTIFIERS: blank_id,
82
+ SBML_DFS.R_ISREVERSIBLE: False,
78
83
  "confidence": 0.87,
79
84
  },
80
85
  ]
@@ -274,26 +279,6 @@ def test_sbml_dfs_remove_reactions_check_species(sbml_dfs):
274
279
  sbml_dfs.validate()
275
280
 
276
281
 
277
- def test_formula(sbml_dfs):
278
- # create a formula string
279
-
280
- an_r_id = sbml_dfs.reactions.index[0]
281
-
282
- reaction_species_df = sbml_dfs.reaction_species[
283
- sbml_dfs.reaction_species["r_id"] == an_r_id
284
- ].merge(sbml_dfs.compartmentalized_species, left_on="sc_id", right_index=True)
285
-
286
- formula_str = sbml_dfs_core.construct_formula_string(
287
- reaction_species_df, sbml_dfs.reactions, name_var="sc_name"
288
- )
289
-
290
- assert isinstance(formula_str, str)
291
- assert (
292
- formula_str
293
- == "CO2 [extracellular region] -> CO2 [cytosol] ---- modifiers: AQP1 tetramer [plasma membrane]]"
294
- )
295
-
296
-
297
282
  def test_read_sbml_with_invalid_ids():
298
283
  SBML_W_BAD_IDS = "R-HSA-166658.sbml"
299
284
  test_path = os.path.abspath(os.path.join(__file__, os.pardir))
@@ -305,17 +290,6 @@ def test_read_sbml_with_invalid_ids():
305
290
  assert isinstance(sbml_dfs_core.SBML_dfs(sbml_w_bad_ids), sbml_dfs_core.SBML_dfs)
306
291
 
307
292
 
308
- def test_stubbed_compartment():
309
- compartment = sbml_dfs_core._stub_compartments()
310
-
311
- assert compartment["c_Identifiers"].iloc[0].ids[0] == {
312
- "ontology": "go",
313
- "identifier": "GO:0005575",
314
- "url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
315
- "bqb": "BQB_IS",
316
- }
317
-
318
-
319
293
  def test_get_table(sbml_dfs):
320
294
  assert isinstance(sbml_dfs.get_table("species"), pd.DataFrame)
321
295
  assert isinstance(sbml_dfs.get_table("species", {"id"}), pd.DataFrame)
@@ -366,10 +340,20 @@ def test_species_status(sbml_dfs):
366
340
  select_species = species[species["s_name"] == "OxyHbA"]
367
341
  assert select_species.shape[0] == 1
368
342
 
369
- status = sbml_dfs_core.species_status(select_species.index[0], sbml_dfs)
343
+ status = sbml_dfs.species_status(select_species.index[0])
344
+
345
+ # expected columns
346
+ expected_columns = [
347
+ SBML_DFS.SC_NAME,
348
+ SBML_DFS.STOICHIOMETRY,
349
+ SBML_DFS.R_NAME,
350
+ "r_formula_str",
351
+ ]
352
+ assert all(col in status.columns for col in expected_columns)
353
+
370
354
  assert (
371
355
  status["r_formula_str"][0]
372
- == "4.0 H+ + OxyHbA + 4.0 CO2 -> 4.0 O2 + Protonated Carbamino DeoxyHbA [cytosol]"
356
+ == "cytosol: 4.0 CO2 + 4.0 H+ + OxyHbA -> 4.0 O2 + Protonated Carbamino DeoxyHbA"
373
357
  )
374
358
 
375
359
 
@@ -436,91 +420,6 @@ def test_get_identifiers_handles_missing_values():
436
420
  ), "Only Identifiers objects should be returned."
437
421
 
438
422
 
439
- def test_find_underspecified_reactions():
440
-
441
- reaction_w_regulators = pd.DataFrame(
442
- {
443
- SBML_DFS.SC_ID: ["A", "B", "C", "D", "E", "F", "G"],
444
- SBML_DFS.STOICHIOMETRY: [-1, -1, 1, 1, 0, 0, 0],
445
- SBML_DFS.SBO_TERM: [
446
- SBOTERM_NAMES.REACTANT,
447
- SBOTERM_NAMES.REACTANT,
448
- SBOTERM_NAMES.PRODUCT,
449
- SBOTERM_NAMES.PRODUCT,
450
- SBOTERM_NAMES.CATALYST,
451
- SBOTERM_NAMES.CATALYST,
452
- SBOTERM_NAMES.STIMULATOR,
453
- ],
454
- }
455
- ).assign(r_id="bar")
456
- reaction_w_regulators[SBML_DFS.RSC_ID] = [
457
- f"rsc_{i}" for i in range(len(reaction_w_regulators))
458
- ]
459
- reaction_w_regulators.set_index(SBML_DFS.RSC_ID, inplace=True)
460
- reaction_w_regulators = sbml_dfs_core.add_sbo_role(reaction_w_regulators)
461
-
462
- reaction_w_interactors = pd.DataFrame(
463
- {
464
- SBML_DFS.SC_ID: ["A", "B"],
465
- SBML_DFS.STOICHIOMETRY: [-1, 1],
466
- SBML_DFS.SBO_TERM: [SBOTERM_NAMES.REACTANT, SBOTERM_NAMES.REACTANT],
467
- }
468
- ).assign(r_id="baz")
469
- reaction_w_interactors[SBML_DFS.RSC_ID] = [
470
- f"rsc_{i}" for i in range(len(reaction_w_interactors))
471
- ]
472
- reaction_w_interactors.set_index(SBML_DFS.RSC_ID, inplace=True)
473
- reaction_w_interactors = sbml_dfs_core.add_sbo_role(reaction_w_interactors)
474
-
475
- working_reactions = reaction_w_regulators.copy()
476
- working_reactions["new"] = True
477
- working_reactions.loc["rsc_0", "new"] = False
478
- working_reactions
479
- result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
480
- assert result == {"bar"}
481
-
482
- # missing one enzyme -> operable
483
- working_reactions = reaction_w_regulators.copy()
484
- working_reactions["new"] = True
485
- working_reactions.loc["rsc_4", "new"] = False
486
- working_reactions
487
- result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
488
- assert result == set()
489
-
490
- # missing one product -> inoperable
491
- working_reactions = reaction_w_regulators.copy()
492
- working_reactions["new"] = True
493
- working_reactions.loc["rsc_2", "new"] = False
494
- working_reactions
495
- result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
496
- assert result == {"bar"}
497
-
498
- # missing all enzymes -> inoperable
499
- working_reactions = reaction_w_regulators.copy()
500
- working_reactions["new"] = True
501
- working_reactions.loc["rsc_4", "new"] = False
502
- working_reactions.loc["rsc_5", "new"] = False
503
- working_reactions
504
- result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
505
- assert result == {"bar"}
506
-
507
- # missing regulators -> operable
508
- working_reactions = reaction_w_regulators.copy()
509
- working_reactions["new"] = True
510
- working_reactions.loc["rsc_6", "new"] = False
511
- working_reactions
512
- result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
513
- assert result == set()
514
-
515
- # remove an interactor
516
- working_reactions = reaction_w_interactors.copy()
517
- working_reactions["new"] = True
518
- working_reactions.loc["rsc_0", "new"] = False
519
- working_reactions
520
- result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
521
- assert result == {"baz"}
522
-
523
-
524
423
  def test_remove_entity_data_success(sbml_dfs_w_data):
525
424
  """Test successful removal of entity data."""
526
425
  # Get initial data
@@ -564,85 +463,92 @@ def test_remove_entity_data_nonexistent(sbml_dfs_w_data, caplog):
564
463
  sbml_dfs_w_data.validate()
565
464
 
566
465
 
567
- def test_filter_to_characteristic_species_ids():
568
-
569
- species_ids_dict = {
570
- SBML_DFS.S_ID: ["large_complex"] * 6
571
- + ["small_complex"] * 2
572
- + ["proteinA", "proteinB"]
573
- + ["proteinC"] * 3
574
- + [
575
- "promiscuous_complexA",
576
- "promiscuous_complexB",
577
- "promiscuous_complexC",
578
- "promiscuous_complexD",
579
- "promiscuous_complexE",
580
- ],
581
- IDENTIFIERS.ONTOLOGY: ["complexportal"]
582
- + ["HGNC"] * 7
583
- + ["GO"] * 2
584
- + ["ENSG", "ENSP", "pubmed"]
585
- + ["HGNC"] * 5,
586
- IDENTIFIERS.IDENTIFIER: [
587
- "CPX-BIG",
588
- "mem1",
589
- "mem2",
590
- "mem3",
591
- "mem4",
592
- "mem5",
593
- "part1",
594
- "part2",
595
- "GO:1",
596
- "GO:2",
597
- "dna_seq",
598
- "protein_seq",
599
- "my_cool_pub",
600
- ]
601
- + ["promiscuous_complex"] * 5,
602
- IDENTIFIERS.BQB: [BQB.IS]
603
- + [BQB.HAS_PART] * 7
604
- + [BQB.IS] * 2
605
- + [
606
- # these are retained if BQB_DEFINING_ATTRS_LOOSE is used
607
- BQB.ENCODES,
608
- BQB.IS_ENCODED_BY,
609
- # this should always be removed
610
- BQB.IS_DESCRIBED_BY,
611
- ]
612
- + [BQB.HAS_PART] * 5,
613
- }
614
-
615
- species_ids = pd.DataFrame(species_ids_dict)
616
-
617
- characteristic_ids_narrow = sbml_dfs_core.filter_to_characteristic_species_ids(
618
- species_ids,
619
- defining_biological_qualifiers=BQB_DEFINING_ATTRS,
620
- max_complex_size=4,
621
- max_promiscuity=4,
466
+ def test_get_characteristic_species_ids():
467
+ """
468
+ Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
469
+ """
470
+ # Create mock species identifiers data
471
+ mock_species_ids = pd.DataFrame(
472
+ {
473
+ "s_id": ["s1", "s2", "s3", "s4", "s5"],
474
+ "identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
475
+ "ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
476
+ "bqb": [
477
+ "BQB_IS",
478
+ "BQB_IS",
479
+ "BQB_HAS_PART",
480
+ "BQB_HAS_VERSION",
481
+ "BQB_ENCODES",
482
+ ],
483
+ }
622
484
  )
623
485
 
624
- EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
625
- assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
626
-
627
- characteristic_ids_loose = sbml_dfs_core.filter_to_characteristic_species_ids(
628
- species_ids,
629
- # include encodes and is_encoded_by as equivalent to is
630
- defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
631
- max_complex_size=4,
632
- # expand promiscuity to default value
633
- max_promiscuity=20,
486
+ # Create minimal required tables for SBML_dfs
487
+ compartments = pd.DataFrame(
488
+ {"c_name": ["cytosol"], "c_Identifiers": [None]}, index=["C1"]
489
+ )
490
+ compartments.index.name = "c_id"
491
+ species = pd.DataFrame(
492
+ {"s_name": ["A"], "s_Identifiers": [None], "s_source": [None]}, index=["s1"]
493
+ )
494
+ species.index.name = "s_id"
495
+ compartmentalized_species = pd.DataFrame(
496
+ {
497
+ "sc_name": ["A [cytosol]"],
498
+ "s_id": ["s1"],
499
+ "c_id": ["C1"],
500
+ "sc_source": [None],
501
+ },
502
+ index=["SC1"],
503
+ )
504
+ compartmentalized_species.index.name = "sc_id"
505
+ reactions = pd.DataFrame(
506
+ {
507
+ "r_name": ["rxn1"],
508
+ "r_Identifiers": [None],
509
+ "r_source": [None],
510
+ "r_isreversible": [False],
511
+ },
512
+ index=["R1"],
513
+ )
514
+ reactions.index.name = "r_id"
515
+ reaction_species = pd.DataFrame(
516
+ {
517
+ "r_id": ["R1"],
518
+ "sc_id": ["SC1"],
519
+ "stoichiometry": [1],
520
+ "sbo_term": ["SBO:0000459"],
521
+ },
522
+ index=["RSC1"],
634
523
  )
524
+ reaction_species.index.name = "rsc_id"
635
525
 
636
- EXPECTED_IDS = [
637
- "CPX-BIG",
638
- "GO:1",
639
- "GO:2",
640
- "dna_seq",
641
- "protein_seq",
642
- "part1",
643
- "part2",
644
- ] + ["promiscuous_complex"] * 5
645
- assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
526
+ sbml_dict = {
527
+ "compartments": compartments,
528
+ "species": species,
529
+ "compartmentalized_species": compartmentalized_species,
530
+ "reactions": reactions,
531
+ "reaction_species": reaction_species,
532
+ }
533
+ sbml_dfs = SBML_dfs(sbml_dict, validate=False, resolve=False)
534
+
535
+ # Test dogmatic case (default)
536
+ expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART] # noqa: F841
537
+ with patch.object(sbml_dfs, "get_identifiers", return_value=mock_species_ids):
538
+ dogmatic_result = sbml_dfs.get_characteristic_species_ids()
539
+ expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
540
+ pd.testing.assert_frame_equal(
541
+ dogmatic_result, expected_dogmatic, check_like=True
542
+ )
543
+
544
+ # Test non-dogmatic case
545
+ expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART] # noqa: F841
546
+ with patch.object(sbml_dfs, "get_identifiers", return_value=mock_species_ids):
547
+ non_dogmatic_result = sbml_dfs.get_characteristic_species_ids(dogmatic=False)
548
+ expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
549
+ pd.testing.assert_frame_equal(
550
+ non_dogmatic_result, expected_non_dogmatic, check_like=True
551
+ )
646
552
 
647
553
 
648
554
  def test_sbml_basic_functionality(test_data):
@@ -712,3 +618,146 @@ def test_sbml_custom_stoichiometry(test_data):
712
618
  stoichiometries = result.reaction_species["stoichiometry"].unique()
713
619
  assert 2 in stoichiometries # upstream
714
620
  assert 3 in stoichiometries # downstream
621
+
622
+
623
+ def test_validate_schema_missing(minimal_valid_sbml_dfs):
624
+ """Test validation fails when schema is missing."""
625
+ delattr(minimal_valid_sbml_dfs, "schema")
626
+ with pytest.raises(ValueError, match="No schema found"):
627
+ minimal_valid_sbml_dfs.validate()
628
+
629
+
630
+ def test_validate_table(minimal_valid_sbml_dfs):
631
+ """Test _validate_table fails for various table structure issues."""
632
+ # Wrong index name
633
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
634
+ sbml_dfs.species.index.name = "wrong_name"
635
+ with pytest.raises(ValueError, match="the index name for species was not the pk"):
636
+ sbml_dfs.validate()
637
+
638
+ # Duplicate primary keys
639
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
640
+ duplicate_species = pd.DataFrame(
641
+ {
642
+ SBML_DFS.S_NAME: ["ATP", "ADP"],
643
+ SBML_DFS.S_IDENTIFIERS: [
644
+ identifiers.Identifiers([]),
645
+ identifiers.Identifiers([]),
646
+ ],
647
+ SBML_DFS.S_SOURCE: [Source(init=True), Source(init=True)],
648
+ },
649
+ index=pd.Index(["S00001", "S00001"], name=SBML_DFS.S_ID),
650
+ )
651
+ sbml_dfs.species = duplicate_species
652
+ with pytest.raises(ValueError, match="primary keys were duplicated"):
653
+ sbml_dfs.validate()
654
+
655
+ # Missing required variables
656
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
657
+ sbml_dfs.species = sbml_dfs.species.drop(columns=[SBML_DFS.S_NAME])
658
+ with pytest.raises(ValueError, match="Missing .+ required variables for species"):
659
+ sbml_dfs.validate()
660
+
661
+ # Empty table
662
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
663
+ sbml_dfs.species = pd.DataFrame(
664
+ {
665
+ SBML_DFS.S_NAME: [],
666
+ SBML_DFS.S_IDENTIFIERS: [],
667
+ SBML_DFS.S_SOURCE: [],
668
+ },
669
+ index=pd.Index([], name=SBML_DFS.S_ID),
670
+ )
671
+ with pytest.raises(ValueError, match="species contained no entries"):
672
+ sbml_dfs.validate()
673
+
674
+
675
+ def test_check_pk_fk_correspondence(minimal_valid_sbml_dfs):
676
+ """Test _check_pk_fk_correspondence fails for various foreign key issues."""
677
+ # Missing species reference
678
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
679
+ sbml_dfs.compartmentalized_species[SBML_DFS.S_ID] = ["S99999"]
680
+ with pytest.raises(
681
+ ValueError,
682
+ match="s_id values were found in compartmentalized_species but missing from species",
683
+ ):
684
+ sbml_dfs.validate()
685
+
686
+ # Missing compartment reference
687
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
688
+ sbml_dfs.compartmentalized_species[SBML_DFS.C_ID] = ["C99999"]
689
+ with pytest.raises(
690
+ ValueError,
691
+ match="c_id values were found in compartmentalized_species but missing from compartments",
692
+ ):
693
+ sbml_dfs.validate()
694
+
695
+ # Null foreign keys
696
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
697
+ sbml_dfs.compartmentalized_species[SBML_DFS.S_ID] = [None]
698
+ with pytest.raises(
699
+ ValueError, match="compartmentalized_species included missing s_id values"
700
+ ):
701
+ sbml_dfs.validate()
702
+
703
+
704
+ def test_validate_reaction_species(minimal_valid_sbml_dfs):
705
+ """Test _validate_reaction_species fails for various reaction species issues."""
706
+ # Null stoichiometry
707
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
708
+ sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] = [None]
709
+ with pytest.raises(ValueError, match="All reaction_species.* must be not null"):
710
+ sbml_dfs.validate()
711
+
712
+ # Null SBO terms
713
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
714
+ sbml_dfs.reaction_species[SBML_DFS.SBO_TERM] = [None]
715
+ with pytest.raises(
716
+ ValueError, match="sbo_terms were None; all terms should be defined"
717
+ ):
718
+ sbml_dfs.validate()
719
+
720
+ # Invalid SBO terms
721
+ sbml_dfs = minimal_valid_sbml_dfs.copy()
722
+ sbml_dfs.reaction_species[SBML_DFS.SBO_TERM] = ["INVALID_SBO_TERM"]
723
+ with pytest.raises(ValueError, match="sbo_terms were not defined"):
724
+ sbml_dfs.validate()
725
+
726
+
727
+ def test_validate_identifiers(minimal_valid_sbml_dfs):
728
+ """Test _validate_identifiers fails when identifiers are missing."""
729
+ minimal_valid_sbml_dfs.species[SBML_DFS.S_IDENTIFIERS] = [None]
730
+ with pytest.raises(ValueError, match="species has .+ missing ids"):
731
+ minimal_valid_sbml_dfs.validate()
732
+
733
+
734
+ def test_validate_sources(minimal_valid_sbml_dfs):
735
+ """Test _validate_sources fails when sources are missing."""
736
+ minimal_valid_sbml_dfs.species[SBML_DFS.S_SOURCE] = [None]
737
+ with pytest.raises(ValueError, match="species has .+ missing sources"):
738
+ minimal_valid_sbml_dfs.validate()
739
+
740
+
741
+ def test_validate_species_data(minimal_valid_sbml_dfs):
742
+ """Test _validate_species_data fails when species_data has invalid structure."""
743
+ invalid_data = pd.DataFrame(
744
+ {"extra_info": ["test"]}, index=pd.Index(["S99999"], name=SBML_DFS.S_ID)
745
+ ) # Non-existent species
746
+ minimal_valid_sbml_dfs.species_data["invalid"] = invalid_data
747
+ with pytest.raises(ValueError, match="species data invalid was invalid"):
748
+ minimal_valid_sbml_dfs.validate()
749
+
750
+
751
+ def test_validate_reactions_data(minimal_valid_sbml_dfs):
752
+ """Test _validate_reactions_data fails when reactions_data has invalid structure."""
753
+ invalid_data = pd.DataFrame(
754
+ {"extra_info": ["test"]}, index=pd.Index(["R99999"], name=SBML_DFS.R_ID)
755
+ ) # Non-existent reaction
756
+ minimal_valid_sbml_dfs.reactions_data["invalid"] = invalid_data
757
+ with pytest.raises(ValueError, match="reactions data invalid was invalid"):
758
+ minimal_valid_sbml_dfs.validate()
759
+
760
+
761
+ def test_validate_passes_with_valid_data(minimal_valid_sbml_dfs):
762
+ """Test that validation passes with completely valid data."""
763
+ minimal_valid_sbml_dfs.validate() # Should not raise any exceptions