PyPI - napistu - Versions diffs - 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl - Mend

napistu 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

napistu/__main__.py +18 -18
napistu/consensus.py +3 -2
napistu/constants.py +5 -5
napistu/context/filtering.py +2 -1
napistu/identifiers.py +3 -6
napistu/ingestion/bigg.py +6 -6
napistu/ingestion/string.py +2 -1
napistu/ingestion/yeast.py +2 -1
napistu/matching/interactions.py +4 -4
napistu/modify/uncompartmentalize.py +1 -1
napistu/network/ig_utils.py +35 -0
napistu/network/net_create.py +1 -1
napistu/network/paths.py +1 -1
napistu/network/precompute.py +2 -1
napistu/ontologies/dogma.py +2 -1
napistu/sbml_dfs_core.py +1330 -2016
napistu/sbml_dfs_utils.py +1082 -143
napistu/source.py +1 -1
{napistu-0.3.4.dist-info → napistu-0.3.6.dist-info}/METADATA +2 -2
{napistu-0.3.4.dist-info → napistu-0.3.6.dist-info}/RECORD +32 -32
tests/conftest.py +43 -0
tests/test_consensus.py +88 -0
tests/test_context_filtering.py +2 -2
tests/test_network_ig_utils.py +36 -0
tests/test_ontologies_genodexito.py +3 -0
tests/test_ontologies_mygene.py +3 -0
tests/test_sbml_dfs_core.py +221 -191
tests/test_sbml_dfs_utils.py +194 -36
{napistu-0.3.4.dist-info → napistu-0.3.6.dist-info}/WHEEL +0 -0
{napistu-0.3.4.dist-info → napistu-0.3.6.dist-info}/entry_points.txt +0 -0
{napistu-0.3.4.dist-info → napistu-0.3.6.dist-info}/licenses/LICENSE +0 -0
{napistu-0.3.4.dist-info → napistu-0.3.6.dist-info}/top_level.txt +0 -0

tests/test_sbml_dfs_core.py CHANGED Viewed

@@ -6,19 +6,79 @@ import numpy as np
 import pandas as pd
 import pytest
 from napistu import sbml_dfs_core
+from napistu.source import Source
 from napistu.ingestion import sbml
 from napistu.modify import pathwayannot
 from napistu import identifiers as napistu_identifiers
 from napistu.constants import (
     SBML_DFS,
-    SBOTERM_NAMES,
     BQB_DEFINING_ATTRS,
     BQB_DEFINING_ATTRS_LOOSE,
     BQB,
-    IDENTIFIERS,
 )
 from napistu.sbml_dfs_core import SBML_dfs
+from unittest.mock import patch
+@pytest.fixture
+def test_data():
+    """Create test data for SBML integration tests."""
+    # Test compartments
+    compartments_df = pd.DataFrame(
+        [
+            {"c_name": "nucleus", "c_Identifiers": None},
+            {"c_name": "cytoplasm", "c_Identifiers": None},
+        ]
+    )
+    # Test species with extra data
+    species_df = pd.DataFrame(
+        [
+            {
+                "s_name": "TP53",
+                "s_Identifiers": None,
+                "gene_type": "tumor_suppressor",
+            },
+            {"s_name": "MDM2", "s_Identifiers": None, "gene_type": "oncogene"},
+            {
+                "s_name": "CDKN1A",
+                "s_Identifiers": None,
+                "gene_type": "cell_cycle",
+            },
+        ]
+    )
+    # Test interactions with extra data
+    interaction_edgelist = pd.DataFrame(
+        [
+            {
+                "upstream_name": "TP53",
+                "downstream_name": "CDKN1A",
+                "upstream_compartment": "nucleus",
+                "downstream_compartment": "nucleus",
+                "r_name": "TP53_activates_CDKN1A",
+                "sbo_term": "SBO:0000459",
+                "r_Identifiers": None,
+                "r_isreversible": False,
+                "confidence": 0.95,
+            },
+            {
+                "upstream_name": "MDM2",
+                "downstream_name": "TP53",
+                "upstream_compartment": "cytoplasm",
+                "downstream_compartment": "nucleus",
+                "r_name": "MDM2_inhibits_TP53",
+                "sbo_term": "SBO:0000020",
+                "r_Identifiers": None,
+                "r_isreversible": False,
+                "confidence": 0.87,
+            },
+        ]
+    )
+    return [interaction_edgelist, species_df, compartments_df, Source(init=True)]
 def test_drop_cofactors(sbml_dfs):
@@ -212,26 +272,6 @@ def test_sbml_dfs_remove_reactions_check_species(sbml_dfs):
     sbml_dfs.validate()
-def test_formula(sbml_dfs):
-    # create a formula string
-    an_r_id = sbml_dfs.reactions.index[0]
-    reaction_species_df = sbml_dfs.reaction_species[
-        sbml_dfs.reaction_species["r_id"] == an_r_id
-    ].merge(sbml_dfs.compartmentalized_species, left_on="sc_id", right_index=True)
-    formula_str = sbml_dfs_core.construct_formula_string(
-        reaction_species_df, sbml_dfs.reactions, name_var="sc_name"
-    )
-    assert isinstance(formula_str, str)
-    assert (
-        formula_str
-        == "CO2 [extracellular region] -> CO2 [cytosol] ---- modifiers: AQP1 tetramer [plasma membrane]]"
-    )
 def test_read_sbml_with_invalid_ids():
     SBML_W_BAD_IDS = "R-HSA-166658.sbml"
     test_path = os.path.abspath(os.path.join(__file__, os.pardir))
@@ -243,17 +283,6 @@ def test_read_sbml_with_invalid_ids():
     assert isinstance(sbml_dfs_core.SBML_dfs(sbml_w_bad_ids), sbml_dfs_core.SBML_dfs)
-def test_stubbed_compartment():
-    compartment = sbml_dfs_core._stub_compartments()
-    assert compartment["c_Identifiers"].iloc[0].ids[0] == {
-        "ontology": "go",
-        "identifier": "GO:0005575",
-        "url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
-        "bqb": "BQB_IS",
-    }
 def test_get_table(sbml_dfs):
     assert isinstance(sbml_dfs.get_table("species"), pd.DataFrame)
     assert isinstance(sbml_dfs.get_table("species", {"id"}), pd.DataFrame)
@@ -304,10 +333,20 @@ def test_species_status(sbml_dfs):
     select_species = species[species["s_name"] == "OxyHbA"]
     assert select_species.shape[0] == 1
-    status = sbml_dfs_core.species_status(select_species.index[0], sbml_dfs)
+    status = sbml_dfs.species_status(select_species.index[0])
+    # expected columns
+    expected_columns = [
+        SBML_DFS.SC_NAME,
+        SBML_DFS.STOICHIOMETRY,
+        SBML_DFS.R_NAME,
+        "r_formula_str",
+    ]
+    assert all(col in status.columns for col in expected_columns)
     assert (
         status["r_formula_str"][0]
-        == "4.0 H+ + OxyHbA + 4.0 CO2 -> 4.0 O2 + Protonated Carbamino DeoxyHbA [cytosol]"
+        == "cytosol: 4.0 CO2 + 4.0 H+ + OxyHbA -> 4.0 O2 + Protonated Carbamino DeoxyHbA"
     )
@@ -374,91 +413,6 @@ def test_get_identifiers_handles_missing_values():
     ), "Only Identifiers objects should be returned."
-def test_find_underspecified_reactions():
-    reaction_w_regulators = pd.DataFrame(
-        {
-            SBML_DFS.SC_ID: ["A", "B", "C", "D", "E", "F", "G"],
-            SBML_DFS.STOICHIOMETRY: [-1, -1, 1, 1, 0, 0, 0],
-            SBML_DFS.SBO_TERM: [
-                SBOTERM_NAMES.REACTANT,
-                SBOTERM_NAMES.REACTANT,
-                SBOTERM_NAMES.PRODUCT,
-                SBOTERM_NAMES.PRODUCT,
-                SBOTERM_NAMES.CATALYST,
-                SBOTERM_NAMES.CATALYST,
-                SBOTERM_NAMES.STIMULATOR,
-            ],
-        }
-    ).assign(r_id="bar")
-    reaction_w_regulators[SBML_DFS.RSC_ID] = [
-        f"rsc_{i}" for i in range(len(reaction_w_regulators))
-    ]
-    reaction_w_regulators.set_index(SBML_DFS.RSC_ID, inplace=True)
-    reaction_w_regulators = sbml_dfs_core.add_sbo_role(reaction_w_regulators)
-    reaction_w_interactors = pd.DataFrame(
-        {
-            SBML_DFS.SC_ID: ["A", "B"],
-            SBML_DFS.STOICHIOMETRY: [-1, 1],
-            SBML_DFS.SBO_TERM: [SBOTERM_NAMES.REACTANT, SBOTERM_NAMES.REACTANT],
-        }
-    ).assign(r_id="baz")
-    reaction_w_interactors[SBML_DFS.RSC_ID] = [
-        f"rsc_{i}" for i in range(len(reaction_w_interactors))
-    ]
-    reaction_w_interactors.set_index(SBML_DFS.RSC_ID, inplace=True)
-    reaction_w_interactors = sbml_dfs_core.add_sbo_role(reaction_w_interactors)
-    working_reactions = reaction_w_regulators.copy()
-    working_reactions["new"] = True
-    working_reactions.loc["rsc_0", "new"] = False
-    working_reactions
-    result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
-    assert result == {"bar"}
-    # missing one enzyme -> operable
-    working_reactions = reaction_w_regulators.copy()
-    working_reactions["new"] = True
-    working_reactions.loc["rsc_4", "new"] = False
-    working_reactions
-    result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
-    assert result == set()
-    # missing one product -> inoperable
-    working_reactions = reaction_w_regulators.copy()
-    working_reactions["new"] = True
-    working_reactions.loc["rsc_2", "new"] = False
-    working_reactions
-    result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
-    assert result == {"bar"}
-    # missing all enzymes -> inoperable
-    working_reactions = reaction_w_regulators.copy()
-    working_reactions["new"] = True
-    working_reactions.loc["rsc_4", "new"] = False
-    working_reactions.loc["rsc_5", "new"] = False
-    working_reactions
-    result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
-    assert result == {"bar"}
-    # missing regulators -> operable
-    working_reactions = reaction_w_regulators.copy()
-    working_reactions["new"] = True
-    working_reactions.loc["rsc_6", "new"] = False
-    working_reactions
-    result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
-    assert result == set()
-    # remove an interactor
-    working_reactions = reaction_w_interactors.copy()
-    working_reactions["new"] = True
-    working_reactions.loc["rsc_0", "new"] = False
-    working_reactions
-    result = sbml_dfs_core.find_underspecified_reactions(working_reactions)
-    assert result == {"baz"}
 def test_remove_entity_data_success(sbml_dfs_w_data):
     """Test successful removal of entity data."""
     # Get initial data
@@ -502,82 +456,158 @@ def test_remove_entity_data_nonexistent(sbml_dfs_w_data, caplog):
     sbml_dfs_w_data.validate()
-def test_filter_to_characteristic_species_ids():
-    species_ids_dict = {
-        SBML_DFS.S_ID: ["large_complex"] * 6
-        + ["small_complex"] * 2
-        + ["proteinA", "proteinB"]
-        + ["proteinC"] * 3
-        + [
-            "promiscuous_complexA",
-            "promiscuous_complexB",
-            "promiscuous_complexC",
-            "promiscuous_complexD",
-            "promiscuous_complexE",
-        ],
-        IDENTIFIERS.ONTOLOGY: ["complexportal"]
-        + ["HGNC"] * 7
-        + ["GO"] * 2
-        + ["ENSG", "ENSP", "pubmed"]
-        + ["HGNC"] * 5,
-        IDENTIFIERS.IDENTIFIER: [
-            "CPX-BIG",
-            "mem1",
-            "mem2",
-            "mem3",
-            "mem4",
-            "mem5",
-            "part1",
-            "part2",
-            "GO:1",
-            "GO:2",
-            "dna_seq",
-            "protein_seq",
-            "my_cool_pub",
-        ]
-        + ["promiscuous_complex"] * 5,
-        IDENTIFIERS.BQB: [BQB.IS]
-        + [BQB.HAS_PART] * 7
-        + [BQB.IS] * 2
-        + [
-            # these are retained if BQB_DEFINING_ATTRS_LOOSE is used
-            BQB.ENCODES,
-            BQB.IS_ENCODED_BY,
-            # this should always be removed
-            BQB.IS_DESCRIBED_BY,
-        ]
-        + [BQB.HAS_PART] * 5,
+def test_get_characteristic_species_ids():
+    """
+    Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
+    """
+    # Create mock species identifiers data
+    mock_species_ids = pd.DataFrame(
+        {
+            "s_id": ["s1", "s2", "s3", "s4", "s5"],
+            "identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
+            "ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
+            "bqb": [
+                "BQB_IS",
+                "BQB_IS",
+                "BQB_HAS_PART",
+                "BQB_HAS_VERSION",
+                "BQB_ENCODES",
+            ],
+        }
+    )
+    # Create minimal required tables for SBML_dfs
+    compartments = pd.DataFrame(
+        {"c_name": ["cytosol"], "c_Identifiers": [None]}, index=["C1"]
+    )
+    compartments.index.name = "c_id"
+    species = pd.DataFrame(
+        {"s_name": ["A"], "s_Identifiers": [None], "s_source": [None]}, index=["s1"]
+    )
+    species.index.name = "s_id"
+    compartmentalized_species = pd.DataFrame(
+        {
+            "sc_name": ["A [cytosol]"],
+            "s_id": ["s1"],
+            "c_id": ["C1"],
+            "sc_source": [None],
+        },
+        index=["SC1"],
+    )
+    compartmentalized_species.index.name = "sc_id"
+    reactions = pd.DataFrame(
+        {
+            "r_name": ["rxn1"],
+            "r_Identifiers": [None],
+            "r_source": [None],
+            "r_isreversible": [False],
+        },
+        index=["R1"],
+    )
+    reactions.index.name = "r_id"
+    reaction_species = pd.DataFrame(
+        {
+            "r_id": ["R1"],
+            "sc_id": ["SC1"],
+            "stoichiometry": [1],
+            "sbo_term": ["SBO:0000459"],
+        },
+        index=["RSC1"],
+    )
+    reaction_species.index.name = "rsc_id"
+    sbml_dict = {
+        "compartments": compartments,
+        "species": species,
+        "compartmentalized_species": compartmentalized_species,
+        "reactions": reactions,
+        "reaction_species": reaction_species,
     }
+    sbml_dfs = SBML_dfs(sbml_dict, validate=False, resolve=False)
+    # Test dogmatic case (default)
+    expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART]  # noqa: F841
+    with patch.object(sbml_dfs, "get_identifiers", return_value=mock_species_ids):
+        dogmatic_result = sbml_dfs.get_characteristic_species_ids()
+        expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
+        pd.testing.assert_frame_equal(
+            dogmatic_result, expected_dogmatic, check_like=True
+        )
+    # Test non-dogmatic case
+    expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART]  # noqa: F841
+    with patch.object(sbml_dfs, "get_identifiers", return_value=mock_species_ids):
+        non_dogmatic_result = sbml_dfs.get_characteristic_species_ids(dogmatic=False)
+        expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
+        pd.testing.assert_frame_equal(
+            non_dogmatic_result, expected_non_dogmatic, check_like=True
+        )
+def test_sbml_basic_functionality(test_data):
+    """Test basic SBML_dfs creation from edgelist."""
+    interaction_edgelist, species_df, compartments_df, interaction_source = test_data
+    result = sbml_dfs_core.sbml_dfs_from_edgelist(
+        interaction_edgelist, species_df, compartments_df, interaction_source
+    )
+    assert isinstance(result, SBML_dfs)
+    assert len(result.species) == 3
+    assert len(result.compartments) == 2
+    assert len(result.reactions) == 2
+    assert (
+        len(result.compartmentalized_species) == 3
+    )  # TP53[nucleus], CDKN1A[nucleus], MDM2[cytoplasm]
+    assert len(result.reaction_species) == 4  # 2 reactions * 2 species each
+def test_sbml_extra_data_preservation(test_data):
+    """Test that extra columns are preserved when requested."""
+    interaction_edgelist, species_df, compartments_df, interaction_source = test_data
+    result = sbml_dfs_core.sbml_dfs_from_edgelist(
+        interaction_edgelist,
+        species_df,
+        compartments_df,
+        interaction_source,
+        keep_species_data=True,
+        keep_reactions_data="experiment",
+    )
-    species_ids = pd.DataFrame(species_ids_dict)
+    assert hasattr(result, "species_data")
+    assert hasattr(result, "reactions_data")
+    assert "gene_type" in result.species_data["source"].columns
+    assert "confidence" in result.reactions_data["experiment"].columns
-    characteristic_ids_narrow = sbml_dfs_core.filter_to_characteristic_species_ids(
-        species_ids,
-        defining_biological_qualifiers=BQB_DEFINING_ATTRS,
-        max_complex_size=4,
-        max_promiscuity=4,
+def test_sbml_compartmentalized_naming(test_data):
+    """Test compartmentalized species naming convention."""
+    interaction_edgelist, species_df, compartments_df, interaction_source = test_data
+    result = sbml_dfs_core.sbml_dfs_from_edgelist(
+        interaction_edgelist, species_df, compartments_df, interaction_source
     )
-    EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
-    assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
+    comp_names = result.compartmentalized_species["sc_name"].tolist()
+    assert "TP53 [nucleus]" in comp_names
+    assert "MDM2 [cytoplasm]" in comp_names
+    assert "CDKN1A [nucleus]" in comp_names
+def test_sbml_custom_stoichiometry(test_data):
+    """Test custom stoichiometry parameters."""
+    interaction_edgelist, species_df, compartments_df, interaction_source = test_data
-    characteristic_ids_loose = sbml_dfs_core.filter_to_characteristic_species_ids(
-        species_ids,
-        # include encodes and is_encoded_by as equivalent to is
-        defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
-        max_complex_size=4,
-        # expand promiscuity to default value
-        max_promiscuity=20,
+    result = sbml_dfs_core.sbml_dfs_from_edgelist(
+        interaction_edgelist,
+        species_df,
+        compartments_df,
+        interaction_source,
+        upstream_stoichiometry=2,
+        downstream_stoichiometry=3,
     )
-    EXPECTED_IDS = [
-        "CPX-BIG",
-        "GO:1",
-        "GO:2",
-        "dna_seq",
-        "protein_seq",
-        "part1",
-        "part2",
-    ] + ["promiscuous_complex"] * 5
-    assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
+    stoichiometries = result.reaction_species["stoichiometry"].unique()
+    assert 2 in stoichiometries  # upstream
+    assert 3 in stoichiometries  # downstream

napistu 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

napistu 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl