napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. napistu/__main__.py +38 -27
  2. napistu/consensus.py +22 -27
  3. napistu/constants.py +91 -65
  4. napistu/context/filtering.py +2 -1
  5. napistu/identifiers.py +3 -6
  6. napistu/indices.py +3 -1
  7. napistu/ingestion/bigg.py +6 -6
  8. napistu/ingestion/sbml.py +298 -295
  9. napistu/ingestion/string.py +16 -19
  10. napistu/ingestion/trrust.py +22 -27
  11. napistu/ingestion/yeast.py +2 -1
  12. napistu/matching/interactions.py +4 -4
  13. napistu/matching/species.py +1 -1
  14. napistu/modify/uncompartmentalize.py +1 -1
  15. napistu/network/net_create.py +1 -1
  16. napistu/network/paths.py +1 -1
  17. napistu/ontologies/dogma.py +2 -1
  18. napistu/ontologies/genodexito.py +5 -1
  19. napistu/ontologies/renaming.py +4 -0
  20. napistu/sbml_dfs_core.py +1343 -2167
  21. napistu/sbml_dfs_utils.py +1086 -143
  22. napistu/utils.py +52 -41
  23. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/METADATA +2 -2
  24. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/RECORD +40 -40
  25. tests/conftest.py +113 -13
  26. tests/test_consensus.py +161 -4
  27. tests/test_context_filtering.py +2 -2
  28. tests/test_gaps.py +26 -15
  29. tests/test_network_net_create.py +1 -1
  30. tests/test_network_precompute.py +1 -1
  31. tests/test_ontologies_genodexito.py +3 -0
  32. tests/test_ontologies_mygene.py +3 -0
  33. tests/test_ontologies_renaming.py +28 -24
  34. tests/test_sbml_dfs_core.py +260 -211
  35. tests/test_sbml_dfs_utils.py +194 -36
  36. tests/test_utils.py +19 -0
  37. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/WHEEL +0 -0
  38. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/entry_points.txt +0 -0
  39. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/licenses/LICENSE +0 -0
  40. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/top_level.txt +0 -0
tests/test_sbml_dfs_utils.py CHANGED
@@ -3,7 +3,14 @@ from __future__ import annotations
 import pandas as pd

 from napistu import sbml_dfs_utils
-from napistu.constants import BQB, BQB_DEFINING_ATTRS, BQB_DEFINING_ATTRS_LOOSE
+from napistu.constants import (
+    BQB,
+    BQB_DEFINING_ATTRS,
+    BQB_DEFINING_ATTRS_LOOSE,
+    SBML_DFS,
+    IDENTIFIERS,
+    SBOTERM_NAMES,
+)


 def test_id_formatter():
@@ -17,47 +24,198 @@ def test_id_formatter():
     assert list(input_vals) == inv_ids


-def test_get_characteristic_species_ids():
-    """
-    Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
-    """
-    # Create mock species identifiers data
-    mock_species_ids = pd.DataFrame(
-        {
-            "s_id": ["s1", "s2", "s3", "s4", "s5"],
-            "identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
-            "ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
-            "bqb": [
-                "BQB_IS",
-                "BQB_IS",
-                "BQB_HAS_PART",
-                "BQB_HAS_VERSION",
-                "BQB_ENCODES",
-            ],
-        }
+def test_filter_to_characteristic_species_ids():
+
+    species_ids_dict = {
+        SBML_DFS.S_ID: ["large_complex"] * 6
+        + ["small_complex"] * 2
+        + ["proteinA", "proteinB"]
+        + ["proteinC"] * 3
+        + [
+            "promiscuous_complexA",
+            "promiscuous_complexB",
+            "promiscuous_complexC",
+            "promiscuous_complexD",
+            "promiscuous_complexE",
+        ],
+        IDENTIFIERS.ONTOLOGY: ["complexportal"]
+        + ["HGNC"] * 7
+        + ["GO"] * 2
+        + ["ENSG", "ENSP", "pubmed"]
+        + ["HGNC"] * 5,
+        IDENTIFIERS.IDENTIFIER: [
+            "CPX-BIG",
+            "mem1",
+            "mem2",
+            "mem3",
+            "mem4",
+            "mem5",
+            "part1",
+            "part2",
+            "GO:1",
+            "GO:2",
+            "dna_seq",
+            "protein_seq",
+            "my_cool_pub",
+        ]
+        + ["promiscuous_complex"] * 5,
+        IDENTIFIERS.BQB: [BQB.IS]
+        + [BQB.HAS_PART] * 7
+        + [BQB.IS] * 2
+        + [
+            # these are retained if BQB_DEFINING_ATTRS_LOOSE is used
+            BQB.ENCODES,
+            BQB.IS_ENCODED_BY,
+            # this should always be removed
+            BQB.IS_DESCRIBED_BY,
+        ]
+        + [BQB.HAS_PART] * 5,
+    }
+
+    species_ids = pd.DataFrame(species_ids_dict)
+
+    characteristic_ids_narrow = sbml_dfs_utils.filter_to_characteristic_species_ids(
+        species_ids,
+        defining_biological_qualifiers=BQB_DEFINING_ATTRS,
+        max_complex_size=4,
+        max_promiscuity=4,
+    )
+
+    EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
+    assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
+
+    characteristic_ids_loose = sbml_dfs_utils.filter_to_characteristic_species_ids(
+        species_ids,
+        # include encodes and is_encoded_by as equivalent to is
+        defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
+        max_complex_size=4,
+        # expand promiscuity to default value
+        max_promiscuity=20,
     )

-    # Create mock SBML_dfs object
-    class MockSBML_dfs:
-        def get_identifiers(self, entity_type):
-            return mock_species_ids
+    EXPECTED_IDS = [
+        "CPX-BIG",
+        "GO:1",
+        "GO:2",
+        "dna_seq",
+        "protein_seq",
+        "part1",
+        "part2",
+    ] + ["promiscuous_complex"] * 5
+    assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS

-    mock_sbml = MockSBML_dfs()

-    # Test dogmatic case (default)
-    expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART]  # noqa: F841
-    dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(mock_sbml)
-    expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
+def test_formula(sbml_dfs):
+    # create a formula string

-    pd.testing.assert_frame_equal(dogmatic_result, expected_dogmatic, check_like=True)
+    an_r_id = sbml_dfs.reactions.index[0]

-    # Test non-dogmatic case
-    expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART]  # noqa: F841
-    non_dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(
-        mock_sbml, dogmatic=False
+    reaction_species_df = sbml_dfs.reaction_species[
+        sbml_dfs.reaction_species["r_id"] == an_r_id
+    ].merge(sbml_dfs.compartmentalized_species, left_on="sc_id", right_index=True)
+
+    formula_str = sbml_dfs_utils.construct_formula_string(
+        reaction_species_df, sbml_dfs.reactions, name_var="sc_name"
     )
-    expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")

-    pd.testing.assert_frame_equal(
-        non_dogmatic_result, expected_non_dogmatic, check_like=True
+    assert isinstance(formula_str, str)
+    assert (
+        formula_str
+        == "CO2 [extracellular region] -> CO2 [cytosol] ---- modifiers: AQP1 tetramer [plasma membrane]]"
     )
+
+
+def test_find_underspecified_reactions():
+
+    reaction_w_regulators = pd.DataFrame(
+        {
+            SBML_DFS.SC_ID: ["A", "B", "C", "D", "E", "F", "G"],
+            SBML_DFS.STOICHIOMETRY: [-1, -1, 1, 1, 0, 0, 0],
+            SBML_DFS.SBO_TERM: [
+                SBOTERM_NAMES.REACTANT,
+                SBOTERM_NAMES.REACTANT,
+                SBOTERM_NAMES.PRODUCT,
+                SBOTERM_NAMES.PRODUCT,
+                SBOTERM_NAMES.CATALYST,
+                SBOTERM_NAMES.CATALYST,
+                SBOTERM_NAMES.STIMULATOR,
+            ],
+        }
+    ).assign(r_id="bar")
+    reaction_w_regulators[SBML_DFS.RSC_ID] = [
+        f"rsc_{i}" for i in range(len(reaction_w_regulators))
+    ]
+    reaction_w_regulators.set_index(SBML_DFS.RSC_ID, inplace=True)
+    reaction_w_regulators = sbml_dfs_utils.add_sbo_role(reaction_w_regulators)
+
+    reaction_w_interactors = pd.DataFrame(
+        {
+            SBML_DFS.SC_ID: ["A", "B"],
+            SBML_DFS.STOICHIOMETRY: [-1, 1],
+            SBML_DFS.SBO_TERM: [SBOTERM_NAMES.REACTANT, SBOTERM_NAMES.REACTANT],
+        }
+    ).assign(r_id="baz")
+    reaction_w_interactors[SBML_DFS.RSC_ID] = [
+        f"rsc_{i}" for i in range(len(reaction_w_interactors))
+    ]
+    reaction_w_interactors.set_index(SBML_DFS.RSC_ID, inplace=True)
+    reaction_w_interactors = sbml_dfs_utils.add_sbo_role(reaction_w_interactors)
+
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_0", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == {"bar"}
+
+    # missing one enzyme -> operable
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_4", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == set()
+
+    # missing one product -> inoperable
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_2", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == {"bar"}
+
+    # missing all enzymes -> inoperable
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_4", "new"] = False
+    working_reactions.loc["rsc_5", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == {"bar"}
+
+    # missing regulators -> operable
+    working_reactions = reaction_w_regulators.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_6", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == set()
+
+    # remove an interactor
+    working_reactions = reaction_w_interactors.copy()
+    working_reactions["new"] = True
+    working_reactions.loc["rsc_0", "new"] = False
+    working_reactions
+    result = sbml_dfs_utils._find_underspecified_reactions(working_reactions)
+    assert result == {"baz"}
+
+
+def test_stubbed_compartment():
+    compartment = sbml_dfs_utils.stub_compartments()
+
+    assert compartment["c_Identifiers"].iloc[0].ids[0] == {
+        "ontology": "go",
+        "identifier": "GO:0005575",
+        "url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
+        "bqb": "BQB_IS",
+    }
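The `test_find_underspecified_reactions` cases above encode a simple operability rule: a reaction becomes underspecified when any reactant or product is dropped, or when all of its catalysts are dropped, while losing only some catalysts or any regulator leaves it operable. A minimal standalone sketch of that rule (hypothetical helper and simplified column names, not napistu's actual `_find_underspecified_reactions`) might look like:

```python
import pandas as pd

def find_underspecified_sketch(rxn_species: pd.DataFrame) -> set:
    """Illustrative only: flag r_ids that lose a reactant/product or every catalyst.

    Assumes simplified columns: "r_id", "sbo_role" (e.g. "reactant", "product",
    "catalyst", "stimulator") and a boolean "new" marking whether the species
    survives filtering.
    """
    underspecified = set()
    for r_id, grp in rxn_species.groupby("r_id"):
        defining = grp[grp["sbo_role"].isin(["reactant", "product"])]
        catalysts = grp[grp["sbo_role"] == "catalyst"]
        lost_defining = (~defining["new"]).any()
        lost_all_catalysts = len(catalysts) > 0 and (~catalysts["new"]).all()
        if lost_defining or lost_all_catalysts:
            underspecified.add(r_id)
    return underspecified
```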
tests/test_utils.py CHANGED
@@ -686,3 +686,22 @@ def test_safe_fill():
         "a_very_long\nstringggg",
         "",
     ]
+
+
+def test_update_pathological_names():
+
+    # All numeric
+    s = pd.Series(["1", "2", "3"])
+    out = utils.update_pathological_names(s, "prefix_")
+    assert all(x.startswith("prefix_") for x in out)
+    assert list(out) == ["prefix_1", "prefix_2", "prefix_3"]
+
+    # Mixed numeric and non-numeric
+    s2 = pd.Series(["1", "foo", "3"])
+    out2 = utils.update_pathological_names(s2, "prefix_")
+    assert list(out2) == ["1", "foo", "3"]
+
+    # All non-numeric
+    s3 = pd.Series(["foo", "bar", "baz"])
+    out3 = utils.update_pathological_names(s3, "prefix_")
+    assert list(out3) == ["foo", "bar", "baz"]
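The assertions above pin down the intended contract of `update_pathological_names`: the prefix is applied only when every name in the series is numeric; otherwise the series passes through unchanged. A minimal sketch with that behavior (an illustration consistent with the test, not necessarily napistu's implementation) is:

```python
import pandas as pd

def update_pathological_names_sketch(names: pd.Series, prefix: str) -> pd.Series:
    # Prefix only when every entry is purely numeric (e.g. auto-generated ids).
    if names.str.isdigit().all():
        return prefix + names
    return names

# Consistent with the test: ["1", "2", "3"] -> ["prefix_1", "prefix_2", "prefix_3"],
# while ["1", "foo", "3"] and ["foo", "bar", "baz"] are returned unchanged.
```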