napistu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +12 -0
- napistu/__main__.py +867 -0
- napistu/consensus.py +1557 -0
- napistu/constants.py +500 -0
- napistu/gcs/__init__.py +10 -0
- napistu/gcs/constants.py +69 -0
- napistu/gcs/downloads.py +180 -0
- napistu/identifiers.py +805 -0
- napistu/indices.py +227 -0
- napistu/ingestion/__init__.py +10 -0
- napistu/ingestion/bigg.py +146 -0
- napistu/ingestion/constants.py +296 -0
- napistu/ingestion/cpr_edgelist.py +106 -0
- napistu/ingestion/identifiers_etl.py +148 -0
- napistu/ingestion/obo.py +268 -0
- napistu/ingestion/psi_mi.py +276 -0
- napistu/ingestion/reactome.py +218 -0
- napistu/ingestion/sbml.py +621 -0
- napistu/ingestion/string.py +356 -0
- napistu/ingestion/trrust.py +285 -0
- napistu/ingestion/yeast.py +147 -0
- napistu/mechanism_matching.py +597 -0
- napistu/modify/__init__.py +10 -0
- napistu/modify/constants.py +86 -0
- napistu/modify/curation.py +628 -0
- napistu/modify/gaps.py +635 -0
- napistu/modify/pathwayannot.py +1381 -0
- napistu/modify/uncompartmentalize.py +264 -0
- napistu/network/__init__.py +10 -0
- napistu/network/constants.py +117 -0
- napistu/network/neighborhoods.py +1594 -0
- napistu/network/net_create.py +1647 -0
- napistu/network/net_utils.py +652 -0
- napistu/network/paths.py +500 -0
- napistu/network/precompute.py +221 -0
- napistu/rpy2/__init__.py +127 -0
- napistu/rpy2/callr.py +168 -0
- napistu/rpy2/constants.py +101 -0
- napistu/rpy2/netcontextr.py +464 -0
- napistu/rpy2/rids.py +697 -0
- napistu/sbml_dfs_core.py +2216 -0
- napistu/sbml_dfs_utils.py +304 -0
- napistu/source.py +394 -0
- napistu/utils.py +943 -0
- napistu-0.1.0.dist-info/METADATA +56 -0
- napistu-0.1.0.dist-info/RECORD +77 -0
- napistu-0.1.0.dist-info/WHEEL +5 -0
- napistu-0.1.0.dist-info/entry_points.txt +2 -0
- napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
- napistu-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/conftest.py +83 -0
- tests/test_consensus.py +255 -0
- tests/test_constants.py +20 -0
- tests/test_curation.py +134 -0
- tests/test_data/__init__.py +0 -0
- tests/test_edgelist.py +20 -0
- tests/test_gcs.py +23 -0
- tests/test_identifiers.py +151 -0
- tests/test_igraph.py +353 -0
- tests/test_indices.py +88 -0
- tests/test_mechanism_matching.py +126 -0
- tests/test_net_utils.py +66 -0
- tests/test_netcontextr.py +105 -0
- tests/test_obo.py +34 -0
- tests/test_pathwayannot.py +95 -0
- tests/test_precomputed_distances.py +222 -0
- tests/test_rpy2.py +61 -0
- tests/test_sbml.py +46 -0
- tests/test_sbml_dfs_create.py +307 -0
- tests/test_sbml_dfs_utils.py +22 -0
- tests/test_sbo.py +11 -0
- tests/test_set_coverage.py +50 -0
- tests/test_source.py +67 -0
- tests/test_uncompartmentalize.py +40 -0
- tests/test_utils.py +487 -0
- tests/utils.py +30 -0
tests/test_edgelist.py
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
from napistu.ingestion import cpr_edgelist
|
5
|
+
|
6
|
+
|
7
|
+
def test_edgelist_remove_reciprocal_reactions():
    """Reciprocal A->B / B->A pairs collapse to a single retained edge each."""
    reciprocal_edges = pd.DataFrame(
        {"from": ["A", "B", "C", "D"], "to": ["B", "A", "D", "C"]}
    )

    deduplicated = cpr_edgelist.remove_reciprocal_interactions(reciprocal_edges)

    # two reciprocal pairs -> two surviving (from, to) rows
    assert deduplicated.shape == (2, 2)
|
13
|
+
|
14
|
+
|
15
|
+
################################################
|
16
|
+
# __main__
|
17
|
+
################################################
|
18
|
+
|
19
|
+
# allow running this test module directly as a script (outside pytest)
if __name__ == "__main__":
    test_edgelist_remove_reciprocal_reactions()
|
tests/test_gcs.py
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
import pytest
|
5
|
+
import shutil
|
6
|
+
|
7
|
+
from napistu.gcs.downloads import load_public_cpr_asset
|
8
|
+
|
9
|
+
@pytest.mark.skip_on_windows
def test_download_and_load_gcs_asset():
    """Fetch a public test asset from GCS and confirm its local path."""
    download_dir = "/tmp/test_pathway"

    local_path = load_public_cpr_asset(
        asset="test_pathway", subasset="sbml_dfs", data_dir="/tmp"
    )
    assert local_path == "/tmp/test_pathway/sbml_dfs.pkl"

    # clean-up: remove the downloaded directory and verify it is gone
    shutil.rmtree(download_dir)
    if os.path.exists(download_dir):
        raise Exception(f"Failed to clean up {download_dir}")
|
@@ -0,0 +1,151 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
import pandas as pd
|
7
|
+
from napistu import identifiers
|
8
|
+
|
9
|
+
# logger = logging.getLogger()
|
10
|
+
# logger.setLevel("DEBUG")
|
11
|
+
|
12
|
+
# tests directory, resolved relative to this file so the pytest cwd doesn't matter
test_path = os.path.abspath(os.path.join(__file__, os.pardir))
# example URIs alongside the ontology/identifier each one should parse to
identifier_examples = pd.read_csv(
    os.path.join(test_path, "test_data", "identifier_examples.tsv"),
    sep="\t",
    header=0,
)
|
18
|
+
|
19
|
+
|
20
|
+
def test_identifiers():
    """Identifiers construction, ontology filtering, and value hoisting."""
    kegg_ids = identifiers.Identifiers(
        [{"ontology": "KEGG", "identifier": "C00031", "bqb": "BQB_IS"}]
    )
    assert kegg_ids.ids[0]["ontology"] == "KEGG"

    example_ids = identifiers.Identifiers(
        [
            {"ontology": "SGD", "identifier": "S000004535", "bqb": "BQB_IS"},
            {"ontology": "foo", "identifier": "bar", "bqb": "BQB_IS"},
        ]
    )

    assert type(example_ids) is identifiers.Identifiers

    # summarized filtering: does any entry match the queried ontology?
    assert example_ids.filter("SGD") is True
    assert example_ids.filter("baz") is False

    # per-entry filtering returns one boolean per stored identifier
    assert example_ids.filter("SGD", summarize=False) == [True, False]
    assert example_ids.filter(["SGD", "foo"], summarize=False) == [True, True]
    assert example_ids.filter(["foo", "SGD"], summarize=False) == [True, True]
    assert example_ids.filter(["baz", "bar"], summarize=False) == [False, False]

    # hoist pulls out the identifier for an ontology, or None if absent
    assert example_ids.hoist("SGD") == "S000004535"
    assert example_ids.hoist("baz") is None
|
46
|
+
|
47
|
+
|
48
|
+
def test_identifiers_from_urls():
    """Every example URL parses to the expected ontology and identifier."""
    for _, example in identifier_examples.iterrows():
        parsed = identifiers.Identifiers(
            [
                identifiers.format_uri(
                    example["url"], biological_qualifier_type="BQB_IS"
                )
            ]
        )
        observed = parsed.ids[0]

        assert (
            observed["ontology"] == example["ontology"]
        ), f"ontology {observed['ontology']} does not equal {example['ontology']}"

        assert (
            observed["identifier"] == example["identifier"]
        ), f"identifier {observed['identifier']} does not equal {example['identifier']}"
|
67
|
+
|
68
|
+
|
69
|
+
def test_url_from_identifiers():
    """create_uri_url round-trips each example identifier back to a URL.

    Some ontologies (e.g., chebi) have multiple valid URIs; for those rows
    the expected output is the canonical URL rather than the example's
    original URL.
    """
    for _, example in identifier_examples.iterrows():
        # fix: the original `is not np.nan` relied on missing values being
        # the NaN singleton object, which pandas does not guarantee;
        # pd.isna is the supported missing-value test
        if pd.isna(example["canonical_url"]):
            expected_url_out = example["url"]
        else:
            expected_url_out = example["canonical_url"]

        url_out = identifiers.create_uri_url(
            ontology=example["ontology"], identifier=example["identifier"]
        )

        assert url_out == expected_url_out

    # non-strict treatment returns None instead of raising on a bad identifier
    assert (
        identifiers.create_uri_url(ontology="chebi", identifier="abc", strict=False)
        is None
    )
|
91
|
+
|
92
|
+
|
93
|
+
def test_parsing_ensembl_ids():
    """Ensembl gene/transcript/protein IDs parse to (id, ontology, species)."""
    expected_parses = {
        # human foxp2
        "ENSG00000128573": ("ENSG00000128573", "ensembl_gene", "Homo sapiens"),
        "ENST00000441290": ("ENST00000441290", "ensembl_transcript", "Homo sapiens"),
        "ENSP00000265436": ("ENSP00000265436", "ensembl_protein", "Homo sapiens"),
        # mouse leptin
        "ENSMUSG00000059201": ("ENSMUSG00000059201", "ensembl_gene", "Mus musculus"),
        "ENSMUST00000069789": (
            "ENSMUST00000069789",
            "ensembl_transcript",
            "Mus musculus",
        ),
        # ids embedded in a longer string are still recognized
        "gene=ENSMUSG00000017146": (
            "ENSMUSG00000017146",
            "ensembl_gene",
            "Mus musculus",
        ),
    }

    for raw_id, expected in expected_parses.items():
        assert identifiers.parse_ensembl_id(raw_id) == expected
|
116
|
+
|
117
|
+
|
118
|
+
def test_reciprocal_ensembl_dicts():
    """The paired Ensembl lookup dicts must be exact mutual inverses."""
    species_to_code = identifiers.ENSEMBL_SPECIES_TO_CODE
    species_from_code = identifiers.ENSEMBL_SPECIES_FROM_CODE

    assert len(species_to_code) == len(species_from_code)
    for species, code in species_to_code.items():
        assert species_from_code[code] == species

    types_to_ontology = identifiers.ENSEMBL_MOLECULE_TYPES_TO_ONTOLOGY
    types_from_ontology = identifiers.ENSEMBL_MOLECULE_TYPES_FROM_ONTOLOGY

    assert len(types_to_ontology) == len(types_from_ontology)
    for molecule_type, ontology in types_to_ontology.items():
        assert types_from_ontology[ontology] == molecule_type
|
140
|
+
|
141
|
+
|
142
|
+
################################################
|
143
|
+
# __main__
|
144
|
+
################################################
|
145
|
+
|
146
|
+
if __name__ == "__main__":
    # run every test in this module, in the original order
    for _test in (
        test_identifiers,
        test_identifiers_from_urls,
        test_url_from_identifiers,
        test_parsing_ensembl_ids,
        test_reciprocal_ensembl_dicts,
    ):
        _test()
|
tests/test_igraph.py
ADDED
@@ -0,0 +1,353 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
from napistu import sbml_dfs_core
|
7
|
+
from napistu.constants import DEFAULT_WT_TRANS
|
8
|
+
from napistu.constants import MINI_SBO_FROM_NAME
|
9
|
+
from napistu.ingestion import sbml
|
10
|
+
from napistu.network import neighborhoods
|
11
|
+
from napistu.network import net_create
|
12
|
+
from napistu.network import net_utils
|
13
|
+
from napistu.network import paths
|
14
|
+
|
15
|
+
test_path = os.path.abspath(os.path.join(__file__, os.pardir))
test_data = os.path.join(test_path, "test_data")

# load a small Reactome pathway as the shared example model
sbml_path = os.path.join(test_data, "R-HSA-1237044.sbml")
sbml_model = sbml.SBML(sbml_path).model
sbml_dfs = sbml_dfs_core.SBML_dfs(sbml_model)

# create a dict containing reaction species for a few example reactions
reaction_species_examples_dict = dict()

# stub with a random reaction
r_id = sbml_dfs.reactions.index[0]


def _example_reaction_species(sbo_names, sc_ids, stoichiometries):
    """Build a reaction-species table for r_id indexed by (r_id, sbo_term)."""
    return pd.DataFrame(
        {
            "r_id": [r_id] * len(sbo_names),
            "sbo_term": [MINI_SBO_FROM_NAME[name] for name in sbo_names],
            "sc_id": sc_ids,
            "stoichiometry": stoichiometries,
        }
    ).set_index(["r_id", "sbo_term"])


reaction_species_examples_dict["valid_interactor"] = _example_reaction_species(
    ["interactor", "interactor"], ["sc1", "sc2"], [0, 0]
)

reaction_species_examples_dict["invalid_interactor"] = _example_reaction_species(
    ["interactor", "product"], ["sc1", "sc2"], [0, 0]
)

# simple reaction with just substrates and products
reaction_species_examples_dict["sub_and_prod"] = _example_reaction_species(
    ["reactant", "product"], ["sub", "prod"], [-1, 1]
)

reaction_species_examples_dict["stimulator"] = _example_reaction_species(
    ["reactant", "product", "stimulator"], ["sub", "prod", "stim"], [-1, 1, 0]
)

reaction_species_examples_dict["all_entities"] = _example_reaction_species(
    ["reactant", "product", "stimulator", "catalyst"],
    ["sub", "prod", "stim", "cat"],
    [-1, 1, 0, 0],
)

reaction_species_examples_dict["no_substrate"] = _example_reaction_species(
    ["product", "stimulator", "stimulator", "inhibitor", "catalyst"],
    ["prod", "stim1", "stim2", "inh", "cat"],
    [1, 0, 0, 0, 0],
)
|
104
|
+
|
105
|
+
|
106
|
+
def test_create_cpr_graph():
    """Each supported graph_type builds from the example model without raising."""
    for graph_type in ("bipartite", "regulatory", "surrogate"):
        _ = net_create.create_cpr_graph(sbml_dfs, graph_type=graph_type)
|
110
|
+
|
111
|
+
|
112
|
+
def test_igraph_construction():
    """process_cpr_graph runs on the example model without raising."""
    _ = net_create.process_cpr_graph(sbml_dfs)
|
114
|
+
|
115
|
+
|
116
|
+
def test_igraph_loading():
    """Networks exported to pickles reload with the requested directedness."""
    directeds = [True, False]
    graph_types = ["bipartite", "regulatory"]

    net_utils.export_networks(
        sbml_dfs,
        model_prefix="tmp",
        outdir="/tmp",
        directeds=directeds,
        graph_types=graph_types,
    )

    for graph_type in graph_types:
        for directed in directeds:
            pkl_path = net_utils._create_network_save_string(
                model_prefix="tmp",
                outdir="/tmp",
                directed=directed,
                graph_type=graph_type,
            )
            reloaded_graph = net_utils.read_network_pkl(
                model_prefix="tmp",
                network_dir="/tmp",
                directed=directed,
                graph_type=graph_type,
            )

            assert reloaded_graph.is_directed() == directed
            # cleanup: remove the pickle so reruns start fresh
            os.unlink(pkl_path)
|
147
|
+
|
148
|
+
|
149
|
+
def test_shortest_paths():
    """Shortest reaction paths between NADH and NAD+ in both graph modes."""
    species = sbml_dfs.species
    source_ids = species[species["s_name"] == "NADH"].index.tolist()
    dest_ids = species[species["s_name"] == "NAD+"].index.tolist()
    target_species_paths = net_utils.compartmentalize_species_pairs(
        sbml_dfs, source_ids, dest_ids
    )

    # run directed first, then undirected (the assertion below checks the
    # undirected result, matching the original sequence of calls)
    for directed in (True, False):
        cpr_graph = net_create.process_cpr_graph(
            sbml_dfs, directed=directed, weighting_strategy="topology"
        )
        (
            all_shortest_reaction_paths_df,
            _all_shortest_reaction_path_edges_df,
            _edge_sources,
            _paths_graph,
        ) = paths.find_all_shortest_reaction_paths(
            cpr_graph, sbml_dfs, target_species_paths, weight_var="weights"
        )

    assert all_shortest_reaction_paths_df.shape[0] == 3
|
184
|
+
|
185
|
+
|
186
|
+
def test_neighborhood():
    """A 3rd-order neighborhood around NADH has the expected vertex count."""
    species = sbml_dfs.species
    nadh_species = species[species["s_name"] == "NADH"].index.tolist()

    query_sc_species = net_utils.compartmentalize_species(sbml_dfs, nadh_species)
    compartmentalized_species = query_sc_species["sc_id"].tolist()

    cpr_graph = net_create.process_cpr_graph(
        sbml_dfs, directed=True, weighting_strategy="topology"
    )

    neighborhood = neighborhoods.find_neighborhoods(
        sbml_dfs,
        cpr_graph,
        compartmentalized_species=compartmentalized_species,
        order=3,
    )

    assert neighborhood["species_73473"]["vertices"].shape[0] == 6
|
205
|
+
|
206
|
+
|
207
|
+
def test_format_interactors():
    """Tiered graph layouts order reaction species into the expected edges."""
    graph_hierarchy_df = net_create._create_graph_hierarchy_df("regulatory")

    def _tiered_edges(example_name, hierarchy_df):
        # wire one fixture from reaction_species_examples_dict through the layout
        return net_create._format_tiered_reaction_species(
            r_id,
            reaction_species_examples_dict[example_name],
            sbml_dfs,
            hierarchy_df,
        )

    # a pair of interactors collapses to a single edge
    assert _tiered_edges("valid_interactor", graph_hierarchy_df).shape[0] == 1

    print("Re-enable test once Issue #102 is solved")

    # catch error from invalid interactor specification
    # with pytest.raises(ValueError) as excinfo:
    #     net_create._format_tiered_reaction_species(
    #         r_id, reaction_species_examples_dict["invalid_interactor"], sbml_dfs
    #     )
    # assert str(excinfo.value).startswith("Invalid combinations of SBO_terms")

    # simple reaction with just substrates and products
    assert _tiered_edges("sub_and_prod", graph_hierarchy_df).shape[0] == 2

    # a stimulator (activator) feeds into the substrate tier
    rxn_edges = _tiered_edges("stimulator", graph_hierarchy_df)
    assert rxn_edges.shape[0] == 3
    assert rxn_edges.iloc[0][["from", "to"]].tolist() == ["stim", "sub"]

    # catalyst + stimulator: stimulator -> catalyst -> substrate
    rxn_edges = _tiered_edges("all_entities", graph_hierarchy_df)
    assert rxn_edges.shape[0] == 4
    assert rxn_edges.iloc[0][["from", "to"]].tolist() == ["stim", "cat"]
    assert rxn_edges.iloc[1][["from", "to"]].tolist() == ["cat", "sub"]

    # with no substrate, modifiers still wire into the catalyst tier
    rxn_edges = _tiered_edges("no_substrate", graph_hierarchy_df)
    assert rxn_edges.shape[0] == 5
    assert rxn_edges.iloc[0][["from", "to"]].tolist() == ["stim1", "cat"]
    assert rxn_edges.iloc[1][["from", "to"]].tolist() == ["stim2", "cat"]
    assert rxn_edges.iloc[2][["from", "to"]].tolist() == ["inh", "cat"]

    # the surrogate model's tiered layout orders the same species differently
    surrogate_hierarchy_df = net_create._create_graph_hierarchy_df("surrogate")
    rxn_edges = _tiered_edges("all_entities", surrogate_hierarchy_df)
    assert rxn_edges.shape[0] == 4
    assert rxn_edges.iloc[0][["from", "to"]].tolist() == ["stim", "sub"]
    assert rxn_edges.iloc[1][["from", "to"]].tolist() == ["sub", "cat"]
|
290
|
+
|
291
|
+
|
292
|
+
def test_reverse_network_edges():
    """_reverse_network_edges keeps the reversible regulatory backbone edges."""
    graph_hierarchy_df = net_create._create_graph_hierarchy_df("regulatory")

    rxn_edges = net_create._format_tiered_reaction_species(
        r_id,
        reaction_species_examples_dict["all_entities"],
        sbml_dfs,
        graph_hierarchy_df,
    )

    # mark every edge reversible and attach dummy parent/child degree columns
    augmented_network_edges = rxn_edges.assign(r_isreversible=True)
    n_edges = augmented_network_edges.shape[0]
    augmented_network_edges["sc_parents"] = range(n_edges)
    augmented_network_edges["sc_children"] = range(n_edges, 0, -1)

    assert net_create._reverse_network_edges(augmented_network_edges).shape[0] == 2
|
308
|
+
|
309
|
+
|
310
|
+
def test_net_polarity():
    """Net polarity propagates activation/inhibition along a path of links."""
    all_ambiguous = pd.Series(
        ["ambiguous", "ambiguous"], index=[0, 1], name="link_polarity"
    )
    assert all(
        polarity == "ambiguous"
        for polarity in paths._calculate_net_polarity(all_ambiguous)
    )

    mixed_links = pd.Series(
        ["activation", "inhibition", "inhibition", "ambiguous"],
        index=range(0, 4),
        name="link_polarity",
    )
    # two inhibitions cancel back to activation; the trailing ambiguous link
    # downgrades the running polarity to "ambiguous activation"
    assert paths._calculate_net_polarity(mixed_links) == [
        "activation",
        "inhibition",
        "activation",
        "ambiguous activation",
    ]
    assert paths._terminal_net_polarity(mixed_links) == "ambiguous activation"
|
330
|
+
|
331
|
+
|
332
|
+
def test_entity_validation():
    """The entity-attribute validator fills in the default weight transform."""
    entity_attrs = {"table": "reactions", "variable": "foo"}

    validated = net_create._EntityAttrValidator(**entity_attrs).model_dump()
    assert validated == {**entity_attrs, "trans": DEFAULT_WT_TRANS}
|
339
|
+
|
340
|
+
|
341
|
+
################################################
|
342
|
+
# __main__
|
343
|
+
################################################
|
344
|
+
|
345
|
+
if __name__ == "__main__":
    # run the suite in the original ad-hoc order
    # (test_net_polarity was not part of the original script run)
    for _test in (
        test_create_cpr_graph,
        test_igraph_loading,
        test_igraph_construction,
        test_shortest_paths,
        test_neighborhood,
        test_format_interactors,
        test_reverse_network_edges,
        test_entity_validation,
    ):
        _test()
|
tests/test_indices.py
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
import pytest
|
7
|
+
from napistu import indices
|
8
|
+
|
9
|
+
# tests directory, resolved relative to this file so the pytest cwd doesn't matter
test_path = os.path.abspath(os.path.join(__file__, os.pardir))
test_data = os.path.join(test_path, "test_data")
|
11
|
+
|
12
|
+
|
13
|
+
def test_pwindex_from_file():
    """A pathway index loads from its TSV with the expected dimensions."""
    pw_index = indices.PWIndex(os.path.join(test_data, "pw_index.tsv"))
    assert pw_index.index.shape == (5, 6)
|
18
|
+
|
19
|
+
|
20
|
+
def test_pwindex_from_df():
    """PWIndex accepts a prebuilt DataFrame; path validation is optional."""
    stub_pw_df = pd.DataFrame(
        {
            "file": "DNE",
            "source": "The farm",
            "species": "Gallus gallus",
            "pathway_id": "chickens",
            "name": "Chickens",
            "date": "2020-01-01",
        },
        index=[0],
    )

    # with validation disabled the frame is stored untouched
    unvalidated = indices.PWIndex(pw_index=stub_pw_df, validate_paths=False)
    assert unvalidated.index.equals(stub_pw_df)

    # a bogus base path fails, as does a real one that lacks the indexed file
    with pytest.raises(FileNotFoundError) as _:
        indices.PWIndex(pw_index=stub_pw_df, pw_index_base_path="missing_directory")

    with pytest.raises(FileNotFoundError) as _:
        indices.PWIndex(pw_index=stub_pw_df, pw_index_base_path=test_data)
|
42
|
+
|
43
|
+
|
44
|
+
@pytest.fixture
def pw_testindex():
    """Pathway index built fresh from the bundled pw_index.tsv."""
    return indices.PWIndex(os.path.join(test_data, "pw_index.tsv"))
|
48
|
+
|
49
|
+
|
50
|
+
def test_index(pw_testindex):
    """Filtering and searching the pathway index behaves as expected."""
    pw_index = pw_testindex
    full_index_shape = (5, 6)
    assert pw_index.index.shape == full_index_shape

    ref_index = pw_index.index.copy()

    # matching filters (scalar or tuple form) keep every row
    for matching_filter in (
        {"sources": "Reactome"},
        {"species": "Homo sapiens"},
        {"sources": ("Reactome",)},
        {"species": ("Homo sapiens",)},
    ):
        pw_index.filter(**matching_filter)
        assert pw_index.index.shape == full_index_shape

    # non-matching filters drop all rows; restore the index after each
    for nonmatching_filter in ({"sources": "NotValid"}, {"species": "NotValid"}):
        pw_index.filter(**nonmatching_filter)
        assert pw_index.index.shape == (0, full_index_shape[1])
        pw_index.index = ref_index.copy()

    # free-text search narrows by name, supporting regex alternation
    pw_index.search("erythrocytes")
    assert pw_index.index.shape == (2, 6)
    pw_index.index = ref_index.copy()

    pw_index.search("erythrocytes|HYDROCARBON")
    assert pw_index.index.shape == (3, 6)
|
82
|
+
|
83
|
+
|
84
|
+
def test_missing_file(pw_testindex):
    """_check_files raises when an indexed file is absent on disk."""
    pw_index = pw_testindex
    pw_index.index.loc[0, "file"] = "not_existing.sbml"

    with pytest.raises(FileNotFoundError):
        pw_index._check_files()
|