PyPI - napistu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

napistu/__init__.py +12 -0
napistu/__main__.py +867 -0
napistu/consensus.py +1557 -0
napistu/constants.py +500 -0
napistu/gcs/__init__.py +10 -0
napistu/gcs/constants.py +69 -0
napistu/gcs/downloads.py +180 -0
napistu/identifiers.py +805 -0
napistu/indices.py +227 -0
napistu/ingestion/__init__.py +10 -0
napistu/ingestion/bigg.py +146 -0
napistu/ingestion/constants.py +296 -0
napistu/ingestion/cpr_edgelist.py +106 -0
napistu/ingestion/identifiers_etl.py +148 -0
napistu/ingestion/obo.py +268 -0
napistu/ingestion/psi_mi.py +276 -0
napistu/ingestion/reactome.py +218 -0
napistu/ingestion/sbml.py +621 -0
napistu/ingestion/string.py +356 -0
napistu/ingestion/trrust.py +285 -0
napistu/ingestion/yeast.py +147 -0
napistu/mechanism_matching.py +597 -0
napistu/modify/__init__.py +10 -0
napistu/modify/constants.py +86 -0
napistu/modify/curation.py +628 -0
napistu/modify/gaps.py +635 -0
napistu/modify/pathwayannot.py +1381 -0
napistu/modify/uncompartmentalize.py +264 -0
napistu/network/__init__.py +10 -0
napistu/network/constants.py +117 -0
napistu/network/neighborhoods.py +1594 -0
napistu/network/net_create.py +1647 -0
napistu/network/net_utils.py +652 -0
napistu/network/paths.py +500 -0
napistu/network/precompute.py +221 -0
napistu/rpy2/__init__.py +127 -0
napistu/rpy2/callr.py +168 -0
napistu/rpy2/constants.py +101 -0
napistu/rpy2/netcontextr.py +464 -0
napistu/rpy2/rids.py +697 -0
napistu/sbml_dfs_core.py +2216 -0
napistu/sbml_dfs_utils.py +304 -0
napistu/source.py +394 -0
napistu/utils.py +943 -0
napistu-0.1.0.dist-info/METADATA +56 -0
napistu-0.1.0.dist-info/RECORD +77 -0
napistu-0.1.0.dist-info/WHEEL +5 -0
napistu-0.1.0.dist-info/entry_points.txt +2 -0
napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
napistu-0.1.0.dist-info/top_level.txt +2 -0
tests/__init__.py +0 -0
tests/conftest.py +83 -0
tests/test_consensus.py +255 -0
tests/test_constants.py +20 -0
tests/test_curation.py +134 -0
tests/test_data/__init__.py +0 -0
tests/test_edgelist.py +20 -0
tests/test_gcs.py +23 -0
tests/test_identifiers.py +151 -0
tests/test_igraph.py +353 -0
tests/test_indices.py +88 -0
tests/test_mechanism_matching.py +126 -0
tests/test_net_utils.py +66 -0
tests/test_netcontextr.py +105 -0
tests/test_obo.py +34 -0
tests/test_pathwayannot.py +95 -0
tests/test_precomputed_distances.py +222 -0
tests/test_rpy2.py +61 -0
tests/test_sbml.py +46 -0
tests/test_sbml_dfs_create.py +307 -0
tests/test_sbml_dfs_utils.py +22 -0
tests/test_sbo.py +11 -0
tests/test_set_coverage.py +50 -0
tests/test_source.py +67 -0
tests/test_uncompartmentalize.py +40 -0
tests/test_utils.py +487 -0
tests/utils.py +30 -0

napistu/modify/uncompartmentalize.py ADDED Viewed

@@ -0,0 +1,264 @@
+from __future__ import annotations
+import logging
+import pandas as pd
+from napistu import consensus
+from napistu import identifiers
+from napistu import indices
+from napistu import sbml_dfs_core
+from napistu import sbml_dfs_utils
+from napistu import source
+from napistu.constants import SBML_DFS
+from napistu.constants import SOURCE_SPEC
+logger = logging.getLogger(__name__)
+def uncompartmentalize_sbml_dfs(
+    sbml_dfs: sbml_dfs_core.SBML_dfs,
+) -> sbml_dfs_core.SBML_dfs:
+    """Uncompartmentalize SBML_dfs
+    Take a compartmentalized mechanistic model and merge all of the compartments.
+    Args:
+        rxn_consensus_species (pd.DataFrame): reactions
+        rxnspec_consensus_instances (pd.DataFrame): reaction species
+    Returns:
+        reactions (pd.DataFrame): reactions with trivial reactions dropped
+        reaction_species (pd.DataFrame): reaction species with trivial reaction species dropped
+    """
+    # to remove compartmentalization we can:
+    # 1. update the compartments table to the stubbed default level: GO CELLULAR_COMPONENT
+    # 2. ignore the species table (it will be the same in the compartmentalized and uncompartmenalzied model)
+    # 3. create a 1-1 correspondence between species and new compartmentalized species. w/ GO CELLULAR_COMPONENT
+    # 4. update reaction species to the new compartmentalized species
+    # 5. drop reactions if:
+    #   - they are redundant (e.g., the same reaction occurred in multiple compartments)
+    #   - substrates and products are identical (e.g., a transportation reaction)
+    if sbml_dfs.compartments.shape[0] == 1:
+        logger.warning(
+            "The sbml_dfs model only contains a single compartment, calling uncompartmentalize_sbml_dfs()"
+            " may not be appropriate"
+        )
+    # 1. update the compartments table to the stubbed default level: GO CELLULAR_COMPONENT
+    stubbed_compartment = sbml_dfs_core._stub_compartments().assign(
+        c_Source=_create_stubbed_source()
+    )
+    # 3. create a 1-1 correspondence between species and new compartmentalized species. w/ GO CELLULAR_COMPONENT
+    compspec_consensus_instances, compspec_lookup_table = _uncompartmentalize_cspecies(
+        sbml_dfs, stubbed_compartment
+    )
+    # 4. update reaction species to the new compartmentalized species
+    # 5. drop reactions if:
+    #   - they are redundant (e.g., the same reaction occurred in multiple compartments)
+    #   - substrates and products are identical (e.g., a transportation reaction)
+    reactions, reaction_species = _uncompartmentalize_reactions(
+        sbml_dfs, compspec_lookup_table
+    )
+    sbml_dfs.compartments = stubbed_compartment
+    sbml_dfs.compartmentalized_species = compspec_consensus_instances
+    sbml_dfs.reactions = reactions
+    sbml_dfs.reaction_species = reaction_species
+    sbml_dfs.validate()
+    return sbml_dfs
+def _uncompartmentalize_cspecies(
+    sbml_dfs: sbml_dfs_core.SBML_dfs, stubbed_compartment: identifiers.Identifiers
+) -> tuple[pd.Dataframe, pd.DataFrame]:
+    """Convert compartmetnalized species into uncompartmentalized ones."""
+    updated_cspecies = (
+        sbml_dfs.compartmentalized_species.drop(
+            [SBML_DFS.SC_NAME, SBML_DFS.C_ID, SBML_DFS.SC_SOURCE], axis=1
+        )
+        .merge(
+            sbml_dfs.species[[SBML_DFS.S_NAME, SBML_DFS.S_SOURCE]],
+            left_on=SBML_DFS.S_ID,
+            right_index=True,
+        )
+        .reset_index()
+        .rename(
+            {
+                SBML_DFS.SC_ID: "sc_id_old",
+                SBML_DFS.S_NAME: SBML_DFS.SC_NAME,
+                SBML_DFS.S_SOURCE: SBML_DFS.SC_SOURCE,
+            },
+            axis=1,
+        )
+    )
+    # define new sc_ids as a 1-1 match to s_ids
+    new_sc_ids = updated_cspecies[SBML_DFS.S_ID].drop_duplicates().to_frame()
+    new_sc_ids[SBML_DFS.SC_ID] = sbml_dfs_utils.id_formatter(
+        range(new_sc_ids.shape[0]), SBML_DFS.SC_ID
+    )
+    # add new identifiers
+    updated_cspecies = updated_cspecies.merge(new_sc_ids)
+    # add new compartment
+    updated_cspecies[SBML_DFS.C_ID] = stubbed_compartment.index.tolist()[0]
+    # create a lookup table of old -> new sc_ids
+    compspec_lookup_table = (
+        updated_cspecies.assign(model="uncompartmentalization")
+        .rename({"sc_id_old": SBML_DFS.SC_ID, SBML_DFS.SC_ID: "new_id"}, axis=1)
+        .set_index([SOURCE_SPEC.MODEL, SBML_DFS.SC_ID])["new_id"]
+    )
+    compspec_consensus_instances = updated_cspecies.groupby(SBML_DFS.SC_ID).first()[
+        [SBML_DFS.S_ID, SBML_DFS.C_ID, SBML_DFS.SC_NAME, SBML_DFS.SC_SOURCE]
+    ]
+    return compspec_consensus_instances, compspec_lookup_table
+def _uncompartmentalize_reactions(
+    sbml_dfs: sbml_dfs_core.SBML_dfs, compspec_lookup_table: pd.Series
+) -> tuple[pd.DataFrame, pd.DataFrame]:
+    """Update reactions and reaction species to include uncompartmentalized species"""
+    stubbed_index = _create_stubbed_index()
+    # format sbml_dfs as a dict to take advantage of the consensus functions
+    sbml_dfs_dict = {"uncompartmentalization": sbml_dfs}
+    # merge reactions with identical stoichiometry
+    rxn_consensus_species, rxn_lookup_table = consensus.construct_meta_entities_members(
+        sbml_dfs_dict=sbml_dfs_dict,  # a single dict entry
+        pw_index=stubbed_index,
+        table=SBML_DFS.REACTIONS,
+        defined_by=SBML_DFS.REACTION_SPECIES,
+        defined_lookup_tables={SBML_DFS.SC_ID: compspec_lookup_table},
+        defining_attrs=[SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY],
+    )
+    (
+        rxnspec_consensus_instances,
+        rxnspec_lookup_table,
+    ) = consensus.construct_meta_entities_fk(
+        sbml_dfs_dict=sbml_dfs_dict,  # a single dict entry
+        pw_index=stubbed_index,
+        table=SBML_DFS.REACTION_SPECIES,
+        fk_lookup_tables={
+            SBML_DFS.R_ID: rxn_lookup_table,
+            SBML_DFS.SC_ID: compspec_lookup_table,
+        },
+        # retain species with different roles
+        extra_defining_attrs=[SBML_DFS.SBO_TERM],
+    )
+    # drop reactions and reaction species where due to removal of compartments
+    # the substrates and products are the same
+    # this will mostly remove transporation reactions
+    reactions, reaction_species = _filter_trivial_reactions(
+        rxn_consensus_species, rxnspec_consensus_instances
+    )
+    return reactions, reaction_species
+def _filter_trivial_reactions(
+    rxn_consensus_species: pd.DataFrame, rxnspec_consensus_instances: pd.DataFrame
+) -> tuple[pd.Dataframe, pd.DataFrame]:
+    """Filter Trivial Reactions
+    Filter reaction species which cancel out as substrates and products in the same reaction.
+    Args:
+        rxn_consensus_species (pd.DataFrame): reactions
+        rxnspec_consensus_instances (pd.DataFrame): reaction species
+    Returns:
+        reactions (pd.DataFrame): reactions with trivial reactions dropped
+        reaction_species (pd.DataFrame): reaction species with trivial reaction species dropped
+    """
+    # look for reactions where substrates and products cancel out
+    reactants = rxnspec_consensus_instances.query("stoichiometry != 0")
+    reactants_stoi_sum = (
+        reactants[[SBML_DFS.R_ID, SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]]
+        .groupby([SBML_DFS.R_ID, SBML_DFS.SC_ID])
+        .sum()
+    )
+    # identify cspecies which cancel out
+    invalid_cspecies_in_reaction = reactants_stoi_sum.query("stoichiometry == 0")
+    if invalid_cspecies_in_reaction.shape[0] > 0:
+        logger.info(
+            f"{invalid_cspecies_in_reaction.shape[0]} reactions species will be removed because they are substrates"
+            " and products in the same reaction"
+        )
+    # find all cspecies which cancel outs original rsc_ids
+    invalid_reaction_species = reactants.merge(
+        invalid_cspecies_in_reaction,
+        left_on=[SBML_DFS.R_ID, SBML_DFS.SC_ID],
+        right_index=True,
+    ).index.tolist()
+    # update the reaction species table to reflect reaction_species which were dropped because
+    # they were both substrates and products
+    updated_reaction_species = rxnspec_consensus_instances[
+        ~rxnspec_consensus_instances.index.isin(invalid_reaction_species)
+    ]
+    # identify valid reactions based on their presence in updated_reaction_species
+    valid_reactions = rxn_consensus_species.index.isin(
+        updated_reaction_species[SBML_DFS.R_ID]
+    )
+    invalid_reaction_names = rxn_consensus_species[~valid_reactions][
+        SBML_DFS.R_NAME
+    ].tolist()
+    if len(invalid_reaction_names) > 0:
+        logger.info(
+            f"{len(invalid_reaction_names)} reactions where substrates and products cancel out"
+            f" were dropped including: {' & '.join(invalid_reaction_names[0:5])}"
+        )
+    updated_reactions = rxn_consensus_species[valid_reactions]
+    return updated_reactions, updated_reaction_species
+def _create_stubbed_index() -> indices.PWIndex:
+    """Create a default pathway index for the uncompartmentalized model."""
+    stubbed_index_df = pd.DataFrame(
+        {
+            SOURCE_SPEC.FILE: None,
+            SOURCE_SPEC.SOURCE: None,
+            SOURCE_SPEC.SPECIES: None,
+            SOURCE_SPEC.PATHWAY_ID: "uncompartmentalization",
+            SOURCE_SPEC.NAME: "Merging all compartments",
+            SOURCE_SPEC.DATE: None,
+        },
+        index=[0],
+    )
+    stubbed_index = indices.PWIndex(stubbed_index_df, validate_paths=False)
+    return stubbed_index
+def _create_stubbed_source() -> source.Source:
+    """Create a default Source object for the uncompartmetnalized model."""
+    src = source.Source(
+        pd.DataFrame([{"model": "uncompartmentalization"}]),
+        pw_index=_create_stubbed_index(),
+    )
+    return src

napistu/network/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+from __future__ import annotations
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version
+try:
+    __version__ = version("calicolabs-cpr")
+except PackageNotFoundError:
+    # package is not installed
+    pass

napistu/network/constants.py ADDED Viewed

@@ -0,0 +1,117 @@
+"""Module to contain all constants used for representing and working with networks"""
+from __future__ import annotations
+from types import SimpleNamespace
+from napistu.constants import SBML_DFS
+from napistu.constants import SBOTERM_NAMES
+CPR_GRAPH_NODES = SimpleNamespace(NAME="name")  # node attribute names (presumably graph vertex attributes - confirm)
+CPR_GRAPH_EDGES = SimpleNamespace(  # names of the variables carried by graph edges
+    DIRECTED="directed",
+    FROM="from",
+    R_ID=SBML_DFS.R_ID,  # this and the other SBML_DFS.* entries reuse the sbml_dfs names
+    R_ISREVERSIBLE=SBML_DFS.R_ISREVERSIBLE,
+    SBO_TERM=SBML_DFS.SBO_TERM,
+    SBO_NAME="sbo_name",
+    SC_DEGREE="sc_degree",
+    SC_PARENTS="sc_parents",
+    SC_CHILDREN="sc_children",
+    SPECIES_TYPE="species_type",
+    STOICHIOMETRY=SBML_DFS.STOICHIOMETRY,
+    TO="to",
+    UPSTREAM_WEIGHTS="upstream_weights",
+    WEIGHTS="weights",
+)
+# variables which should be in a cpr graph's edges (a set, so the order is not significant)
+CPR_GRAPH_REQUIRED_EDGE_VARS = {
+    CPR_GRAPH_EDGES.FROM,
+    CPR_GRAPH_EDGES.TO,
+    CPR_GRAPH_EDGES.SBO_TERM,
+    CPR_GRAPH_EDGES.STOICHIOMETRY,
+    CPR_GRAPH_EDGES.SC_PARENTS,
+    CPR_GRAPH_EDGES.SC_CHILDREN,
+}
+# nomenclature for individual fields: node types and edge directions
+CPR_GRAPH_NODE_TYPES = SimpleNamespace(REACTION="reaction", SPECIES="species")
+VALID_CPR_GRAPH_NODE_TYPES = [  # every value defined in CPR_GRAPH_NODE_TYPES
+    CPR_GRAPH_NODE_TYPES.REACTION,
+    CPR_GRAPH_NODE_TYPES.SPECIES,
+]
+CPR_GRAPH_EDGE_DIRECTIONS = SimpleNamespace(
+    FORWARD="forward", REVERSE="reverse", UNDIRECTED="undirected"
+)
+# network-level nomenclature: graph types and weighting strategies
+CPR_GRAPH_TYPES = SimpleNamespace(
+    BIPARTITE="bipartite", REGULATORY="regulatory", SURROGATE="surrogate"
+)
+VALID_CPR_GRAPH_TYPES = [  # every value defined in CPR_GRAPH_TYPES
+    CPR_GRAPH_TYPES.BIPARTITE,
+    CPR_GRAPH_TYPES.REGULATORY,
+    CPR_GRAPH_TYPES.SURROGATE,
+]
+CPR_WEIGHTING_STRATEGIES = SimpleNamespace(
+    CALIBRATED="calibrated", MIXED="mixed", TOPOLOGY="topology", UNWEIGHTED="unweighted"
+)
+VALID_WEIGHTING_STRATEGIES = [  # all four strategies, in the reverse of the namespace's order
+    CPR_WEIGHTING_STRATEGIES.UNWEIGHTED,
+    CPR_WEIGHTING_STRATEGIES.TOPOLOGY,
+    CPR_WEIGHTING_STRATEGIES.MIXED,
+    CPR_WEIGHTING_STRATEGIES.CALIBRATED,
+]
+# the regulatory graph defines a hierarchy of upstream and downstream
+# entities in a reaction; each inner list is one tier, ordered from upstream to downstream:
+# modifier/stimulator/inhibitor -> catalyst -> reactant -> reaction -> interactor -> product
+REGULATORY_GRAPH_HIERARCHY = [
+    [SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR, SBOTERM_NAMES.INHIBITOR],
+    [SBOTERM_NAMES.CATALYST],
+    [SBOTERM_NAMES.REACTANT],
+    [CPR_GRAPH_NODE_TYPES.REACTION],
+    # normally we don't expect interactors to be defined because they are handled by
+    # net_create._format_interactors_for_regulatory_graph() but include them here
+    # until Issue #102 is solved
+    [SBOTERM_NAMES.INTERACTOR],
+    [SBOTERM_NAMES.PRODUCT],
+]
+# an alternative layout to regulatory where enzymes are downstream of substrates.
+# this doesn't make much sense from a regulatory perspective because
+# enzymes modify substrates not the other way around. but, it's what one might
+# expect if catalysts are a surrogate for reactions as is the case for metabolic
+# network layouts (only the reactant and catalyst tiers are swapped relative to the list above)
+SURROGATE_GRAPH_HIERARCHY = [
+    [SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR, SBOTERM_NAMES.INHIBITOR],
+    [SBOTERM_NAMES.REACTANT],
+    [SBOTERM_NAMES.CATALYST],
+    [CPR_GRAPH_NODE_TYPES.REACTION],
+    # normally we don't expect interactors to be defined because they are handled by
+    # net_create._format_interactors_for_regulatory_graph() but include them here
+    # until Issue #102 is solved
+    [SBOTERM_NAMES.INTERACTOR],
+    [SBOTERM_NAMES.PRODUCT],
+]
+NEIGHBORHOOD_NETWORK_TYPES = SimpleNamespace(  # labels for the kinds of neighborhood network
+    DOWNSTREAM="downstream", HOURGLASS="hourglass", UPSTREAM="upstream"
+)
+VALID_NEIGHBORHOOD_NETWORK_TYPES = [  # every value defined in NEIGHBORHOOD_NETWORK_TYPES
+    NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,
+    NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
+    NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM,
+]