PyPI - napistu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

napistu/__init__.py +12 -0
napistu/__main__.py +867 -0
napistu/consensus.py +1557 -0
napistu/constants.py +500 -0
napistu/gcs/__init__.py +10 -0
napistu/gcs/constants.py +69 -0
napistu/gcs/downloads.py +180 -0
napistu/identifiers.py +805 -0
napistu/indices.py +227 -0
napistu/ingestion/__init__.py +10 -0
napistu/ingestion/bigg.py +146 -0
napistu/ingestion/constants.py +296 -0
napistu/ingestion/cpr_edgelist.py +106 -0
napistu/ingestion/identifiers_etl.py +148 -0
napistu/ingestion/obo.py +268 -0
napistu/ingestion/psi_mi.py +276 -0
napistu/ingestion/reactome.py +218 -0
napistu/ingestion/sbml.py +621 -0
napistu/ingestion/string.py +356 -0
napistu/ingestion/trrust.py +285 -0
napistu/ingestion/yeast.py +147 -0
napistu/mechanism_matching.py +597 -0
napistu/modify/__init__.py +10 -0
napistu/modify/constants.py +86 -0
napistu/modify/curation.py +628 -0
napistu/modify/gaps.py +635 -0
napistu/modify/pathwayannot.py +1381 -0
napistu/modify/uncompartmentalize.py +264 -0
napistu/network/__init__.py +10 -0
napistu/network/constants.py +117 -0
napistu/network/neighborhoods.py +1594 -0
napistu/network/net_create.py +1647 -0
napistu/network/net_utils.py +652 -0
napistu/network/paths.py +500 -0
napistu/network/precompute.py +221 -0
napistu/rpy2/__init__.py +127 -0
napistu/rpy2/callr.py +168 -0
napistu/rpy2/constants.py +101 -0
napistu/rpy2/netcontextr.py +464 -0
napistu/rpy2/rids.py +697 -0
napistu/sbml_dfs_core.py +2216 -0
napistu/sbml_dfs_utils.py +304 -0
napistu/source.py +394 -0
napistu/utils.py +943 -0
napistu-0.1.0.dist-info/METADATA +56 -0
napistu-0.1.0.dist-info/RECORD +77 -0
napistu-0.1.0.dist-info/WHEEL +5 -0
napistu-0.1.0.dist-info/entry_points.txt +2 -0
napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
napistu-0.1.0.dist-info/top_level.txt +2 -0
tests/__init__.py +0 -0
tests/conftest.py +83 -0
tests/test_consensus.py +255 -0
tests/test_constants.py +20 -0
tests/test_curation.py +134 -0
tests/test_data/__init__.py +0 -0
tests/test_edgelist.py +20 -0
tests/test_gcs.py +23 -0
tests/test_identifiers.py +151 -0
tests/test_igraph.py +353 -0
tests/test_indices.py +88 -0
tests/test_mechanism_matching.py +126 -0
tests/test_net_utils.py +66 -0
tests/test_netcontextr.py +105 -0
tests/test_obo.py +34 -0
tests/test_pathwayannot.py +95 -0
tests/test_precomputed_distances.py +222 -0
tests/test_rpy2.py +61 -0
tests/test_sbml.py +46 -0
tests/test_sbml_dfs_create.py +307 -0
tests/test_sbml_dfs_utils.py +22 -0
tests/test_sbo.py +11 -0
tests/test_set_coverage.py +50 -0
tests/test_source.py +67 -0
tests/test_uncompartmentalize.py +40 -0
tests/test_utils.py +487 -0
tests/utils.py +30 -0

napistu/indices.py ADDED Viewed

@@ -0,0 +1,227 @@
+from __future__ import annotations
+import copy
+import os
+import re
+from os import PathLike
+from typing import Iterable
+from fs import open_fs
+import pandas as pd
+from napistu.utils import path_exists
+from napistu.constants import EXPECTED_PW_INDEX_COLUMNS
+from napistu.constants import SOURCE_SPEC
+class PWIndex:
+    """
+    Pathway Index
+    Organizing metadata (and optionally paths) of individual pathway representations
+    Attributes
+    ----------
+    index : pd.DataFrame
+        A table describing the location and contents of pathway files.
+    base_path: str
+        Path to directory of indexed files
+    Methods
+    -------
+    filter(sources, species)
+        Filter index based on pathway source an/or category
+    search(query)
+        Filter index to pathways matching the search query
+    """
+    def __init__(
+        self,
+        pw_index: PathLike[str] | str | pd.DataFrame,
+        pw_index_base_path=None,
+        validate_paths=True,
+    ) -> None:
+        """
+        Tracks pathway file locations and contents.
+        Parameters
+        ----------
+        pw_index : str or None
+            Path to index file or a pd.DataFrame containing the contents of PWIndex.index
+        pw_index_base_path : str or None
+            A Path that relative paths in pw_index will reference
+        validate_paths : bool
+            If True then paths constructed from base_path + file will be tested for existence.
+            If False then paths will not be validated and base_path attribute will be set to None
+        Returns
+        -------
+        None
+        """
+        # read index either directly from pandas or from a file
+        if isinstance(pw_index, pd.DataFrame):
+            self.index = pw_index
+        elif isinstance(pw_index, PathLike) or isinstance(pw_index, str):
+            base_path = os.path.dirname(pw_index)
+            file_name = os.path.basename(pw_index)
+            with open_fs(base_path) as base_fs:
+                with base_fs.open(file_name) as f:
+                    self.index = pd.read_table(f)
+        else:
+            raise ValueError(
+                f"pw_index needs to be of type PathLike[str] | str | pd.DataFrame but was {type(pw_index).__name__}"
+            )
+        # format option arguments
+        if (pw_index_base_path is not None) and (
+            not isinstance(pw_index_base_path, str)
+        ):
+            raise TypeError(
+                f"pw_index_base_path was as {type(pw_index_base_path).__name__} and must be a str if provided"
+            )
+        if not isinstance(validate_paths, bool):
+            raise TypeError(
+                f"validate_paths was as {type(validate_paths).__name__} and must be a bool"
+            )
+        # verify that the index is syntactically correct
+        observed_columns = set(self.index.columns.to_list())
+        if EXPECTED_PW_INDEX_COLUMNS != observed_columns:
+            missing = ", ".join(EXPECTED_PW_INDEX_COLUMNS.difference(observed_columns))
+            extra = ", ".join(observed_columns.difference(EXPECTED_PW_INDEX_COLUMNS))
+            raise ValueError(
+                f"Observed pw_index columns did not match expected columns:\n"
+                f"Missing columns: {missing}\nExtra columns: {extra}"
+            )
+        # verify that all pathway_ids are unique
+        duplicated_pathway_ids = list(
+            self.index[SOURCE_SPEC.PATHWAY_ID][
+                self.index[SOURCE_SPEC.PATHWAY_ID].duplicated()
+            ]
+        )
+        if len(duplicated_pathway_ids) != 0:
+            path_str = "\n".join(duplicated_pathway_ids)
+            raise ValueError(
+                f"{len(duplicated_pathway_ids)} pathway_ids were duplicated:\n{path_str}"
+            )
+        if validate_paths:
+            if pw_index_base_path is not None:
+                self.base_path = pw_index_base_path
+            elif isinstance(pw_index, PathLike) or isinstance(pw_index, str):
+                self.base_path = os.path.dirname(pw_index)
+            else:
+                raise ValueError(
+                    "validate_paths was True but neither pw_index_base_path "
+                    "nor an index path were provided. Please provide "
+                    "pw_index_base_path if you intend to verify that "
+                    "the files present in pw_index exist"
+                )
+            if path_exists(self.base_path) is False:
+                raise FileNotFoundError(
+                    "base_path at {self.base_path} is not a valid directory"
+                )
+            # verify that pathway files exist
+            self._check_files()
+        elif pw_index_base_path is not None:
+            print(
+                "validate_paths is False so pw_index_base_path will be ignored and paths will not be validated"
+            )
+    def _check_files(self):
+        """Verifies that all files in the pwindex are present
+        Raises:
+            FileNotFoundError: Error if a file not present
+        """
+        with open_fs(self.base_path) as base_fs:
+            # verify that pathway files exist
+            files = base_fs.listdir(".")
+            missing_pathway_files = set(self.index[SOURCE_SPEC.FILE]) - set(files)
+            if len(missing_pathway_files) != 0:
+                file_str = "\n".join(missing_pathway_files)
+                raise FileNotFoundError(
+                    f"{len(missing_pathway_files)} were missing:\n{file_str}"
+                )
+    def filter(
+        self,
+        sources: str | Iterable[str] | None = None,
+        species: str | Iterable[str] | None = None,
+    ):
+        """
+        Filter Pathway Index
+        Args:
+            sources (str | Iterable[str] | None, optional): A list of valid sources or None for all
+            species (str | Iterable[str] | None, optional): A list of valid species or None all all
+        """
+        pw_index = self.index
+        if sources is not None:
+            pw_index = pw_index.query("source in @sources")
+        if species is not None:
+            pw_index = pw_index.query("species in @species")
+        self.index = pw_index
+    def search(self, query):
+        """
+        Search Pathway Index
+        Parameters:
+        query: str
+            Filter to rows of interest based on case-insensitive match to names.
+        Returns:
+        None
+        """
+        pw_index = self.index
+        # find matches to query
+        fil = pw_index[SOURCE_SPEC.NAME].str.contains(
+            query, regex=True, flags=re.IGNORECASE
+        )
+        pw_index = pw_index[fil]
+        self.index = pw_index
+def adapt_pw_index(
+    source: str | PWIndex,
+    species: str | Iterable[str] | None,
+    outdir: str | None = None,
+) -> PWIndex:
+    """Adapts a pw_index
+    Helpful to filter for species before reconstructing.
+    Args:
+        source (str | PWIndex): uri for pw_index.csv file or PWIndex object
+        species (str):
+        outdir (str | None, optional): Optional directory to write pw_index to.
+            Defaults to None.
+    Returns:
+        PWIndex: Filtered pw index
+    """
+    if isinstance(source, str):
+        pw_index = PWIndex(source)
+    elif isinstance(source, PWIndex):
+        pw_index = copy.deepcopy(source)
+    else:
+        raise ValueError("'source' needs to be str or PWIndex")
+    pw_index.filter(species=species)
+    if outdir is not None:
+        with open_fs(outdir, create=True) as fs:
+            with fs.open("pw_index.tsv", "w") as f:
+                pw_index.index.to_csv(f, sep="\t")
+    return pw_index

napistu/ingestion/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+from __future__ import annotations
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version
+try:
+    __version__ = version("calicolabs-cpr")
+except PackageNotFoundError:
+    # package is not installed
+    pass

napistu/ingestion/bigg.py ADDED Viewed

@@ -0,0 +1,146 @@
+from __future__ import annotations
+import datetime
+import logging
+import os
+from typing import Iterable
+import pandas as pd
+from napistu import indices
+from napistu import sbml_dfs_core
+from napistu import utils
+from napistu.consensus import construct_sbml_dfs_dict
+from napistu.ingestion import sbml
+from napistu.ingestion.constants import BIGG_MODEL_FIELD_SPECIES
+from napistu.ingestion.constants import BIGG_MODEL_FIELD_URL
+from napistu.ingestion.constants import BIGG_MODEL_KEYS
+from napistu.ingestion.constants import BIGG_MODEL_URLS
+from napistu.ingestion.constants import BIGG_RECON3D_FIELD_ANNOTATION
+from napistu.ingestion.constants import SPECIES_FULL_NAME_HUMAN
+from napistu.ingestion.constants import SPECIES_FULL_NAME_MOUSE
+from napistu.ingestion.constants import SPECIES_FULL_NAME_YEAST
+from fs import open_fs
+logger = logging.getLogger(__name__)
+def bigg_sbml_download(bg_pathway_root: str, overwrite: bool = False) -> None:
+    """
+    BiGG SBML Download
+    Download SBML models from BiGG. Currently just the human Recon3D model
+    Parameters:
+    bg_pathway_root (str): Paths to a directory where a \"sbml\" directory should be created.
+    overwrite (bool): Overwrite an existing output directory.
+    Returns:
+    None
+    """
+    utils.initialize_dir(bg_pathway_root, overwrite)
+    bigg_models = {
+        BIGG_MODEL_KEYS[SPECIES_FULL_NAME_HUMAN]: {
+            BIGG_MODEL_FIELD_URL: BIGG_MODEL_URLS[SPECIES_FULL_NAME_HUMAN],
+            BIGG_MODEL_FIELD_SPECIES: SPECIES_FULL_NAME_HUMAN,
+        },
+        BIGG_MODEL_KEYS[SPECIES_FULL_NAME_MOUSE]: {
+            BIGG_MODEL_FIELD_URL: BIGG_MODEL_URLS[SPECIES_FULL_NAME_MOUSE],
+            BIGG_MODEL_FIELD_SPECIES: SPECIES_FULL_NAME_MOUSE,
+        },
+        BIGG_MODEL_KEYS[SPECIES_FULL_NAME_YEAST]: {
+            BIGG_MODEL_FIELD_URL: BIGG_MODEL_URLS[SPECIES_FULL_NAME_YEAST],
+            BIGG_MODEL_FIELD_SPECIES: SPECIES_FULL_NAME_YEAST,
+        },
+    }
+    bigg_models_df = pd.DataFrame(bigg_models).T
+    bigg_models_df["sbml_path"] = [
+        os.path.join(bg_pathway_root, k) + ".sbml"
+        for k in bigg_models_df.index.tolist()
+    ]
+    bigg_models_df["file"] = [os.path.basename(x) for x in bigg_models_df["sbml_path"]]
+    # add other attributes which will be used in the pw_index
+    bigg_models_df["date"] = datetime.date.today().strftime("%Y%m%d")
+    bigg_models_df.index = bigg_models_df.index.rename("pathway_id")
+    bigg_models_df = bigg_models_df.reset_index()
+    bigg_models_df["name"] = bigg_models_df["pathway_id"]
+    bigg_models_df = bigg_models_df.assign(source="BiGG")
+    with open_fs(bg_pathway_root, create=True) as bg_fs:
+        for _, row in bigg_models_df.iterrows():
+            with bg_fs.open(row["file"], "wb") as f:
+                utils.download_wget(row["url"], f)  # type: ignore
+        pw_index = bigg_models_df[
+            ["file", "source", "species", "pathway_id", "name", "date"]
+        ]
+        # save index to sbml dir
+        with bg_fs.open("pw_index.tsv", "wb") as f:
+            pw_index.to_csv(f, sep="\t", index=False)
+    return None
+def annotate_recon(raw_model_path: str, annotated_model_path: str) -> None:
+    """Annotate Recon3D
+    Add compartment annotations to Recon3D so it can be merged with other pathways
+    """
+    logger.warning(
+        "add_sbml_annotations is deprecated and maybe removed in a future version of rcpr; "
+        "we are now adding these annotation during ingestion by sbml.sbml_df_from_sbml() rather "
+        "than directly appending them to the raw .sbml"
+    )
+    recon_3d_annotations = pd.DataFrame(BIGG_RECON3D_FIELD_ANNOTATION)
+    sbml_model = sbml.SBML(raw_model_path)
+    sbml.add_sbml_annotations(
+        sbml_model, recon_3d_annotations, save_path=annotated_model_path
+    )
+    return None
+def construct_bigg_consensus(
+    pw_index_inp: str | indices.PWIndex,
+    species: str | Iterable[str] | None = None,
+    outdir: str | None = None,
+) -> sbml_dfs_core.SBML_dfs:
+    """Constructs a BiGG SBML DFs Pathway Representation
+    Attention: curently this does work only for a singly model. Integraiton of multiple
+    models is not supported yet in BiGG.
+    Args:
+        pw_index_inp (str | indices.PWIndex): PWIndex or uri pointing to PWIndex
+        species (str | Iterable[str] | None): one or more species to filter by. Default: no filtering
+        outdir (str | None, optional): output directory used to cache results. Defaults to None.
+    Returns:
+        sbml_dfs_core.SBML_dfs: A consensus SBML
+    """
+    if isinstance(pw_index_inp, str):
+        pw_index = indices.adapt_pw_index(pw_index_inp, species=species, outdir=outdir)
+    elif isinstance(pw_index_inp, indices.PWIndex):
+        pw_index = pw_index_inp
+    else:
+        raise ValueError("pw_index_inp needs to be a PWIndex or a str to a location.")
+    if outdir is not None:
+        construct_sbml_dfs_dict_fkt = utils.pickle_cache(
+            os.path.join(outdir, "model_pool.pkl")
+        )(construct_sbml_dfs_dict)
+    else:
+        construct_sbml_dfs_dict_fkt = construct_sbml_dfs_dict
+    sbml_dfs_dict = construct_sbml_dfs_dict_fkt(pw_index)
+    if len(sbml_dfs_dict) > 1:
+        raise NotImplementedError("Merging of models not implemented yet for BiGG")
+    # In Bigg there should be only one model
+    model = list(sbml_dfs_dict.values())[0]
+    # fix missing compartimentalization
+    model = sbml_dfs_core.infer_uncompartmentalized_species_location(model)
+    model = sbml_dfs_core.name_compartmentalized_species(model)
+    model.validate()
+    return model

napistu/ingestion/constants.py ADDED Viewed

@@ -0,0 +1,296 @@
+# Ingestion constants
+from __future__ import annotations
+from types import SimpleNamespace
+# Full species names; used as keys of the per-source lookup tables in this module
+SPECIES_FULL_NAME_HUMAN = "Homo sapiens"
+SPECIES_FULL_NAME_MOUSE = "Mus musculus"
+SPECIES_FULL_NAME_YEAST = "Saccharomyces cerevisiae"
+SPECIES_FULL_NAME_RAT = "Rattus norvegicus"
+SPECIES_FULL_NAME_WORM = "Caenorhabditis elegans"
+# BIGG
+# download location of the BiGG model for each supported species
+BIGG_MODEL_URLS = {
+    SPECIES_FULL_NAME_HUMAN: "http://bigg.ucsd.edu/static/models/Recon3D.xml",
+    SPECIES_FULL_NAME_MOUSE: "http://bigg.ucsd.edu/static/models/iMM1415.xml",
+    SPECIES_FULL_NAME_YEAST: "http://bigg.ucsd.edu/static/models/iMM904.xml",
+}
+# field names of the per-model records built from the two tables in this section
+BIGG_MODEL_FIELD_URL = "url"
+BIGG_MODEL_FIELD_SPECIES = "species"
+# model identifier for each supported species (same species keys as BIGG_MODEL_URLS)
+BIGG_MODEL_KEYS = {
+    SPECIES_FULL_NAME_HUMAN: "recon3D",
+    SPECIES_FULL_NAME_MOUSE: "iMM1415",
+    SPECIES_FULL_NAME_YEAST: "iMM904",
+}
+# keys of each annotation record in BIGG_RECON3D_FIELD_ANNOTATION
+BIGG_RECON3D_FIELD_ID = "id"
+BIGG_RECON3D_FIELD_TYPE = "type"
+BIGG_RECON3D_FIELD_URI = "uri"
+# single-letter compartment ids; the GO terms attached to each id are listed
+# in BIGG_RECON3D_FIELD_ANNOTATION
+BIGG_RECON3D_ID_C = "c"
+BIGG_RECON3D_ID_L = "l"
+BIGG_RECON3D_ID_E = "e"
+BIGG_RECON3D_ID_M = "m"
+BIGG_RECON3D_ID_R = "r"
+BIGG_RECON3D_ID_X = "x"
+BIGG_RECON3D_ID_N = "n"
+BIGG_RECON3D_ID_I = "i"
+# value of the "type" field shared by all annotation records
+BIGG_RECON3D_TYPE_COMPARTMENT = "compartment"
+# Compartment annotations for Recon3D: one record per (compartment id, GO term uri)
+# pair. A compartment id may appear in several records, each with a different
+# GO term. The comment above each record names the GO term it points to.
+# NOTE(review): the plasma membrane record is keyed to the same id as
+# cytosol/cytoplasm ("c") - confirm this is intended.
+# NOTE(review): the golgi records use the literal "g"; there is no
+# BIGG_RECON3D_ID_* constant for it.
+BIGG_RECON3D_FIELD_ANNOTATION = [
+    {
+        # cytosol
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_C,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005829",
+    },
+    {
+        # cytoplasm
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_C,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005737",
+    },
+    {
+        # plasma membrane
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_C,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005886",
+    },
+    {
+        # lysosome lumen
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_L,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0043202",
+    },
+    {
+        # lysosomal membrane
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_L,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005765",
+    },
+    {
+        # mitochondrial intermembrane space
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_M,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005758",
+    },
+    {
+        # mitochondrial outer membrane
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_M,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005741",
+    },
+    {
+        # ER membrane
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_R,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005789",
+    },
+    {
+        # ER lumen
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_R,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005788",
+    },
+    {
+        # extracellular region
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_E,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005576",
+    },
+    {
+        # peroxisomal membrane
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_X,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005778",
+    },
+    {
+        # peroxisomal matrix
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_X,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005782",
+    },
+    {
+        # nucleolus
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_N,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005730",
+    },
+    {
+        # nuclear envelope
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_N,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005635",
+    },
+    {
+        # nucleoplasm
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_N,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005654",
+    },
+    {
+        # golgi membrane
+        BIGG_RECON3D_FIELD_ID: "g",
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0000139",
+    },
+    {
+        # golgi lumen
+        BIGG_RECON3D_FIELD_ID: "g",
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005796",
+    },
+    {
+        # mitochondrial matrix
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_I,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005759",
+    },
+    {
+        # mitochondrial inner membrane
+        BIGG_RECON3D_FIELD_ID: BIGG_RECON3D_ID_I,
+        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
+        BIGG_RECON3D_FIELD_URI: "https://www.ebi.ac.uk/QuickGO/term/GO:0005743",
+    },
+]
+# IDENTIFIERS ETL
+# remote sources of the yeast identifier table and the SBO ontology (OBO format)
+IDENTIFIERS_ETL_YEAST_URL = "https://www.uniprot.org/docs/yeast.txt"
+IDENTIFIERS_ETL_SBO_URL = (
+    "https://raw.githubusercontent.com/EBI-BioModels/SBO/master/SBO_OBO.obo"
+)
+# presumably the column names assigned to the parsed yeast.txt table, in file
+# order - TODO confirm against the parser in identifiers_etl.py
+IDENTIFIERS_ETL_YEAST_FIELDS = (
+    "common",
+    "common_all",
+    "OLN",
+    "SwissProt_acc",
+    "SwissProt_entry",
+    "SGD",
+    "size",
+    "3d",
+    "chromosome",
+)
+# OBO
+OBO_GO_BASIC_URL = "http://purl.obolibrary.org/obo/go/go-basic.obo"
+# NOTE: hard-coded path under /tmp
+OBO_GO_BASIC_LOCAL_TMP = "/tmp/go-basic.obo"
+# PSI MI
+PSI_MI_INTACT_FTP_URL = (
+    "https://ftp.ebi.ac.uk/pub/databases/intact/current/psi30/species"
+)
+# NOTE: hard-coded path under /tmp
+PSI_MI_INTACT_DEFAULT_OUTPUT_DIR = "/tmp/intact_tmp"
+# XML namespace in braces, i.e. the "{namespace}tag" form
+PSI_MI_INTACT_XML_NAMESPACE = "{http://psi.hupo.org/mi/mif300}"
+# species full name -> short basename; presumably the per-species file name
+# under PSI_MI_INTACT_FTP_URL - TODO confirm against psi_mi.py
+PSI_MI_INTACT_SPECIES_TO_BASENAME = {
+    SPECIES_FULL_NAME_YEAST: "yeast",
+    SPECIES_FULL_NAME_HUMAN: "human",
+    SPECIES_FULL_NAME_MOUSE: "mouse",
+    SPECIES_FULL_NAME_RAT: "rat",
+    SPECIES_FULL_NAME_WORM: "caeel",
+}
+# REACTOME
+# download locations of the Reactome SBGN archive, SBML archive and pathway list
+# NOTE: "SMBL" in REACTOME_SMBL_URL (and in the SMBL_* names below) is a
+# transposition of "SBML"; the names are public, so the spelling is kept as is.
+REACTOME_SBGN_URL = "https://reactome.org/download/current/homo_sapiens.sbgn.tar.gz"
+REACTOME_SMBL_URL = "https://reactome.org/download/current/all_species.3.1.sbml.tgz"
+REACTOME_PATHWAYS_URL = "https://reactome.org/download/current/ReactomePathways.txt"
+# column names of the Reactome pathway index and of the pathway list
+REACTOME_PATHWAY_INDEX_COLUMNS = ["file", "source", "species", "pathway_id", "name"]
+REACTOME_PATHWAY_LIST_COLUMNS = ["pathway_id", "name", "species"]
+# SBML
+# field names describing an SBML error record
+SMBL_ERROR_NUMBER = "error_number"
+SMBL_ERROR_CATEGORY = "category"
+SMBL_ERROR_SEVERITY = "severity"
+SMBL_ERROR_DESCRIPTION = "description"
+SMBL_ERROR_MESSAGE = "message"
+# human-readable labels of a model summary
+SMBL_SUMMARY_PATHWAY_NAME = "Pathway Name"
+SMBL_SUMMARY_PATHWAY_ID = "Pathway ID"
+SMBL_SUMMARY_N_SPECIES = "# of Species"
+SMBL_SUMMARY_N_REACTIONS = "# of Reactions"
+SMBL_SUMMARY_COMPARTMENTS = "Compartments"
+# keys of reaction records ("r_" prefix)
+SMBL_REACTION_DICT_ID = "r_id"
+SMBL_REACTION_DICT_NAME = "r_name"
+SMBL_REACTION_DICT_IDENTIFIERS = "r_Identifiers"
+SMBL_REACTION_DICT_SOURCE = "r_Source"
+SMBL_REACTION_DICT_IS_REVERSIBLE = "r_isreversible"
+# keys of reaction species records
+SMBL_REACTION_SPEC_RSC_ID = "rsc_id"
+SMBL_REACTION_SPEC_SC_ID = "sc_id"
+SMBL_REACTION_SPEC_STOICHIOMETRY = "stoichiometry"
+SMBL_REACTION_SPEC_SBO_TERM = "sbo_term"
+# keys of compartment records ("c_" prefix)
+SBML_COMPARTMENT_DICT_ID = "c_id"
+SBML_COMPARTMENT_DICT_NAME = "c_name"
+SBML_COMPARTMENT_DICT_IDENTIFIERS = "c_Identifiers"
+SBML_COMPARTMENT_DICT_SOURCE = "c_Source"
+# keys of species records ("s_" prefix)
+SBML_SPECIES_DICT_ID = "s_id"
+SBML_SPECIES_DICT_NAME = "s_name"
+SBML_SPECIES_DICT_IDENTIFIERS = "s_Identifiers"
+# keys of compartmentalized species records ("sc_" prefix)
+SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME = "sc_name"
+SBML_COMPARTMENTALIZED_SPECIES_DICT_SOURCE = "sc_Source"
+# attribute / method names stored as strings; presumably resolved with
+# getattr() on SBML objects - TODO confirm against sbml.py
+SBML_REACTION_ATTR_GET_GENE_PRODUCT = "getGeneProduct"
+SBML_ANNOTATION_METHOD_GET_SPECIES = "getSpecies"
+SBML_ANNOTATION_METHOD_GET_COMPARTMENT = "getCompartment"
+SBML_ANNOTATION_METHOD_GET_REACTION = "getReaction"
+# STRING
+# URL templates with {version} and {taxid} placeholders to be filled in by the caller
+STRING_URL_EXPRESSIONS = {
+    "interactions": "https://stringdb-static.org/download/protein.links.full.v{version}/{taxid}.protein.links.full.v{version}.txt.gz",
+    "aliases": "https://stringdb-static.org/download/protein.aliases.v{version}/{taxid}.protein.aliases.v{version}.txt.gz",
+}
+# raw column name (with leading "#") and its cleaned counterpart
+STRING_PROTEIN_ID_RAW = "#string_protein_id"
+STRING_PROTEIN_ID = "string_protein_id"
+STRING_SOURCE = "protein1"
+STRING_TARGET = "protein2"
+# NOTE: stored as a float, not a str; presumably substituted for {version}
+# in STRING_URL_EXPRESSIONS - TODO confirm against string.py
+STRING_VERSION = 11.5
+# species full name -> integer taxonomy id; presumably substituted for {taxid}
+# in STRING_URL_EXPRESSIONS - TODO confirm against string.py
+STRING_TAX_IDS = {
+    SPECIES_FULL_NAME_WORM: 6239,
+    SPECIES_FULL_NAME_HUMAN: 9606,
+    SPECIES_FULL_NAME_MOUSE: 10090,
+    SPECIES_FULL_NAME_RAT: 10116,
+    SPECIES_FULL_NAME_YEAST: 4932,
+}
+STRING_UPSTREAM_COMPARTMENT = "upstream_compartment"
+STRING_DOWNSTREAM_COMPARTMENT = "downstream_compartment"
+STRING_UPSTREAM_NAME = "upstream_name"
+STRING_DOWNSTREAM_NAME = "downstream_name"
+# TRRUST
+# NOTE: this name is spelled "TTRUST" while the other constants of this section
+# use "TRRUST"; the name is public, so the spelling is kept as is.
+TTRUST_URL_RAW_DATA_HUMAN = (
+    "https://www.grnpedia.org/trrust/data/trrust_rawdata.human.tsv"
+)
+TRRUST_SYMBOL = "symbol"
+TRRUST_UNIPROT = "uniprot"
+TRRUST_UNIPROT_ID = "uniprot_id"
+# compartment name and its GO id (same GO term as the nucleoplasm entry of
+# BIGG_RECON3D_FIELD_ANNOTATION)
+TRRUST_COMPARTMENT_NUCLEOPLASM = "nucleoplasm"
+TRRUST_COMPARTMENT_NUCLEOPLASM_GO_ID = "GO:0005654"
+# labels of the two regulation signs, accessed as TRRUST_SIGNS.ACTIVATION / .REPRESSION
+TRRUST_SIGNS = SimpleNamespace(ACTIVATION="Activation", REPRESSION="Repression")
+# YEAST IDEA
+# https://idea.research.calicolabs.com/data
+YEAST_IDEA_KINETICS_URL = "https://storage.googleapis.com/calico-website-pin-public-bucket/datasets/idea_kinetics.zip"
+YEAST_IDEA_SOURCE = "TF"
+YEAST_IDEA_TARGET = "GeneName"
+YEAST_IDEA_PUBMED_ID = "32181581"  # ids are characters by convention
+# Identifiers ETL
+# ten underscores; presumably matches the separator line that delimits the
+# header of yeast.txt - TODO confirm against identifiers_etl.py
+IDENTIFIERS_ETL_YEAST_HEADER_REGEX = "__________"