napistu 0.1.0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +1 -1
- napistu/consensus.py +1010 -513
- napistu/constants.py +24 -0
- napistu/gcs/constants.py +2 -2
- napistu/gcs/downloads.py +57 -25
- napistu/gcs/utils.py +21 -0
- napistu/identifiers.py +105 -6
- napistu/ingestion/constants.py +0 -1
- napistu/ingestion/obo.py +24 -8
- napistu/ingestion/psi_mi.py +20 -5
- napistu/ingestion/reactome.py +8 -32
- napistu/mcp/__init__.py +69 -0
- napistu/mcp/__main__.py +180 -0
- napistu/mcp/codebase.py +182 -0
- napistu/mcp/codebase_utils.py +298 -0
- napistu/mcp/constants.py +72 -0
- napistu/mcp/documentation.py +166 -0
- napistu/mcp/documentation_utils.py +235 -0
- napistu/mcp/execution.py +382 -0
- napistu/mcp/profiles.py +73 -0
- napistu/mcp/server.py +86 -0
- napistu/mcp/tutorials.py +124 -0
- napistu/mcp/tutorials_utils.py +230 -0
- napistu/mcp/utils.py +47 -0
- napistu/mechanism_matching.py +782 -26
- napistu/modify/constants.py +41 -0
- napistu/modify/curation.py +4 -1
- napistu/modify/gaps.py +243 -156
- napistu/modify/pathwayannot.py +26 -8
- napistu/network/neighborhoods.py +16 -7
- napistu/network/net_create.py +209 -54
- napistu/network/net_propagation.py +118 -0
- napistu/network/net_utils.py +1 -32
- napistu/rpy2/netcontextr.py +10 -7
- napistu/rpy2/rids.py +7 -5
- napistu/sbml_dfs_core.py +46 -29
- napistu/sbml_dfs_utils.py +37 -1
- napistu/source.py +8 -2
- napistu/utils.py +67 -8
- napistu-0.2.4.dev2.dist-info/METADATA +84 -0
- napistu-0.2.4.dev2.dist-info/RECORD +95 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/WHEEL +1 -1
- tests/conftest.py +11 -5
- tests/test_consensus.py +4 -1
- tests/test_gaps.py +127 -0
- tests/test_gcs.py +3 -2
- tests/test_igraph.py +14 -0
- tests/test_mcp_documentation_utils.py +13 -0
- tests/test_mechanism_matching.py +658 -0
- tests/test_net_propagation.py +89 -0
- tests/test_net_utils.py +83 -0
- tests/test_sbml.py +2 -0
- tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
- tests/test_utils.py +81 -0
- napistu-0.1.0.dist-info/METADATA +0 -56
- napistu-0.1.0.dist-info/RECORD +0 -77
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/top_level.txt +0 -0
napistu/constants.py
CHANGED
@@ -7,6 +7,20 @@ import libsbml
 from types import SimpleNamespace
 import pandas as pd
 
+
+PACKAGE_DEFS = SimpleNamespace(
+    NAPISTU="napistu",
+    GITHUB_OWNER="napistu",
+    GITHUB_PROJECT_REPO="napistu",
+    GITHUB_NAPISTU_PY="napistu-py",
+    GITHUB_NAPISTU_R="napistu-r",
+    TUTORIALS_URL="https://github.com/napistu/napistu/wiki",
+    # User-facing functionality should use a user-defined directory but
+    # for convenience, we provide a default cache directory for dev-facing
+    # workflows
+    CACHE_DIR="napistu_data",
+)
+
 PROTEINATLAS_SUBCELL_LOC_URL = (
     "https://www.proteinatlas.org/download/tsv/subcellular_location.tsv.zip"
 )
@@ -332,6 +346,14 @@ CPR_EDGELIST_REQ_VARS = {
 
 CPR_PATH_REQ_VARS = {CPR_EDGELIST.SC_ID_ORIGIN, CPR_EDGELIST.SC_ID_DEST}
 
+FEATURE_ID_VAR_DEFAULT = "feature_id"
+
+RESOLVE_MATCHES_AGGREGATORS = SimpleNamespace(
+    WEIGHTED_MEAN="weighted_mean", MEAN="mean", FIRST="first", MAX="max"
+)
+
+RESOLVE_MATCHES_TMP_WEIGHT_COL = "__tmp_weight_for_aggregation__"
+
 # specifying weighting schemes schema
 
 DEFAULT_WT_TRANS = "identity"
@@ -389,6 +411,8 @@ ONTOLOGIES = SimpleNamespace(
     UNIPROT="uniprot",
 )
 
+ONTOLOGIES_LIST = list(ONTOLOGIES.__dict__.values())
+
 CHARACTERISTIC_COMPLEX_ONTOLOGIES = [
     ONTOLOGIES.ENSEMBL_GENE,
     ONTOLOGIES.NCBI_ENTREZ_GENE,
napistu/gcs/constants.py
CHANGED
@@ -31,7 +31,7 @@ GCS_ASSETS = SimpleNamespace(
                 GCS_SUBASSET_NAMES.REGULATORY_GRAPH: GCS_FILETYPES.REGULATORY_GRAPH,
                 GCS_SUBASSET_NAMES.REGULATORY_DISTANCES: GCS_FILETYPES.REGULATORY_DISTANCES,
             },
-            "public_url": "https://storage.googleapis.com/
+            "public_url": "https://storage.googleapis.com/shackett-napistu-public/test_pathway.tar.gz",
         },
         "human_consensus": {
             "file": "human_consensus.tar.gz",
@@ -40,7 +40,7 @@ GCS_ASSETS = SimpleNamespace(
                 GCS_SUBASSET_NAMES.IDENTIFIERS: GCS_FILETYPES.IDENTIFIERS,
                 GCS_SUBASSET_NAMES.REGULATORY_GRAPH: GCS_FILETYPES.REGULATORY_GRAPH,
             },
-            "public_url": "https://storage.googleapis.com/
+            "public_url": "https://storage.googleapis.com/shackett-napistu-public/human_consensus.tar.gz",
         },
         "human_consensus_w_distances": {
             "file": "human_consensus_w_distances.tar.gz",
napistu/gcs/downloads.py
CHANGED
@@ -2,26 +2,29 @@ from __future__ import annotations
 
 import logging
 import os
-import pathlib
 import re
+import shutil
 from pydantic import BaseModel
 from typing import Optional
 
 from napistu import utils
 from napistu.gcs.constants import GCS_ASSETS
+
 from napistu.gcs.constants import INIT_DATA_DIR_MSG
+from napistu.gcs.utils import _initialize_data_dir
 
 logger = logging.getLogger(__name__)
 
 
-def load_public_cpr_asset(
+def load_public_napistu_asset(
     asset: str,
     data_dir: str,
     subasset: str | None = None,
     init_msg: str = INIT_DATA_DIR_MSG,
+    overwrite: bool = False,
 ) -> str:
     """
-    Load Public
+    Load Public Napistu Asset
 
     Download the `asset` asset to `data_dir` if it doesn't
     already exist and return a path
@@ -30,6 +33,7 @@ def load_public_cpr_asset(
         subasset: the name of a subasset to load from within the asset bundle
         data_dir: the local directory where assets should be stored
         init_msg: message to display if data_dir does not exist
+        overwrite: if True, always download the asset and re-extract it, even if it already exists
 
     returns:
         asset_path: the path to a local file
@@ -42,14 +46,16 @@ def load_public_cpr_asset(
 
     # get the path for the asset (which may have been downloaded in a tar-ball)
     asset_path = os.path.join(data_dir, _get_gcs_asset_path(asset, subasset))
-    if os.path.isfile(asset_path):
+    if os.path.isfile(asset_path) and not overwrite:
         return asset_path
 
     download_path = os.path.join(
         data_dir, os.path.basename(GCS_ASSETS.ASSETS[asset]["file"])
     )
+    if overwrite:
+        _remove_asset_files_if_needed(asset, data_dir)
     if not os.path.isfile(download_path):
-
+        download_public_napistu_asset(asset, download_path)
 
     # gunzip if needed
     extn = utils.get_extn_from_url(download_path)
@@ -70,12 +76,12 @@ def load_public_cpr_asset(
     return asset_path
 
 
-def download_public_cpr_asset(asset: str, out_path: str) -> None:
+def download_public_napistu_asset(asset: str, out_path: str) -> None:
     """
-    Download Public
+    Download Public Napistu Asset
 
     Args:
-        asset (str): The name of a
+        asset (str): The name of a Napistu public asset stored in Google Cloud Storage (GCS)
         out_path (list): Local location where the file should be saved.
 
     Returns:
@@ -86,22 +92,12 @@ def download_public_cpr_asset(asset: str, out_path: str) -> None:
     selected_file = GCS_ASSETS.ASSETS[asset]["public_url"]
 
     logger.info(f"Downloading {os.path.basename(selected_file)} to {out_path}")
+    logger.info(f"Download URI: {selected_file}")
 
     utils.download_wget(selected_file, out_path)
 
-
-
-
-def _initialize_data_dir(data_dir: str, init_msg: str = INIT_DATA_DIR_MSG) -> None:
-    """Create a data directory if it doesn't exist."""
-
-    if not os.path.isdir(data_dir):
-
-        logger.warning(INIT_DATA_DIR_MSG.format(data_dir=data_dir))
-
-        # Artifact directory not found; creating {parentdir}")
-        logger.warning(f"Trying to create {data_dir}")
-        pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)
+    if not os.path.isfile(out_path):
+        raise FileNotFoundError(f"Download failed: {out_path} was not created.")
 
     return None
 
@@ -109,7 +105,7 @@ def _initialize_data_dir(data_dir: str, init_msg: str = INIT_DATA_DIR_MSG) -> None:
 def _validate_gcs_asset(asset: str) -> None:
     """Validate a GCS asset by name."""
 
-    assets =
+    assets = _NapistuAssetsValidator(assets=GCS_ASSETS.ASSETS).assets
     valid_gcs_assets = assets.keys()
     if asset not in valid_gcs_assets:
         raise ValueError(
@@ -170,11 +166,47 @@ def _get_gcs_asset_path(asset: str, subasset: Optional[str] = None) -> str:
     return out_file
 
 
-class
+class _NapistuAssetValidator(BaseModel):
     file: str
     subassets: dict[str, str] | None
     public_url: str
 
 
-class
-    assets: dict[str,
+class _NapistuAssetsValidator(BaseModel):
+    assets: dict[str, _NapistuAssetValidator]
+
+
+def _remove_asset_files_if_needed(asset: str, data_dir: str):
+    """
+    Remove asset archive and any extracted directory from data_dir.
+
+    Args:
+        asset (str): The asset key (e.g., 'test_pathway').
+        data_dir (str): The directory where assets are stored.
+    """
+    logger = logging.getLogger(__name__)
+    removed = []
+
+    # Remove the archive file (any extension)
+    archive_filename = os.path.basename(GCS_ASSETS.ASSETS[asset]["file"])
+    archive_path = os.path.join(data_dir, archive_filename)
+    if os.path.exists(archive_path):
+        os.remove(archive_path)
+        logger.info(f"Removed asset archive: {archive_path}")
+        removed.append(archive_path)
+
+    # Remove extracted directory (if any)
+    asset_dict = GCS_ASSETS.ASSETS[asset]
+    if asset_dict.get("subassets") is not None or any(
+        archive_filename.endswith(ext) for ext in [".tar.gz", ".tgz", ".zip", ".gz"]
+    ):
+        extract_dir = os.path.join(data_dir, archive_filename.split(".")[0])
+        if os.path.isdir(extract_dir):
+            shutil.rmtree(extract_dir)
+            logger.info(f"Removed extracted asset directory: {extract_dir}")
+            removed.append(extract_dir)
+
+    if not removed:
+        logger.debug("No asset files found to remove.")
+
+    return removed
napistu/gcs/utils.py
ADDED
@@ -0,0 +1,21 @@
+import logging
+import os
+import pathlib
+
+from napistu.gcs.constants import INIT_DATA_DIR_MSG
+
+logger = logging.getLogger(__name__)
+
+
+def _initialize_data_dir(data_dir: str, init_msg: str = INIT_DATA_DIR_MSG) -> None:
+    """Create a data directory if it doesn't exist."""
+
+    if not os.path.isdir(data_dir):
+
+        logger.warning(init_msg.format(data_dir=data_dir))
+
+        # Artifact directory not found; creating {parentdir}")
+        logger.warning(f"Trying to create {data_dir}")
+        pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)
+
+    return None
napistu/identifiers.py
CHANGED
@@ -9,15 +9,19 @@ from urllib.parse import urlparse
 
 import libsbml
 import pandas as pd
-from napistu import utils
 from pydantic import BaseModel
 
+from napistu import sbml_dfs_core
+from napistu import sbml_dfs_utils
+from napistu import utils
+
 from napistu.constants import IDENTIFIERS
 from napistu.constants import BIOLOGICAL_QUALIFIER_CODES
 from napistu.constants import ENSEMBL_MOLECULE_TYPES_TO_ONTOLOGY
 from napistu.constants import ENSEMBL_MOLECULE_TYPES_FROM_ONTOLOGY
 from napistu.constants import ENSEMBL_SPECIES_FROM_CODE
 from napistu.constants import ENSEMBL_SPECIES_TO_CODE
+from napistu.constants import SPECIES_IDENTIFIERS_REQUIRED_VARS
 
 logger = logging.getLogger(__name__)
 
@@ -225,24 +229,37 @@ def format_uri_url(uri: str) -> dict:
     elif netloc == "www.ensembl.org" and split_path[-1] == "geneview":
         ontology = "ensembl_gene"
         identifier, id_ontology, _ = parse_ensembl_id(result.query)  # type: ignore
-
+        if ontology != id_ontology:
+            raise ValueError(
+                f"Ontology mismatch: expected {ontology}, got {id_ontology}"
+            )
     elif netloc == "www.ensembl.org" and split_path[-1] in [
         "transview",
         "Transcript",
     ]:
         ontology = "ensembl_transcript"
         identifier, id_ontology, _ = parse_ensembl_id(result.query)  # type: ignore
-
+        if ontology != id_ontology:
+            raise ValueError(
+                f"Ontology mismatch: expected {ontology}, got {id_ontology}"
+            )
     elif netloc == "www.ensembl.org" and split_path[-1] == "ProteinSummary":
         ontology = "ensembl_protein"
         identifier, id_ontology, _ = parse_ensembl_id(result.query)  # type: ignore
-
+        if ontology != id_ontology:
+            raise ValueError(
+                f"Ontology mismatch: expected {ontology}, got {id_ontology}"
+            )
     elif netloc == "www.ensembl.org" and (
         re.search("ENS[GTP]", split_path[-1])
         or re.search("ENS[A-Z]{3}[GTP]", split_path[-1])
     ):
         # format ensembl IDs which lack gene/transview
-        identifier,
+        identifier, implied_ontology, _ = parse_ensembl_id(split_path[-1])
+        if implied_ontology != ontology:
+            raise ValueError(
+                f"Implied ontology mismatch: expected {ontology}, got {implied_ontology}"
+            )
     elif netloc == "www.mirbase.org" or netloc == "mirbase.org":
         ontology = "mirbase"
         if re.search("MI[0-9]+", split_path[-1]):
@@ -676,7 +693,10 @@ def ensembl_id_to_url_regex(identifier: str, ontology: str) -> tuple[str, str]:
     # extract the species name from the 3 letter species code in the id
     # (these letters are not present for humans)
     identifier, implied_ontology, species = parse_ensembl_id(identifier)  # type: ignore
-
+    if implied_ontology != ontology:
+        raise ValueError(
+            f"Implied ontology mismatch: expected {ontology}, got {implied_ontology}"
+        )
 
     # create an appropriate regex for validating input
     # this provides testing for other identifiers even if it is redundant with other
@@ -794,6 +814,85 @@ def _format_Identifiers_pubmed(pubmed_id: str) -> Identifiers:
     return Identifiers([id_entry])
 
 
+def _check_species_identifiers_table(
+    species_identifiers: pd.DataFrame,
+    required_vars: set = SPECIES_IDENTIFIERS_REQUIRED_VARS,
+):
+    missing_required_vars = required_vars.difference(
+        set(species_identifiers.columns.tolist())
+    )
+    if len(missing_required_vars) > 0:
+        raise ValueError(
+            f"{len(missing_required_vars)} required variables "
+            "were missing from the species_identifiers table: "
+            f"{', '.join(missing_required_vars)}"
+        )
+
+    return None
+
+
+def _prepare_species_identifiers(
+    sbml_dfs: sbml_dfs_core.SBML_dfs,
+    dogmatic: bool = False,
+    species_identifiers: Optional[pd.DataFrame] = None,
+) -> pd.DataFrame:
+    """Accepts and validates species_identifiers, or extracts a fresh table if None."""
+
+    if species_identifiers is None:
+        species_identifiers = sbml_dfs_utils.get_characteristic_species_ids(
+            sbml_dfs, dogmatic=dogmatic
+        )
+    else:
+        # check for compatibility
+        try:
+            # check species_identifiers format
+
+            _check_species_identifiers_table(species_identifiers)
+            # quick check for compatibility between sbml_dfs and species_identifiers
+            _validate_assets_sbml_ids(sbml_dfs, species_identifiers)
+        except ValueError as e:
+            logger.warning(
+                f"The provided identifiers are not compatible with your `sbml_dfs` object. Extracting a fresh species identifier table. {e}"
+            )
+            species_identifiers = sbml_dfs_utils.get_characteristic_species_ids(
+                sbml_dfs, dogmatic=dogmatic
+            )
+
+    return species_identifiers
+
+
+def _validate_assets_sbml_ids(
+    sbml_dfs: sbml_dfs_core.SBML_dfs, identifiers_df: pd.DataFrame
+) -> None:
+    """Check an sbml_dfs file and identifiers table for inconsistencies."""
+
+    joined_species_w_ids = sbml_dfs.species.merge(
+        identifiers_df[["s_id", "s_name"]].drop_duplicates(),
+        left_index=True,
+        right_on="s_id",
+    )
+
+    inconsistent_names_df = joined_species_w_ids.query("s_name_x != s_name_y").dropna()
+    inconsistent_names_list = [
+        f"{x} != {y}"
+        for x, y in zip(
+            inconsistent_names_df["s_name_x"], inconsistent_names_df["s_name_y"]
+        )
+    ]
+
+    if len(inconsistent_names_list):
+        example_inconsistent_names = inconsistent_names_list[
+            0 : min(10, len(inconsistent_names_list))
+        ]
+
+        raise ValueError(
+            f"{len(inconsistent_names_list)} species names do not match between "
+            f"sbml_dfs and identifiers_df including: {', '.join(example_inconsistent_names)}"
+        )
+
+    return None
+
+
 class _IdentifierValidator(BaseModel):
     ontology: str
     identifier: str
napistu/ingestion/constants.py
CHANGED
@@ -196,7 +196,6 @@ PSI_MI_INTACT_SPECIES_TO_BASENAME = {
 
 
 # REACTOME
-REACTOME_SBGN_URL = "https://reactome.org/download/current/homo_sapiens.sbgn.tar.gz"
 REACTOME_SMBL_URL = "https://reactome.org/download/current/all_species.3.1.sbml.tgz"
 REACTOME_PATHWAYS_URL = "https://reactome.org/download/current/ReactomePathways.txt"
 REACTOME_PATHWAY_INDEX_COLUMNS = ["file", "source", "species", "pathway_id", "name"]
napistu/ingestion/obo.py
CHANGED
@@ -34,8 +34,14 @@ def create_go_parents_df(go_basic_obo_df: pd.DataFrame) -> pd.DataFrame:
     cc_parents = go_basic_obo_df.query("namespace == 'cellular_component'")["is_a"]
 
     # this is currently at 4496 rows - this is expected to slowly increase
-
-
+    if cc_parents.shape[0] < 4496:
+        raise ValueError(
+            f"Expected at least 4496 rows in cc_parents, got {cc_parents.shape[0]}"
+        )
+    if cc_parents.shape[0] >= 5000:
+        raise ValueError(
+            f"Expected fewer than 5000 rows in cc_parents, got {cc_parents.shape[0]}"
+        )
 
     # convert from a list of strings to a list of dicts then expand so each
     # dict is its own row
@@ -48,8 +54,14 @@ def create_go_parents_df(go_basic_obo_df: pd.DataFrame) -> pd.DataFrame:
     go_parents_df["child_id"] = parent_entries.index
 
     # currently at 4688 rows - this may increase or decrease but will do so slowly
-
-
+    if go_parents_df.shape[0] <= 4600:
+        raise ValueError(
+            f"Expected more than 4600 rows in go_parents_df, got {go_parents_df.shape[0]}"
+        )
+    if go_parents_df.shape[0] >= 5000:
+        raise ValueError(
+            f"Expected fewer than 5000 rows in go_parents_df, got {go_parents_df.shape[0]}"
+        )
 
     return go_parents_df
 
@@ -187,8 +199,10 @@ def create_parent_child_graph(go_parents_df: pd.DataFrame) -> ig.Graph:
     )
 
     # is it a fully connected DAG as expected?
-
-
+    if not parent_child_graph.is_dag():
+        raise ValueError("parent_child_graph is not a DAG as expected")
+    if not parent_child_graph.is_connected("weak"):
+        raise ValueError("parent_child_graph is not weakly connected as expected")
 
     return parent_child_graph
 
@@ -243,8 +257,10 @@ def _isa_str_list_to_dict_list(isa_list: list) -> list[dict[str, Any]]:
 
     isa_dict_list = list()
     for split_val in split_vals:
-
-
+        if len(split_val) != 2:
+            raise ValueError(
+                f"Expected tuple of length 2, got {len(split_val)}: {split_val}"
+            )
         isa_dict_list.append({"parent_id": split_val[0], "parent_name": split_val[1]})
 
     return isa_dict_list
napistu/ingestion/psi_mi.py
CHANGED
@@ -44,7 +44,10 @@ def format_psi(
 
     # the root should be an entrySet if this is a PSI 3.0 file
     entry_set = et.getroot()
-
+    if entry_set.tag != PSI_MI_INTACT_XML_NAMESPACE + "entrySet":
+        raise ValueError(
+            f"Expected root tag to be {PSI_MI_INTACT_XML_NAMESPACE + 'entrySet'}, got {entry_set.tag}"
+        )
 
     entry_nodes = entry_set.findall(f"./{PSI_MI_INTACT_XML_NAMESPACE}entry")
 
@@ -97,7 +100,10 @@ def _download_intact_species(
 def _format_entry(an_entry) -> dict[str, Any]:
     """Extract a single XML entry of interactors and interactions."""
 
-
+    if an_entry.tag != PSI_MI_INTACT_XML_NAMESPACE + "entry":
+        raise ValueError(
+            f"Expected entry tag to be {PSI_MI_INTACT_XML_NAMESPACE + 'entry'}, got {an_entry.tag}"
+        )
 
     entry_dict = {
         "source": _format_entry_source(an_entry),
@@ -169,7 +175,10 @@ def _format_entry_interactor_list(an_entry) -> list[dict[str, Any]]:
 def _format_entry_interactor(interactor) -> dict[str, Any]:
     """Format a single molecular interactor in an interaction list XML node."""
 
-
+    if interactor.tag != PSI_MI_INTACT_XML_NAMESPACE + "interactor":
+        raise ValueError(
+            f"Expected interactor tag to be {PSI_MI_INTACT_XML_NAMESPACE + 'interactor'}, got {interactor.tag}"
+        )
 
     # optional full name
     interactor_name_node = interactor.find(
@@ -238,7 +247,10 @@ def _format_entry_interactions(an_entry) -> list[dict[str, Any]]:
 def _format_entry_interaction(interaction) -> dict[str, Any]:
     """Format a single interaction in an XML interaction list."""
 
-
+    if interaction.tag != PSI_MI_INTACT_XML_NAMESPACE + "interaction":
+        raise ValueError(
+            f"Expected interaction tag to be {PSI_MI_INTACT_XML_NAMESPACE + 'interaction'}, got {interaction.tag}"
+        )
 
     interaction_name = interaction.find(
         f"./{PSI_MI_INTACT_XML_NAMESPACE}names/{PSI_MI_INTACT_XML_NAMESPACE}shortLabel"
@@ -260,7 +272,10 @@ def _format_entry_interaction(interaction) -> dict[str, Any]:
 def _format_entry_interaction_participants(interaction_participant) -> dict[str, str]:
     """Format the participants in an XML interaction."""
 
-
+    if interaction_participant.tag != PSI_MI_INTACT_XML_NAMESPACE + "participant":
+        raise ValueError(
+            f"Expected participant tag to be {PSI_MI_INTACT_XML_NAMESPACE + 'participant'}, got {interaction_participant.tag}"
+        )
 
     out = {
         "interactor_id": interaction_participant.attrib["id"],
napistu/ingestion/reactome.py
CHANGED
@@ -17,42 +17,12 @@ from napistu.consensus import construct_sbml_dfs_dict
 from napistu.ingestion.constants import REACTOME_PATHWAY_INDEX_COLUMNS
 from napistu.ingestion.constants import REACTOME_PATHWAY_LIST_COLUMNS
 from napistu.ingestion.constants import REACTOME_PATHWAYS_URL
-from napistu.ingestion.constants import REACTOME_SBGN_URL
 from napistu.ingestion.constants import REACTOME_SMBL_URL
-from napistu.ingestion.constants import SPECIES_FULL_NAME_HUMAN
 from fs import open_fs
 
 logger = logging.getLogger(__name__)
 
 
-def reactome_sbgn_download(output_dir_path: str, overwrite: bool = False):
-    """
-    Reactome SBGN Download
-
-    Download all human Reactome SBGN (systems biology graphical notation) files.
-
-    Args:
-        output_dir_path (str): Paths to a directory where .sbgn files should be saved.
-        overwrite (bool): Overwrite an existing output directory.
-    """
-    utils.download_and_extract(
-        REACTOME_SBGN_URL,
-        output_dir_path=output_dir_path,
-        overwrite=overwrite,
-    )
-    # create the pathway index
-    pw_index = _build_reactome_pw_index(
-        output_dir_path,
-        file_ext="sbgn",
-        # For sbgn only homo sapiens files are available
-        species_filter=(SPECIES_FULL_NAME_HUMAN,),
-    )
-    # save as tsv
-    out_fs = open_fs(output_dir_path)
-    with out_fs.open("pw_index.tsv", "wb") as index_path:
-        pw_index.to_csv(index_path, sep="\t", index=False)
-
-
 def reactome_sbml_download(output_dir_path: str, overwrite: bool = False):
     """
     Reactome SBML Download
@@ -164,8 +134,14 @@ def _check_reactome_pw_index(pw_index: indices.PWIndex, reactome_pathway_list: l
 
     # check extension in pw_index
     extn = set([os.path.splitext(x)[1] for x in pw_index["file"]])
-
-
+    if len(extn) != 1:
+        raise ValueError(
+            f"Expected all files to have the same extension, but found extensions: {extn}"
+        )
+    if len(extn.intersection({".sbml"})) != 1:
+        raise ValueError(
+            f"Expected all files to have the .sbml extension, but found: {extn}"
+        )
     extn_string = extn.pop()
 
     local_reactome_pws = set(pw_index["pathway_id"])
napistu/mcp/__init__.py
ADDED
@@ -0,0 +1,69 @@
+"""
+MCP (Model Context Protocol) Server for Napistu.
+
+This module requires optional dependencies. Install with:
+    pip install napistu[mcp]
+"""
+
+import asyncio
+from typing import Dict, Any
+
+__all__ = ["start_server", "register_object", "is_available"]
+
+# Check if MCP dependencies are available
+try:
+    __import__("mcp")
+    is_available = True
+except ImportError:
+    is_available = False
+
+if is_available:
+    from .server import create_server
+    from .profiles import get_profile
+
+    def start_server(profile_name: str = "local", **kwargs) -> Dict[str, Any]:
+        """
+        Start an MCP server with a specific profile.
+
+        Args:
+            profile_name: Name of the profile ('local', 'remote', or 'full')
+            **kwargs: Additional configuration options
+
+        Returns:
+            Server control dictionary
+        """
+        profile = get_profile(profile_name, **kwargs)
+        server = create_server(profile)
+
+        # Start the server
+        asyncio.create_task(server.start())
+
+        # Return control interface
+        return {
+            "status": "running",
+            "server": server,
+            "profile": profile_name,
+            "stop": server.stop,
+            "register_object": (
+                register_object if profile.get_config()["enable_execution"] else None
+            ),
+        }
+
+    # Helper function for registering objects with a running server
+    def register_object(name, obj):
+        """Register an object with the execution component."""
+        from .execution import register_object as _register
+
+        return _register(name, obj)
+
+else:
+    # Stubs for when MCP is not available
+    def start_server(*args, **kwargs):
+        raise ImportError(
+            "MCP support not installed. Install with 'pip install napistu[mcp]'"
+        )
+
+    def register_object(*args, **kwargs):
+        raise ImportError(
+            "MCP support not installed. Install with 'pip install napistu[mcp]'"
+        )