napistu 0.1.0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. napistu/__init__.py +1 -1
  2. napistu/consensus.py +1010 -513
  3. napistu/constants.py +24 -0
  4. napistu/gcs/constants.py +2 -2
  5. napistu/gcs/downloads.py +57 -25
  6. napistu/gcs/utils.py +21 -0
  7. napistu/identifiers.py +105 -6
  8. napistu/ingestion/constants.py +0 -1
  9. napistu/ingestion/obo.py +24 -8
  10. napistu/ingestion/psi_mi.py +20 -5
  11. napistu/ingestion/reactome.py +8 -32
  12. napistu/mcp/__init__.py +69 -0
  13. napistu/mcp/__main__.py +180 -0
  14. napistu/mcp/codebase.py +182 -0
  15. napistu/mcp/codebase_utils.py +298 -0
  16. napistu/mcp/constants.py +72 -0
  17. napistu/mcp/documentation.py +166 -0
  18. napistu/mcp/documentation_utils.py +235 -0
  19. napistu/mcp/execution.py +382 -0
  20. napistu/mcp/profiles.py +73 -0
  21. napistu/mcp/server.py +86 -0
  22. napistu/mcp/tutorials.py +124 -0
  23. napistu/mcp/tutorials_utils.py +230 -0
  24. napistu/mcp/utils.py +47 -0
  25. napistu/mechanism_matching.py +782 -26
  26. napistu/modify/constants.py +41 -0
  27. napistu/modify/curation.py +4 -1
  28. napistu/modify/gaps.py +243 -156
  29. napistu/modify/pathwayannot.py +26 -8
  30. napistu/network/neighborhoods.py +16 -7
  31. napistu/network/net_create.py +209 -54
  32. napistu/network/net_propagation.py +118 -0
  33. napistu/network/net_utils.py +1 -32
  34. napistu/rpy2/netcontextr.py +10 -7
  35. napistu/rpy2/rids.py +7 -5
  36. napistu/sbml_dfs_core.py +46 -29
  37. napistu/sbml_dfs_utils.py +37 -1
  38. napistu/source.py +8 -2
  39. napistu/utils.py +67 -8
  40. napistu-0.2.4.dev2.dist-info/METADATA +84 -0
  41. napistu-0.2.4.dev2.dist-info/RECORD +95 -0
  42. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/WHEEL +1 -1
  43. tests/conftest.py +11 -5
  44. tests/test_consensus.py +4 -1
  45. tests/test_gaps.py +127 -0
  46. tests/test_gcs.py +3 -2
  47. tests/test_igraph.py +14 -0
  48. tests/test_mcp_documentation_utils.py +13 -0
  49. tests/test_mechanism_matching.py +658 -0
  50. tests/test_net_propagation.py +89 -0
  51. tests/test_net_utils.py +83 -0
  52. tests/test_sbml.py +2 -0
  53. tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
  54. tests/test_utils.py +81 -0
  55. napistu-0.1.0.dist-info/METADATA +0 -56
  56. napistu-0.1.0.dist-info/RECORD +0 -77
  57. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
  58. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
  59. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/top_level.txt +0 -0
@@ -24,6 +24,8 @@ from napistu.constants import ONTOLOGIES
24
24
  from napistu.constants import ENSEMBL_PREFIX_TO_ONTOLOGY
25
25
  from napistu.modify.constants import COFACTOR_SCHEMA
26
26
  from napistu.modify.constants import COFACTOR_CHEBI_IDS
27
+ from napistu.modify.constants import NEO4_MEMBERS_SET
28
+ from napistu.modify.constants import REACTOME_CROSSREF_SET
27
29
 
28
30
  logger = logging.getLogger(__name__)
29
31
 
@@ -499,12 +501,14 @@ def add_entity_sets(
499
501
  reactome_members = _read_neo4j_members(neo4j_members)
500
502
 
501
503
  # create missing species and compartmentalized species
504
+ logger.info("Adding entity set species")
502
505
  (
503
506
  merged_membership,
504
507
  new_species_for_sbml_dfs,
505
508
  set_component_species_ids,
506
509
  ) = _add_entity_sets_species(sbml_dfs, reactome_members)
507
510
 
511
+ logger.info("Adding complex formation species")
508
512
  (
509
513
  new_compartmentalized_species_for_sbml_dfs,
510
514
  updated_compartmentalized_membership,
@@ -515,6 +519,7 @@ def add_entity_sets(
515
519
  set_component_species_ids,
516
520
  )
517
521
 
522
+ logger.info("Adding entity set reactions")
518
523
  (
519
524
  new_reactions_for_sbml_dfs,
520
525
  new_reaction_species_for_sbml_dfs,
@@ -569,7 +574,8 @@ def add_reactome_identifiers(
569
574
 
570
575
  """
571
576
 
572
- select_reactome_ids = _format_reactome_crossref_ids(crossref_path)
577
+ logger.info("Reading Reactome crossref ids")
578
+ select_reactome_ids = _read_reactome_crossref_ids(crossref_path)
573
579
 
574
580
  # read all current identifiers
575
581
  current_ids = sbml_dfs.get_identifiers(SBML_DFS.SPECIES)
@@ -582,6 +588,7 @@ def add_reactome_identifiers(
582
588
  )
583
589
 
584
590
  # combine existing s_ids with additional cross-ref annotations using uniprot ids
591
+ logger.info("Merging Reactome crossref ids with existing identifiers")
585
592
  merged_crossrefs = _merge_reactome_crossref_ids(
586
593
  current_molecular_ids, select_reactome_ids
587
594
  )
@@ -637,7 +644,8 @@ def add_reactome_identifiers(
637
644
  updated_identifiers.index.name = SBML_DFS.S_ID
638
645
  updated_identifiers.name = "new_Identifiers"
639
646
 
640
- # add new identifiers to species table
647
+ # add new identifiers to species table
648
+ logger.info("Adding new identifiers to species table")
641
649
  updated_species = sbml_dfs.species.merge(
642
650
  updated_identifiers,
643
651
  left_index=True,
@@ -1173,6 +1181,9 @@ def _read_neo4j_members(neo4j_members: str) -> pd.DataFrame:
1173
1181
  with bfs.open(path, "rb") as f:
1174
1182
  reactome_members = pd.read_csv(f).assign(url="")
1175
1183
 
1184
+ # check that the expected columns are present
1185
+ utils.match_pd_vars(reactome_members, NEO4_MEMBERS_SET).assert_present()
1186
+
1176
1187
  reactome_members[IDENTIFIERS.ONTOLOGY] = reactome_members[
1177
1188
  IDENTIFIERS.ONTOLOGY
1178
1189
  ].str.lower()
@@ -1295,9 +1306,13 @@ def _merge_reactome_crossref_ids(
1295
1306
  ].drop("_merge", axis=1)
1296
1307
 
1297
1308
  merged_crossrefs = pd.concat([uni_rct_with_crossrefs, uni_no_rct_with_crossrefs])
1298
- assert (
1299
- uni_rct_with_crossrefs.shape[0] + uni_no_rct_with_crossrefs.shape[0]
1300
- ) == merged_crossrefs.shape[0]
1309
+ if (
1310
+ not (uni_rct_with_crossrefs.shape[0] + uni_no_rct_with_crossrefs.shape[0])
1311
+ == merged_crossrefs.shape[0]
1312
+ ):
1313
+ raise ValueError(
1314
+ "The number of merged crossrefs does not match the sum of the number of uniprot + reactome crossrefs and uniprot-only crossrefs"
1315
+ )
1301
1316
 
1302
1317
  species_with_protein_and_no_gene = current_molecular_ids[
1303
1318
  current_molecular_ids[IDENTIFIERS.ONTOLOGY] == ONTOLOGIES.UNIPROT
@@ -1332,9 +1347,9 @@ def _merge_reactome_crossref_ids(
1332
1347
  return merged_crossrefs
1333
1348
 
1334
1349
 
1335
- def _format_reactome_crossref_ids(
1350
+ def _read_reactome_crossref_ids(
1336
1351
  crossref_path: str,
1337
- ) -> str:
1352
+ ) -> pd.DataFrame:
1338
1353
  """
1339
1354
  Format Reactome CrossRef IDs
1340
1355
 
@@ -1347,7 +1362,7 @@ def _format_reactome_crossref_ids(
1347
1362
 
1348
1363
  Returns
1349
1364
  -------
1350
- select_reactome_ids: str
1365
+ select_reactome_ids: pd.DataFrame
1351
1366
  Crossref identifiers
1352
1367
 
1353
1368
  """
@@ -1357,6 +1372,9 @@ def _format_reactome_crossref_ids(
1357
1372
  with bfs.open(path, "rb") as f:
1358
1373
  reactome_ids = pd.read_csv(f)
1359
1374
 
1375
+ # check that the expected columns are present
1376
+ utils.match_pd_vars(reactome_ids, REACTOME_CROSSREF_SET).assert_present()
1377
+
1360
1378
  # only use ensembl and pharos for now
1361
1379
 
1362
1380
  # rename pharos ontology
@@ -84,7 +84,8 @@ def find_and_prune_neighborhoods(
84
84
 
85
85
  if isinstance(compartmentalized_species, str):
86
86
  compartmentalized_species = [compartmentalized_species]
87
- assert isinstance(compartmentalized_species, list)
87
+ if not isinstance(compartmentalized_species, list):
88
+ raise TypeError("compartmentalized_species must be a list")
88
89
 
89
90
  if isinstance(precomputed_distances, pd.DataFrame):
90
91
  logger.info("Pre-computed neighbors based on precomputed_distances")
@@ -832,8 +833,8 @@ def add_vertices_uri_urls(
832
833
 
833
834
  """
834
835
 
835
- assert isinstance(vertices, pd.DataFrame)
836
- assert vertices.shape[0] > 0
836
+ if vertices.shape[0] <= 0:
837
+ raise ValueError("vertices must have at least one row")
837
838
 
838
839
  # add uri urls for each node
839
840
 
@@ -880,7 +881,8 @@ def add_vertices_uri_urls(
880
881
  [neighborhood_species_aug, neighborhood_reactions]
881
882
  ).fillna("")
882
883
 
883
- assert isinstance(updated_vertices, pd.DataFrame)
884
+ if not isinstance(updated_vertices, pd.DataFrame):
885
+ raise TypeError("updated_vertices must be a pandas DataFrame")
884
886
  if vertices.shape[0] != updated_vertices.shape[0]:
885
887
  raise ValueError("output vertices rows did not match input")
886
888
 
@@ -1532,7 +1534,8 @@ def _calculate_path_attrs(
1532
1534
  raise TypeError("neighborhood_paths should be a list of lists of edge indices")
1533
1535
  if not isinstance(vertices, list):
1534
1536
  raise TypeError("vertices should be a list of list of vertices")
1535
- assert len(vertices) > 0 # control for length zero vertices upstream
1537
+ if len(vertices) <= 0:
1538
+ raise ValueError("vertices must have length greater than zero")
1536
1539
  if len(neighborhood_paths) != len(vertices):
1537
1540
  raise ValueError("vertices and neighborhood_paths were not the same length")
1538
1541
 
@@ -1588,7 +1591,13 @@ def _calculate_path_attrs(
1588
1591
  path_attributes_df = pd.concat([path_attributes_df, edgeles_nodes_df])
1589
1592
  neighborhood_path_entities.update({x: {x} for x in edgeless_nodes})
1590
1593
 
1591
- assert path_attributes_df.shape[0] == len(neighborhood_paths)
1592
- assert len(neighborhood_path_entities) == len(neighborhood_paths)
1594
+ if path_attributes_df.shape[0] != len(neighborhood_paths):
1595
+ raise ValueError(
1596
+ "path_attributes_df row count must match number of neighborhood_paths"
1597
+ )
1598
+ if len(neighborhood_path_entities) != len(neighborhood_paths):
1599
+ raise ValueError(
1600
+ "neighborhood_path_entities length must match number of neighborhood_paths"
1601
+ )
1593
1602
 
1594
1603
  return path_attributes_df, neighborhood_path_entities