napistu 0.4.5__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,13 +18,26 @@ from napistu import utils
  from napistu.network import ng_utils
  from napistu.network import paths

- from napistu.constants import SBML_DFS
- from napistu.constants import MINI_SBO_NAME_TO_POLARITY
- from napistu.constants import MINI_SBO_TO_NAME
-
- from napistu.network.constants import GRAPH_WIRING_APPROACHES
- from napistu.network.constants import NEIGHBORHOOD_NETWORK_TYPES
- from napistu.network.constants import VALID_NEIGHBORHOOD_NETWORK_TYPES
+ from napistu.constants import (
+     MINI_SBO_NAME_TO_POLARITY,
+     MINI_SBO_TO_NAME,
+     NAPISTU_EDGELIST,
+     ONTOLOGIES,
+     SBML_DFS,
+ )
+
+ from napistu.network.constants import (
+     DISTANCES,
+     GRAPH_RELATIONSHIPS,
+     GRAPH_WIRING_APPROACHES,
+     NAPISTU_GRAPH_EDGES,
+     NAPISTU_GRAPH_NODE_TYPES,
+     NAPISTU_GRAPH_VERTICES,
+     NEIGHBORHOOD_DICT_KEYS,
+     NEIGHBORHOOD_NETWORK_TYPES,
+     NET_POLARITY,
+     VALID_NEIGHBORHOOD_NETWORK_TYPES,
+ )

  logger = logging.getLogger(__name__)

@@ -34,8 +47,9 @@ def find_and_prune_neighborhoods(
      napistu_graph: ig.Graph,
      compartmentalized_species: str | list[str],
      precomputed_distances: pd.DataFrame | None = None,
+     min_pw_size: int = 3,
      source_total_counts: pd.Series | None = None,
-     network_type: str = NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,
+     network_type: str = NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
      order: int = 3,
      verbose: bool = True,
      top_n: int = 10,
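Taken together, 0.4.7 threads a new `min_pw_size` argument through the public entry point and flips the default `network_type` from DOWNSTREAM to HOURGLASS. A minimal usage sketch against the new signature, assuming this module is napistu.network.neighborhoods and that `sbml_dfs`, `napistu_graph`, and the sc_id are pre-existing placeholders:

    from napistu.network import neighborhoods
    from napistu.network.constants import NEIGHBORHOOD_NETWORK_TYPES

    pruned = neighborhoods.find_and_prune_neighborhoods(
        sbml_dfs,                               # an existing SBML_dfs model
        napistu_graph,                          # its igraph representation
        compartmentalized_species=["SC00001"],  # hypothetical sc_id
        min_pw_size=3,                          # new in 0.4.7
        network_type=NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,  # pin to keep the 0.4.5 default
        order=3,
        top_n=10,
    )

Callers that relied on the implicit DOWNSTREAM default should pass it explicitly, since upgrading otherwise switches them silently to hourglass neighborhoods.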
@@ -55,6 +69,8 @@ def find_and_prune_neighborhoods(
          Compartmentalized species IDs for neighborhood centers
      precomputed_distances : pd.DataFrame or None
          If provided, an edgelist of origin->destination path weights and lengths
+     min_pw_size: int
+         the minimum size of a pathway to be considered
      source_total_counts: pd.Series | None
          Optional, A series of the total counts of each source. As produced by
          source.get_source_total_counts()
@@ -91,6 +107,16 @@ def find_and_prune_neighborhoods(
      if not isinstance(compartmentalized_species, list):
          raise TypeError("compartmentalized_species must be a list")

+     invalid_cspecies = [
+         x
+         for x in compartmentalized_species
+         if x not in sbml_dfs.compartmentalized_species.index
+     ]
+     if len(invalid_cspecies) > 0:
+         raise ValueError(
+             f"compartmentalized_species contains invalid species: {invalid_cspecies}"
+         )
+
      if isinstance(precomputed_distances, pd.DataFrame):
          logger.info("Pre-computed neighbors based on precomputed_distances")

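The same membership check is added to find_neighborhoods further down, so unknown IDs now fail fast with a ValueError instead of surfacing later as empty igraph lookups. A sketch of the new failure mode, assuming "not_a_real_id" is absent from sbml_dfs.compartmentalized_species.index:

    try:
        neighborhoods.find_and_prune_neighborhoods(
            sbml_dfs,
            napistu_graph,
            compartmentalized_species=["not_a_real_id"],
        )
    except ValueError as err:
        print(err)  # compartmentalized_species contains invalid species: ['not_a_real_id']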
@@ -105,18 +131,19 @@ def find_and_prune_neighborhoods(
      else:
          precomputed_neighbors = None

-     neighborhoods = find_neighborhoods(
+     neighborhood_dicts = find_neighborhoods(
          sbml_dfs=sbml_dfs,
          napistu_graph=napistu_graph,
          compartmentalized_species=compartmentalized_species,
          network_type=network_type,
          order=order,
-         verbose=verbose,
          precomputed_neighbors=precomputed_neighbors,
+         min_pw_size=min_pw_size,
          source_total_counts=source_total_counts,
+         verbose=verbose,
      )

-     pruned_neighborhoods = prune_neighborhoods(neighborhoods, top_n=top_n)
+     pruned_neighborhoods = prune_neighborhoods(neighborhood_dicts, top_n=top_n)

      return pruned_neighborhoods

@@ -164,7 +191,7 @@ def load_neighborhoods(
      -------
      all_neighborhoods_df: pd.DataFrame
          A table containing all species in each query s_ids neighborhood
-     neighborhoods_dict: dict
+     neighborhood_dicts: dict
          Outputs from find_and_prune_neighborhoods for each s_id

      """
@@ -178,16 +205,16 @@ def load_neighborhoods(
      neighborhood_paths = [vertices_path, networks_path]

      if all([os.path.isfile(x) for x in neighborhood_paths]) and overwrite is False:
-         print(f"loading existing neighborhoods for {neighborhood_prefix}")
+         logger.info(f"loading existing neighborhoods for {neighborhood_prefix}")

          all_neighborhoods_df = pd.read_csv(vertices_path, sep="\t")
          with open(networks_path, "rb") as in_file:
-             neighborhoods_dict = pickle.load(in_file)
+             neighborhood_dicts = pickle.load(in_file)

      else:
-         print(f"creating neighborhoods based on {neighborhood_prefix}")
+         logger.info(f"creating neighborhoods based on {neighborhood_prefix}")

-         all_neighborhoods_df, neighborhoods_dict = create_neighborhoods(
+         all_neighborhoods_df, neighborhood_dicts = create_neighborhoods(
              s_ids=s_ids,
              sbml_dfs=sbml_dfs,
              napistu_graph=napistu_graph,
@@ -202,9 +229,9 @@ def load_neighborhoods(
          # pickle neighborhoods
          with open(networks_path, "wb") as fh:
-             pickle.dump(neighborhoods_dict, fh)
+             pickle.dump(neighborhood_dicts, fh)

-     return all_neighborhoods_df, neighborhoods_dict
+     return all_neighborhoods_df, neighborhood_dicts


 def create_neighborhoods(
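Because load_neighborhoods now reports through the module logger rather than print, its progress messages are silent under a default logging configuration. A one-line sketch for callers who want the old console feedback back:

    import logging

    # surfaces the "loading existing neighborhoods ..." / "creating neighborhoods ..." messages
    logging.basicConfig(level=logging.INFO)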
@@ -242,7 +269,7 @@ def create_neighborhoods(
      -------
      all_neighborhoods_df: pd.DataFrame
          A table containing all species in each query s_ids neighborhood
-     neighborhoods_dict: dict
+     neighborhood_dicts: dict
          Outputs from find_and_prune_neighborhoods for each s_id
      """

@@ -263,13 +290,13 @@ def create_neighborhoods(
          raise TypeError(f"top_n was a {type(top_n)} and must be an int")

      neighborhoods_list = list()
-     neighborhoods_dict = dict()
+     neighborhood_dicts = dict()
      for s_id in s_ids:
          query_sc_species = ng_utils.compartmentalize_species(sbml_dfs, s_id)

          compartmentalized_species = query_sc_species[SBML_DFS.SC_ID].tolist()

-         neighborhoods = find_and_prune_neighborhoods(
+         neighborhood_dicts = find_and_prune_neighborhoods(
              sbml_dfs,
              napistu_graph,
              compartmentalized_species=compartmentalized_species,
@@ -283,23 +310,25 @@ def create_neighborhoods(

          neighborhood_entities = pd.concat(
              [
-                 neighborhoods[sc_id]["vertices"].assign(focal_sc_id=sc_id)
-                 for sc_id in neighborhoods.keys()
+                 neighborhood_dicts[sc_id][NEIGHBORHOOD_DICT_KEYS.VERTICES].assign(
+                     focal_sc_id=sc_id
+                 )
+                 for sc_id in neighborhood_dicts.keys()
              ]
          ).assign(focal_s_id=s_id)

          neighborhood_species = neighborhood_entities.merge(
              sbml_dfs.compartmentalized_species[SBML_DFS.S_ID],
-             left_on="name",
+             left_on=NAPISTU_GRAPH_VERTICES.NAME,
              right_index=True,
          )

          neighborhoods_list.append(neighborhood_species)
-         neighborhoods_dict[s_id] = neighborhoods
+         neighborhood_dicts[s_id] = neighborhood_dicts

      all_neighborhoods_df = pd.concat(neighborhoods_list).reset_index(drop=True)

-     return all_neighborhoods_df, neighborhoods_dict
+     return all_neighborhoods_df, neighborhood_dicts


 def create_neighborhood_prefix(network_type: str, order: int, top_n: int) -> str:
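With string keys like "vertices" replaced by NEIGHBORHOOD_DICT_KEYS throughout, downstream code should read neighborhood entries via the constants as well. A sketch, continuing the earlier example and treating the sc_id as a hypothetical placeholder:

    from napistu.network.constants import NEIGHBORHOOD_DICT_KEYS

    one_neighborhood = pruned["SC00001"]
    vertices = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.VERTICES]
    edges = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.EDGES]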
@@ -321,6 +350,7 @@ def create_neighborhood_prefix(network_type: str, order: int, top_n: int) -> str
 def load_neighborhoods_by_partition(
     selected_partition: int,
     neighborhood_outdir: str,
+    cache_dir: str,
     wiring_approach: str = GRAPH_WIRING_APPROACHES.REGULATORY,
 ) -> None:
     """
@@ -343,19 +373,18 @@ def load_neighborhoods_by_partition(

      """

-     consensus_root = "/group/cpr/consensus"
-     consensus_name = "reactome"
-     consensus_outdir = os.path.join(consensus_root, consensus_name)
-
      if not os.path.isdir(neighborhood_outdir):
          raise FileNotFoundError(f"{neighborhood_outdir} does not exist")

+     if not os.path.isdir(cache_dir):
+         raise FileNotFoundError(f"{cache_dir} does not exist")
+
      partition_output = os.path.join(
          neighborhood_outdir, f"partition_{selected_partition}"
      )
      # initialize an empty output
      if os.path.isdir(partition_output):
-         print(f"removing existing directory: {partition_output}")
+         logger.warning(f"removing existing directory: {partition_output}")
          shutil.rmtree(partition_output)
      os.makedirs(partition_output)

@@ -369,13 +398,13 @@ def load_neighborhoods_by_partition(
      if parition_sids_df.shape[0] == 0:
          raise ValueError(f"No s_ids associated with partition {selected_partition}")

-     parition_sids = parition_sids_df["s_id"].tolist()
+     parition_sids = parition_sids_df[SBML_DFS.S_ID].tolist()

      # read pathway and network data

      # read model containing Calico curations. this is primarily to support search programs
      # to not use these switch to refined.pkl
-     refined_model_pkl_path = os.path.join(consensus_outdir, "curated.pkl")
+     refined_model_pkl_path = os.path.join(cache_dir, "curated.pkl")
      with open(refined_model_pkl_path, "rb") as in_file:
          refined_model = pickle.load(in_file)
      refined_model.validate()
@@ -383,12 +412,12 @@ def load_neighborhoods_by_partition(
      # load the graph
      napistu_graph = ng_utils.read_network_pkl(
          model_prefix="curated",
-         network_dir=consensus_outdir,
+         network_dir=cache_dir,
          directed=True,
          wiring_approach=wiring_approach,
      )

-     all_neighborhoods_df, neighborhoods_dict = load_neighborhoods(
+     _, _ = load_neighborhoods(
          s_ids=parition_sids,
          sbml_dfs=refined_model,
          napistu_graph=napistu_graph,
@@ -429,7 +458,7 @@ def read_paritioned_neighborhoods(
      -------
      all_neighborhoods_df: pd.DataFrame
          A table containing all species in each query s_ids neighborhood
-     neighborhoods_dict: dict
+     neighborhood_dicts: dict
          Outputs from find_and_prune_neighborhoods for each s_id

      """
@@ -494,7 +523,7 @@ def read_paritioned_neighborhoods(

      # combine all partitions' dfs and dicts
      all_neighborhoods_df = pd.concat(neighborhood_paths_list).reset_index(drop=True)
-     neighborhoods_dict = dict(ChainMap(*path_dict_list))
+     neighborhood_dicts = dict(ChainMap(*path_dict_list))

      # TO DO - remove s_id duplication (these are present in the vertices table in the partition outputs)
      if not all(all_neighborhoods_df["s_id_x"] == all_neighborhoods_df["s_id_y"]):
@@ -503,14 +532,14 @@ def read_paritioned_neighborhoods(
              {"s_id_x": "s_id"}, axis=1
          )

-     return all_neighborhoods_df, neighborhoods_dict
+     return all_neighborhoods_df, neighborhood_dicts


 def find_neighborhoods(
     sbml_dfs: sbml_dfs_core.SBML_dfs,
     napistu_graph: ig.Graph,
     compartmentalized_species: list[str],
-    network_type: str = "downstream",
+    network_type: str = NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
     order: int = 3,
     min_pw_size: int = 3,
     precomputed_neighbors: pd.DataFrame | None = None,
@@ -544,7 +573,7 @@ def find_neighborhoods(
          the minimum size of a pathway to be considered
      source_total_counts: pd.Series | None
          Optional, A series of the total counts of each source. As produced by
-         source.get_source_total_counts()\
+         source.get_source_total_counts()
      verbose: bool
          Extra reporting

@@ -557,15 +586,24 @@ def find_neighborhoods(
      if not isinstance(network_type, str):
          raise TypeError(f"network_type was a {type(network_type)} and must be a str")

-     valid_network_types = ["downstream", "upstream", "hourglass"]
-     if network_type not in valid_network_types:
+     if network_type not in VALID_NEIGHBORHOOD_NETWORK_TYPES:
          raise ValueError(
-             f"network_type must be one of {', '.join(valid_network_types)}"
+             f"network_type must be one of {', '.join(VALID_NEIGHBORHOOD_NETWORK_TYPES)}"
          )

      if not isinstance(order, int):
          raise TypeError(f"order was a {type(order)} and must be an int")

+     invalid_cspecies = [
+         x
+         for x in compartmentalized_species
+         if x not in sbml_dfs.compartmentalized_species.index
+     ]
+     if len(invalid_cspecies) > 0:
+         raise ValueError(
+             f"compartmentalized_species contains invalid species: {invalid_cspecies}"
+         )
+
      # create a table which includes cspecies and reaction nearby each of the
      # focal compartmentalized_speecies
      neighborhood_df = _build_raw_neighborhood_df(
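Validation now derives from VALID_NEIGHBORHOOD_NETWORK_TYPES instead of a hand-written list, so the accepted values and the error message can no longer drift apart from the NEIGHBORHOOD_NETWORK_TYPES constants. A sketch of the behavior implied by this hunk:

    from napistu.network.constants import (
        NEIGHBORHOOD_NETWORK_TYPES,
        VALID_NEIGHBORHOOD_NETWORK_TYPES,
    )

    # the new default must itself be a valid type
    assert NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS in VALID_NEIGHBORHOOD_NETWORK_TYPES

    # find_neighborhoods(..., network_type="sideways") now raises:
    # ValueError: network_type must be one of <the constant-backed list>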
@@ -634,8 +672,8 @@ def create_neighborhood_dict_entry(
          nodes in the neighborhood
      edges: pd.DataFrame
          edges in the neighborhood
-     edge_sources: pd.DataFrame
-         models that edges were derived from
+     reaction_sources: pd.DataFrame
+         models that reactions were derived from
      neighborhood_path_entities: dict
          upstream and downstream dicts representing entities in paths.
          If the keys are to be included in a neighborhood, the
@@ -643,12 +681,12 @@ def create_neighborhood_dict_entry(
          focal node.
      """

-     one_neighborhood_df = neighborhood_df[neighborhood_df["sc_id"] == sc_id]
+     one_neighborhood_df = neighborhood_df[neighborhood_df[SBML_DFS.SC_ID] == sc_id]

      if verbose:
          _create_neighborhood_dict_entry_logging(sc_id, one_neighborhood_df, sbml_dfs)

-     if not one_neighborhood_df["name"].eq(sc_id).any():
+     if not one_neighborhood_df[NAPISTU_GRAPH_VERTICES.NAME].eq(sc_id).any():
          raise ValueError(
              f"The focal node sc_id = {sc_id} was not in 'one_neighborhood_df'.\
              By convention it should be part of its neighborhood"
@@ -664,95 +702,61 @@ def create_neighborhood_dict_entry(

      # add edge polarity: whether edges are activating, inhibiting or unknown
      if edges.shape[0] > 0:
-         edges["link_polarity"] = (
-             edges["sbo_term"].map(MINI_SBO_TO_NAME).map(MINI_SBO_NAME_TO_POLARITY)
+         edges[NET_POLARITY.LINK_POLARITY] = (
+             edges[SBML_DFS.SBO_TERM]
+             .map(MINI_SBO_TO_NAME)
+             .map(MINI_SBO_NAME_TO_POLARITY)
          )

      try:
-         edge_sources = ng_utils.get_minimal_sources_edges(
-             vertices.rename(columns={"name": "node"}),
+         reaction_sources = ng_utils.get_minimal_sources_edges(
+             vertices.rename(columns={NAPISTU_GRAPH_VERTICES.NAME: "node"}),
              sbml_dfs,
              min_pw_size=min_pw_size,
              # optional, counts of sources across the whole model
              source_total_counts=source_total_counts,
          )
      except Exception:
-         edge_sources = None
+         logger.warning(f"Could not get reaction sources for {sc_id}; returning None")
+         reaction_sources = None

      # to add weights to the network solve the shortest path problem
      # from the focal node to each neighbor
      # solve this problem separately whether a given neighbor is an
      # ancestor or descendant

-     # focal node -> descendants
-
-     one_descendants_df = one_neighborhood_df[
-         one_neighborhood_df["relationship"] == "descendants"
-     ]
-     descendants_list = list(set(one_descendants_df["name"].tolist()).union({sc_id}))
-
-     # hide warnings which are mostly just Dijkstra complaining about not finding neighbors
-     with warnings.catch_warnings():
-         # igraph throws warnings for each pair of unconnected species
-         warnings.simplefilter("ignore")
-
-         neighborhood_paths = neighborhood_graph.get_shortest_paths(
-             # focal node
-             v=sc_id,
-             to=descendants_list,
-             weights="weights",
-             mode="out",
-             output="epath",
-         )
-
-     downstream_path_attrs, downstream_entity_dict = _calculate_path_attrs(
-         neighborhood_paths, edges, vertices=descendants_list, weight_var="weights"
-     )
-     downstream_path_attrs = downstream_path_attrs.assign(node_orientation="downstream")
-
-     # ancestors -> focal_node
-
-     one_ancestors_df = one_neighborhood_df[
-         one_neighborhood_df["relationship"] == "ancestors"
-     ]
-     ancestors_list = list(set(one_ancestors_df["name"].tolist()).union({sc_id}))
-
-     with warnings.catch_warnings():
-         # igraph throws warnings for each pair of unconnected species
-         warnings.simplefilter("ignore")
-
-         neighborhood_paths = neighborhood_graph.get_shortest_paths(
-             v=sc_id,
-             to=ancestors_list,
-             weights="upstream_weights",
-             mode="in",
-             output="epath",
-         )
-
-     upstream_path_attrs, upstream_entity_dict = _calculate_path_attrs(
-         neighborhood_paths,
+     (
+         downstream_path_attrs,
+         downstream_entity_dict,
+         upstream_path_attrs,
+         upstream_entity_dict,
+     ) = _find_neighbors_paths(
+         neighborhood_graph,
+         one_neighborhood_df,
+         sc_id,
          edges,
-         vertices=ancestors_list,
-         weight_var="upstream_weights",
      )
-     upstream_path_attrs = upstream_path_attrs.assign(node_orientation="upstream")

      # combine upstream and downstream shortest paths
      # in cases a node is upstream and downstream of the focal node
      # by taking the lowest path weight
      vertex_neighborhood_attrs = (
          pd.concat([downstream_path_attrs, upstream_path_attrs])
-         .sort_values("path_weight")
+         .sort_values(DISTANCES.PATH_WEIGHTS)
          .groupby("neighbor")
          .first()
      )
      # label the focal node
-     vertex_neighborhood_attrs.loc[sc_id, "node_orientation"] = "focal"
+     vertex_neighborhood_attrs.loc[sc_id, "node_orientation"] = GRAPH_RELATIONSHIPS.FOCAL

      # if the precomputed distances, graph and/or sbml_dfs are inconsistent
      # then the shortest paths search may just return empty lists
      # throw a clearer error message in this case.
-     EXPECTED_VERTEX_ATTRS = {"final_from", "final_to", "net_polarity"}
+     EXPECTED_VERTEX_ATTRS = {
+         DISTANCES.FINAL_FROM,
+         DISTANCES.FINAL_TO,
+         NET_POLARITY.NET_POLARITY,
+     }
      missing_vertex_attrs = EXPECTED_VERTEX_ATTRS.difference(
          set(vertex_neighborhood_attrs.columns.tolist())
      )
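The polarity assignment is unchanged in substance: sbo_term is mapped to an SBO name, and the name to a polarity, via two chained Series.map calls. A self-contained pandas illustration of that lookup shape (the mapping dicts here are stand-ins, not napistu's full MINI_SBO tables):

    import pandas as pd

    edges = pd.DataFrame({"sbo_term": ["SBO:0000459", "SBO:0000020"]})
    sbo_to_name = {"SBO:0000459": "stimulator", "SBO:0000020": "inhibitor"}
    name_to_polarity = {"stimulator": "activation", "inhibitor": "inhibition"}

    edges["link_polarity"] = edges["sbo_term"].map(sbo_to_name).map(name_to_polarity)
    print(edges["link_polarity"].tolist())  # ['activation', 'inhibition']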
@@ -767,22 +771,22 @@ def create_neighborhood_dict_entry(
      # add net_polarity to edges in addition to nodes
      edges = edges.merge(
          vertex_neighborhood_attrs.reset_index()[
-             ["final_from", "final_to", "net_polarity"]
+             [DISTANCES.FINAL_FROM, DISTANCES.FINAL_TO, NET_POLARITY.NET_POLARITY]
          ].dropna(),
-         left_on=["from", "to"],
-         right_on=["final_from", "final_to"],
+         left_on=[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO],
+         right_on=[DISTANCES.FINAL_FROM, DISTANCES.FINAL_TO],
          how="left",
      )

      vertices = vertices.merge(
-         vertex_neighborhood_attrs, left_on="name", right_index=True
+         vertex_neighborhood_attrs, left_on=NAPISTU_GRAPH_VERTICES.NAME, right_index=True
      )

      # drop nodes with a path length / weight of zero
      # which are NOT the focal node
      # these were cases where no path to/from the focal node to the query node was found
      disconnected_neighbors = vertices.query(
-         "(not node_orientation == 'focal') and path_weight == 0"
+         f"(not node_orientation == '{GRAPH_RELATIONSHIPS.FOCAL}') and {DISTANCES.PATH_WEIGHTS} == 0"
      )
      vertices = vertices[~vertices.index.isin(disconnected_neighbors.index.tolist())]

@@ -790,8 +794,8 @@ def create_neighborhood_dict_entry(
      vertices = add_vertices_uri_urls(vertices, sbml_dfs)

      neighborhood_path_entities = {
-         "downstream": downstream_entity_dict,
-         "upstream": upstream_entity_dict,
+         NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM: downstream_entity_dict,
+         NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM: upstream_entity_dict,
      }

      # update graph with additional vertex and edge attributes
@@ -799,16 +803,16 @@ def create_neighborhood_dict_entry(
          vertices=vertices.to_dict("records"),
          edges=edges.to_dict("records"),
          directed=napistu_graph.is_directed(),
-         vertex_name_attr="name",
-         edge_foreign_keys=("from", "to"),
+         vertex_name_attr=NAPISTU_GRAPH_VERTICES.NAME,
+         edge_foreign_keys=(NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO),
      )

      outdict = {
-         "graph": updated_napistu_graph,
-         "vertices": vertices,
-         "edges": edges,
-         "edge_sources": edge_sources,
-         "neighborhood_path_entities": neighborhood_path_entities,
+         NEIGHBORHOOD_DICT_KEYS.GRAPH: updated_napistu_graph,
+         NEIGHBORHOOD_DICT_KEYS.VERTICES: vertices,
+         NEIGHBORHOOD_DICT_KEYS.EDGES: edges,
+         NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES: reaction_sources,
+         NEIGHBORHOOD_DICT_KEYS.NEIGHBORHOOD_PATH_ENTITIES: neighborhood_path_entities,
      }

      return outdict
@@ -818,9 +822,11 @@ def _create_neighborhood_dict_entry_logging(
     sc_id: str, one_neighborhood_df: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
 ):
     df_summary = one_neighborhood_df.copy()
-    df_summary["node_type"] = [
-        "species" if x else "reactions"
-        for x in df_summary["name"].isin(sbml_dfs.compartmentalized_species.index)
+    df_summary[NAPISTU_GRAPH_VERTICES.NODE_TYPE] = [
+        NAPISTU_GRAPH_NODE_TYPES.SPECIES if x else NAPISTU_GRAPH_NODE_TYPES.REACTION
+        for x in df_summary[NAPISTU_GRAPH_VERTICES.NAME].isin(
+            sbml_dfs.compartmentalized_species.index
+        )
     ]
     relationship_counts = df_summary.value_counts(
         ["relationship", "node_type"]
@@ -844,22 +850,45 @@ def add_vertices_uri_urls(
     vertices: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
 ) -> pd.DataFrame:
     """
-    Add Vertices URI URLs
+    Add URI URLs to neighborhood vertices DataFrame.

-    Add a url variable to the neighborhood vertices pd.DataFrame
+    This function enriches a vertices DataFrame with URI URLs for both species and
+    reactions. For species, it adds standard reference identifiers and Pharos IDs
+    where available. For reactions, it adds reaction-specific URI URLs.

     Parameters
     ----------
     vertices: pd.DataFrame
-        table of neighborhood vertices
+        DataFrame containing neighborhood vertices with the following required columns:
+        - NAPISTU_GRAPH_VERTICES.NAME: The name/identifier of each vertex
+        - NAPISTU_GRAPH_VERTICES.NODE_TYPE: The type of node, either
+          NAPISTU_GRAPH_NODE_TYPES.SPECIES or NAPISTU_GRAPH_NODE_TYPES.REACTION
     sbml_dfs: sbml_dfs_core.SBML_dfs
-        consensus network model
+        Pathway model including species, compartmentalized species, reactions and ontologies

     Returns
     -------
-    vertices: pd.DataFrame
-        input table with a url field
+    pd.DataFrame
+        Input vertices DataFrame enriched with URI URL columns:
+        - For species: standard reference identifier URLs and Pharos IDs
+        - For reactions: reaction-specific URI URLs
+        - Empty strings for missing URLs

+    Raises
+    ------
+    ValueError
+        If vertices DataFrame is empty (no rows)
+    TypeError
+        If the output is not a pandas DataFrame
+    ValueError
+        If the output row count doesn't match the input row count
+
+    Notes
+    -----
+    - Species vertices are merged with compartmentalized_species to get s_id mappings
+    - Reaction vertices are processed directly using their names
+    - Missing URLs are filled with empty strings
+    - The function preserves the original row order and count
     """

     if vertices.shape[0] <= 0:
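A sketch of the documented contract, assuming a minimal vertices frame carrying the two required columns (the IDs are hypothetical):

    import pandas as pd
    from napistu.network.constants import (
        NAPISTU_GRAPH_NODE_TYPES,
        NAPISTU_GRAPH_VERTICES,
    )

    vertices = pd.DataFrame(
        {
            NAPISTU_GRAPH_VERTICES.NAME: ["SC00001", "R00001"],
            NAPISTU_GRAPH_VERTICES.NODE_TYPE: [
                NAPISTU_GRAPH_NODE_TYPES.SPECIES,
                NAPISTU_GRAPH_NODE_TYPES.REACTION,
            ],
        }
    )
    annotated = add_vertices_uri_urls(vertices, sbml_dfs)
    assert annotated.shape[0] == vertices.shape[0]  # row count is preserved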
@@ -868,35 +897,54 @@ def add_vertices_uri_urls(
      # add uri urls for each node

      # add s_ids
-     neighborhood_species = vertices[vertices["node_type"] == "species"].merge(
-         sbml_dfs.compartmentalized_species["s_id"],
-         left_on="name",
+     neighborhood_species = vertices[
+         vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == NAPISTU_GRAPH_NODE_TYPES.SPECIES
+     ].merge(
+         sbml_dfs.compartmentalized_species[SBML_DFS.S_ID],
+         left_on=NAPISTU_GRAPH_VERTICES.NAME,
          right_index=True,
          how="left",
      )

      # add a standard reference identifier
      neighborhood_species_aug = neighborhood_species.merge(
-         sbml_dfs.get_uri_urls("species", neighborhood_species["s_id"]),
-         left_on="s_id",
+         sbml_dfs.get_uri_urls(
+             NAPISTU_GRAPH_NODE_TYPES.SPECIES, neighborhood_species[SBML_DFS.S_ID]
+         ),
+         left_on=SBML_DFS.S_ID,
          right_index=True,
          how="left",
          # add pharos ids where available
      ).merge(
          sbml_dfs.get_uri_urls(
-             "species", neighborhood_species["s_id"], required_ontology="pharos"
-         ).rename("pharos"),
-         left_on="s_id",
+             NAPISTU_GRAPH_NODE_TYPES.SPECIES,
+             neighborhood_species[SBML_DFS.S_ID],
+             required_ontology=ONTOLOGIES.PHAROS,
+         ).rename(ONTOLOGIES.PHAROS),
+         left_on=SBML_DFS.S_ID,
          right_index=True,
          how="left",
      )

-     if sum(vertices["node_type"] == "reaction") > 0:
-         neighborhood_reactions = vertices[vertices["node_type"] == "reaction"].merge(
+     if (
+         sum(
+             vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+             == NAPISTU_GRAPH_NODE_TYPES.REACTION
+         )
+         > 0
+     ):
+         neighborhood_reactions = vertices[
+             vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+             == NAPISTU_GRAPH_NODE_TYPES.REACTION
+         ].merge(
              sbml_dfs.get_uri_urls(
-                 "reactions", vertices[vertices["node_type"] == "reaction"]["name"]
+                 SBML_DFS.REACTIONS,
+                 vertices[
+                     vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+                     == NAPISTU_GRAPH_NODE_TYPES.REACTION
+                 ][NAPISTU_GRAPH_VERTICES.NAME],
              ),
-             left_on="name",
+             left_on=NAPISTU_GRAPH_VERTICES.NAME,
              right_index=True,
              how="left",
          )
@@ -945,7 +993,7 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
      if not isinstance(top_n, int):
          raise TypeError(f"top_n was a {type(top_n)} and must be an int")

-     pruned_neighborhoods_dict = dict()
+     pruned_neighborhood_dicts = dict()

      for an_sc_id in neighborhoods.keys():
          one_neighborhood = neighborhoods[an_sc_id]
@@ -955,41 +1003,58 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
          pruned_vertices = _prune_vertex_set(one_neighborhood, top_n=top_n)

          # reduce neighborhood to this set of high-weight vertices
-         all_neighbors = pd.DataFrame({"name": one_neighborhood["graph"].vs["name"]})
+         all_neighbors = pd.DataFrame(
+             {
+                 NAPISTU_GRAPH_VERTICES.NAME: one_neighborhood[
+                     NEIGHBORHOOD_DICT_KEYS.GRAPH
+                 ].vs[NAPISTU_GRAPH_VERTICES.NAME]
+             }
+         )
          pruned_vertices_indices = all_neighbors[
-             all_neighbors["name"].isin(pruned_vertices["name"])
+             all_neighbors[NAPISTU_GRAPH_VERTICES.NAME].isin(
+                 pruned_vertices[NAPISTU_GRAPH_VERTICES.NAME]
+             )
          ].index.tolist()

-         pruned_neighborhood = one_neighborhood["graph"].subgraph(
-             one_neighborhood["graph"].vs[pruned_vertices_indices],
+         pruned_neighborhood = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.GRAPH].subgraph(
+             one_neighborhood[NEIGHBORHOOD_DICT_KEYS.GRAPH].vs[pruned_vertices_indices],
              implementation="auto",
          )

          pruned_edges = pd.DataFrame([e.attributes() for e in pruned_neighborhood.es])

-         pruned_reactions = pruned_vertices[pruned_vertices["node_type"] == "reaction"][
-             "name"
-         ]
+         pruned_reactions = pruned_vertices[
+             pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+             == NAPISTU_GRAPH_NODE_TYPES.REACTION
+         ][NAPISTU_GRAPH_VERTICES.NAME]

          if pruned_reactions.shape[0] != 0:
-             if one_neighborhood["edge_sources"] is None:
+             if one_neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES] is None:
                  # allow for missing source information since this is currently optional
-                 pruned_edge_sources = one_neighborhood["edge_sources"]
+                 pruned_reaction_sources = one_neighborhood[
+                     NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES
+                 ]
              else:
-                 pruned_edge_sources = one_neighborhood["edge_sources"][
-                     one_neighborhood["edge_sources"]["r_id"].isin(pruned_reactions)
+                 pruned_reaction_sources = one_neighborhood[
+                     NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES
+                 ][
+                     one_neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES][
+                         SBML_DFS.R_ID
+                     ].isin(pruned_reactions)
                  ]
          else:
-             pruned_edge_sources = one_neighborhood["edge_sources"]
+             pruned_reaction_sources = one_neighborhood[
+                 NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES
+             ]

-         pruned_neighborhoods_dict[an_sc_id] = {
-             "graph": pruned_neighborhood,
-             "vertices": pruned_vertices,
-             "edges": pruned_edges,
-             "edge_sources": pruned_edge_sources,
+         pruned_neighborhood_dicts[an_sc_id] = {
+             NEIGHBORHOOD_DICT_KEYS.GRAPH: pruned_neighborhood,
+             NEIGHBORHOOD_DICT_KEYS.VERTICES: pruned_vertices,
+             NEIGHBORHOOD_DICT_KEYS.EDGES: pruned_edges,
+             NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES: pruned_reaction_sources,
          }

-     return pruned_neighborhoods_dict
+     return pruned_neighborhood_dicts


 def plot_neighborhood(
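prune_neighborhoods keeps its public signature; the changes here are the renamed internals and, importantly, the dict key: pruned entries now expose NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES where "edge_sources" used to sit. A round-trip sketch with a hypothetical sc_id:

    neighborhood_dicts = find_neighborhoods(
        sbml_dfs=sbml_dfs,
        napistu_graph=napistu_graph,
        compartmentalized_species=["SC00001"],
    )
    pruned = prune_neighborhoods(neighborhood_dicts, top_n=100)
    sources = pruned["SC00001"][NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES]  # may be None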
@@ -1029,16 +1094,16 @@ def plot_neighborhood(
          "focal disease": "lime",
          "disease": "aquamarine",
          "focal": "lightcoral",
-         "species": "firebrick",
-         "reaction": "dodgerblue",
+         NAPISTU_GRAPH_NODE_TYPES.SPECIES: "firebrick",
+         NAPISTU_GRAPH_NODE_TYPES.REACTION: "dodgerblue",
      }

      edge_polarity_colors = {
-         "ambiguous": "dimgray",
-         "activation": "gold",
-         "inhibition": "royalblue",
-         "ambiguous activation": "palegoldenrod",
-         "ambiguous inhibition": "powerblue",
+         NET_POLARITY.AMBIGUOUS: "dimgray",
+         NET_POLARITY.ACTIVATION: "gold",
+         NET_POLARITY.INHIBITION: "royalblue",
+         NET_POLARITY.AMBIGUOUS_ACTIVATION: "palegoldenrod",
+         NET_POLARITY.AMBIGUOUS_INHIBITION: "powerblue",
          np.nan: "dimgray",
      }

@@ -1047,17 +1112,19 @@ def plot_neighborhood(
      visual_style["vertex_size"] = 10
      if name_nodes:
          visual_style["vertex_label"] = [
-             textwrap.fill(x, 15) for x in neighborhood_graph.vs["node_name"]
+             textwrap.fill(x, 15)
+             for x in neighborhood_graph.vs[NAPISTU_GRAPH_VERTICES.NODE_NAME]
          ]
      visual_style["vertex_label_color"] = "white"
      visual_style["vertex_label_size"] = 8
      visual_style["vertex_label_angle"] = 90
      visual_style["vertex_label_dist"] = 3
      visual_style["vertex_color"] = [
-         color_dict[x] for x in neighborhood_graph.vs["node_type"]
+         color_dict[x] for x in neighborhood_graph.vs[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
      ]
      visual_style["edge_color"] = [
-         edge_polarity_colors[x] for x in neighborhood_graph.es["net_polarity"]
+         edge_polarity_colors[x]
+         for x in neighborhood_graph.es[NET_POLARITY.NET_POLARITY]
      ]
      visual_style["layout"] = neighborhood_graph_layout
      visual_style["bbox"] = (plot_size, plot_size)
@@ -1089,8 +1156,8 @@ def _precompute_neighbors(

      # check that compartmentalized_species are included in precomputed_distances
      all_cspecies = {
-         *precomputed_distances["sc_id_origin"].tolist(),
-         *precomputed_distances["sc_id_dest"].tolist(),
+         *precomputed_distances[NAPISTU_EDGELIST.SC_ID_ORIGIN].tolist(),
+         *precomputed_distances[NAPISTU_EDGELIST.SC_ID_DEST].tolist(),
      }
      missing_cspecies = set(compartmentalized_species).difference(all_cspecies)
      if len(missing_cspecies) > 0:
@@ -1105,14 +1172,16 @@ def _precompute_neighbors(
          NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,
          NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
      ]:
-         valid_origin = precomputed_distances["sc_id_origin"].isin(
+         valid_origin = precomputed_distances[NAPISTU_EDGELIST.SC_ID_ORIGIN].isin(
              compartmentalized_species
          )
      if network_type in [
          NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM,
          NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
      ]:
-         valid_dest = precomputed_distances["sc_id_dest"].isin(compartmentalized_species)
+         valid_dest = precomputed_distances[NAPISTU_EDGELIST.SC_ID_DEST].isin(
+             compartmentalized_species
+         )

      if network_type == NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS:
          cspecies_subset_precomputed_distances = precomputed_distances[
@@ -1133,7 +1202,7 @@ def _precompute_neighbors(

      # filter by distance
      close_cspecies_subset_precomputed_distances = cspecies_subset_precomputed_distances[
-         cspecies_subset_precomputed_distances["path_length"] <= order
+         cspecies_subset_precomputed_distances[DISTANCES.PATH_LENGTH] <= order
      ]

      # filter to retain top_n
@@ -1143,13 +1212,13 @@ def _precompute_neighbors(
      ]:
          top_descendants = (
              close_cspecies_subset_precomputed_distances[
-                 close_cspecies_subset_precomputed_distances["sc_id_origin"].isin(
-                     compartmentalized_species
-                 )
+                 close_cspecies_subset_precomputed_distances[
+                     DISTANCES.SC_ID_ORIGIN
+                 ].isin(compartmentalized_species)
              ]
              # sort by path_weight so we can retain the lowest weight neighbors
-             .sort_values("path_weights")
-             .groupby("sc_id_origin")
+             .sort_values(DISTANCES.PATH_WEIGHTS)
+             .groupby(NAPISTU_EDGELIST.SC_ID_ORIGIN)
              .head(top_n)
          )

@@ -1161,9 +1230,9 @@ def _precompute_neighbors(
      ]:
          top_ancestors = (
              close_cspecies_subset_precomputed_distances[
-                 close_cspecies_subset_precomputed_distances["sc_id_dest"].isin(
-                     compartmentalized_species
-                 )
+                 close_cspecies_subset_precomputed_distances[
+                     NAPISTU_EDGELIST.SC_ID_DEST
+                 ].isin(compartmentalized_species)
              ]
              # sort by path_upstream_weights so we can retain the lowest weight neighbors
              # we allow for upstream weights to differ from downstream weights
@@ -1176,8 +1245,8 @@ def _precompute_neighbors(
              # the logic is flipped if we are looking for ancestors where
              # we penalize based on the number of parents of a node when
              # we use it (i.e., the default upstream_weights).
-             .sort_values("path_upstream_weights")
-             .groupby("sc_id_dest")
+             .sort_values(DISTANCES.PATH_UPSTREAM_WEIGHTS)
+             .groupby(NAPISTU_EDGELIST.SC_ID_DEST)
              .head(top_n)
          )

@@ -1193,7 +1262,7 @@ def _precompute_neighbors(
              precomputed_neighbors=top_descendants,
              compartmentalized_species=compartmentalized_species,
              sbml_dfs=sbml_dfs,
-             relationship="descendants",
+             relationship=GRAPH_RELATIONSHIPS.DESCENDANTS,
          )

          if downstream_reactions is not None:
@@ -1207,7 +1276,7 @@ def _precompute_neighbors(
              precomputed_neighbors=top_ancestors,
              compartmentalized_species=compartmentalized_species,
              sbml_dfs=sbml_dfs,
-             relationship="ancestors",
+             relationship=GRAPH_RELATIONSHIPS.ANCESTORS,
          )

          if upstream_reactions is not None:
@@ -1217,8 +1286,8 @@ def _precompute_neighbors(
      # an sc_id_origin-specific subgraph
      identity_df = pd.DataFrame(
          {
-             "sc_id_origin": compartmentalized_species,
-             "sc_id_dest": compartmentalized_species,
+             NAPISTU_EDGELIST.SC_ID_ORIGIN: compartmentalized_species,
+             NAPISTU_EDGELIST.SC_ID_DEST: compartmentalized_species,
          }
      )

@@ -1232,14 +1301,16 @@ def _precompute_neighbors(
                  downstream_reactions,  # type: ignore
                  identity_df,
              ]
-         )[["sc_id_origin", "sc_id_dest"]].drop_duplicates()
+         )[
+             [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
+         ].drop_duplicates()
      elif network_type == NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM:
          precomputed_neighbors = pd.concat([top_descendants, downstream_reactions, identity_df])[  # type: ignore
-             ["sc_id_origin", "sc_id_dest"]
+             [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
          ].drop_duplicates()
      elif network_type == NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM:
          precomputed_neighbors = pd.concat([top_ancestors, upstream_reactions, identity_df])[  # type: ignore
-             ["sc_id_origin", "sc_id_dest"]
+             [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
          ].drop_duplicates()
      else:
          raise ValueError("This error shouldn't happen")
@@ -1271,7 +1342,7 @@ def _build_raw_neighborhood_df(
      descendants_df = _find_neighbors(
          napistu_graph=napistu_graph,
          compartmentalized_species=compartmentalized_species,
-         relationship="descendants",
+         relationship=GRAPH_RELATIONSHIPS.DESCENDANTS,
          order=order,
          precomputed_neighbors=precomputed_neighbors,
      )
@@ -1284,7 +1355,7 @@ def _build_raw_neighborhood_df(
      ancestors_df = _find_neighbors(
          napistu_graph=napistu_graph,
          compartmentalized_species=compartmentalized_species,
-         relationship="ancestors",
+         relationship=GRAPH_RELATIONSHIPS.ANCESTORS,
          order=order,
          precomputed_neighbors=precomputed_neighbors,
      )
@@ -1300,8 +1371,9 @@ def _build_raw_neighborhood_df(
          raise NotImplementedError("invalid network_type")

      # add name since this is an easy way to lookup igraph vertices
-     neighborhood_df["name"] = [
-         x["name"] for x in napistu_graph.vs[neighborhood_df["neighbor"]]
+     neighborhood_df[NAPISTU_GRAPH_VERTICES.NAME] = [
+         x[NAPISTU_GRAPH_VERTICES.NAME]
+         for x in napistu_graph.vs[neighborhood_df["neighbor"]]
      ]

      return neighborhood_df
@@ -1327,17 +1399,23 @@ def _find_neighbors(
      if isinstance(precomputed_neighbors, pd.DataFrame):
          # add graph indices to neighbors
          nodes_to_names = (
-             pd.DataFrame({"name": napistu_graph.vs["name"]})
+             pd.DataFrame(
+                 {
+                     NAPISTU_GRAPH_VERTICES.NAME: napistu_graph.vs[
+                         NAPISTU_GRAPH_VERTICES.NAME
+                     ]
+                 }
+             )
              .reset_index()
              .rename({"index": "neighbor"}, axis=1)
          )

-         if relationship == "descendants":
-             bait_id = "sc_id_origin"
-             target_id = "sc_id_dest"
-         elif relationship == "ancestors":
-             bait_id = "sc_id_dest"
-             target_id = "sc_id_origin"
+         if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
+             bait_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
+             target_id = NAPISTU_EDGELIST.SC_ID_DEST
+         elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
+             bait_id = NAPISTU_EDGELIST.SC_ID_DEST
+             target_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
          else:
              raise ValueError(
                  f"relationship must be 'descendants' or 'ancestors' but was {relationship}"
@@ -1347,15 +1425,17 @@ def _find_neighbors(
              precomputed_neighbors[
                  precomputed_neighbors[bait_id].isin(compartmentalized_species)
              ]
-             .merge(nodes_to_names.rename({"name": target_id}, axis=1))
-             .rename({bait_id: "sc_id"}, axis=1)
+             .merge(
+                 nodes_to_names.rename({NAPISTU_GRAPH_VERTICES.NAME: target_id}, axis=1)
+             )
+             .rename({bait_id: SBML_DFS.SC_ID}, axis=1)
              .drop([target_id], axis=1)
              .assign(relationship=relationship)
          )
      else:
-         if relationship == "descendants":
+         if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
              mode_type = "out"
-         elif relationship == "ancestors":
+         elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
              mode_type = "in"
          else:
              raise ValueError(
@@ -1371,7 +1451,7 @@ def _find_neighbors(

          neighbors_df = pd.concat(
              [
-                 pd.DataFrame({"sc_id": c, "neighbor": x}, index=range(0, len(x)))
+                 pd.DataFrame({SBML_DFS.SC_ID: c, "neighbor": x}, index=range(0, len(x)))
                  for c, x in zip(compartmentalized_species, neighbors)
              ]
          ).assign(relationship=relationship)
@@ -1401,12 +1481,12 @@ def _find_reactions_by_relationship(
      if precomputed_neighbors.shape[0] == 0:
          return None

-     if relationship == "descendants":
-         bait_id = "sc_id_origin"
-         target_id = "sc_id_dest"
-     elif relationship == "ancestors":
-         bait_id = "sc_id_dest"
-         target_id = "sc_id_origin"
+     if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
+         bait_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
+         target_id = NAPISTU_EDGELIST.SC_ID_DEST
+     elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
+         bait_id = NAPISTU_EDGELIST.SC_ID_DEST
+         target_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
      else:
          raise ValueError(
              f"relationship must be 'descendants' or 'ancestors' but was {relationship}"
@@ -1437,8 +1517,8 @@ def _find_reactions_by_relationship(
          relatives_cspecies = {*relatives, *[uq]}
          # count the number of relative cspecies including each reaction
          rxn_species_counts = sbml_dfs.reaction_species[
-             sbml_dfs.reaction_species["sc_id"].isin(relatives_cspecies)
-         ].value_counts("r_id")
+             sbml_dfs.reaction_species[SBML_DFS.SC_ID].isin(relatives_cspecies)
+         ].value_counts(SBML_DFS.R_ID)

          # retain reactions involving 2+ cspecies.
          # some of these reactions will be irrelevant and will be excluded when
@@ -1483,10 +1563,11 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:

      """

-     neighborhood_vertices = one_neighborhood["vertices"]
+     neighborhood_vertices = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.VERTICES]

      indexed_neighborhood_species = neighborhood_vertices[
-         neighborhood_vertices["node_type"] == "species"
+         neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
+         == NAPISTU_GRAPH_NODE_TYPES.SPECIES
      ].set_index("node_orientation")

      pruned_oriented_neighbors = list()
@@ -1496,14 +1577,14 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
          # handle cases where only one entry exists to DF->series coercion occurs
          vertex_subset = vertex_subset.to_frame().T

-         sorted_vertex_set = vertex_subset.sort_values("path_weight")
-         weight_cutoff = sorted_vertex_set["path_weight"].iloc[
+         sorted_vertex_set = vertex_subset.sort_values(DISTANCES.PATH_WEIGHTS)
+         weight_cutoff = sorted_vertex_set[DISTANCES.PATH_WEIGHTS].iloc[
              min(top_n - 1, sorted_vertex_set.shape[0] - 1)
          ]

          top_neighbors = sorted_vertex_set[
-             sorted_vertex_set["path_weight"] <= weight_cutoff
-         ]["name"].tolist()
+             sorted_vertex_set[DISTANCES.PATH_WEIGHTS] <= weight_cutoff
+         ][NAPISTU_GRAPH_VERTICES.NAME].tolist()

          # include reactions and other species necessary to reach the top neighbors
          # by pulling in the past solutions to weighted shortest paths problems
@@ -1522,7 +1603,7 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
      # combine all neighbors
      pruned_neighbors = set().union(*pruned_oriented_neighbors)
      pruned_vertices = neighborhood_vertices[
-         neighborhood_vertices["name"].isin(pruned_neighbors)
+         neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NAME].isin(pruned_neighbors)
      ].reset_index(drop=True)

      return pruned_vertices
@@ -1532,7 +1613,7 @@ def _calculate_path_attrs(
     neighborhood_paths: list[list],
     edges: pd.DataFrame,
     vertices: list,
-    weight_var: str = "weights",
+    weight_var: str = NAPISTU_GRAPH_EDGES.WEIGHTS,
 ) -> tuple[pd.DataFrame, dict[Any, set]]:
     """
     Calculate Path Attributes
@@ -1582,15 +1663,15 @@ def _calculate_path_attrs(
      # if all_path_edges.ngroups > 0:
      path_attributes_df = pd.concat(
          [
-             all_path_edges[weight_var].agg("sum").rename("path_weight"),
-             all_path_edges.agg("size").rename("path_length"),
-             all_path_edges["link_polarity"]
+             all_path_edges[weight_var].agg("sum").rename(DISTANCES.PATH_WEIGHTS),
+             all_path_edges.agg("size").rename(DISTANCES.PATH_LENGTH),
+             all_path_edges[NET_POLARITY.LINK_POLARITY]
              .agg(paths._terminal_net_polarity)
-             .rename("net_polarity"),
+             .rename(NET_POLARITY.NET_POLARITY),
              # add the final edge since this can be used to add path attributes to edges
              # i.e., apply net_polarity to an edge
-             all_path_edges["from"].agg("last").rename("final_from"),
-             all_path_edges["to"].agg("last").rename("final_to"),
+             all_path_edges["from"].agg("last").rename(DISTANCES.FINAL_FROM),
+             all_path_edges["to"].agg("last").rename(DISTANCES.FINAL_TO),
          ],
          axis=1,
      ).reset_index()
@@ -1613,7 +1694,11 @@ def _calculate_path_attrs(
          if len(neighborhood_paths[i]) == 0
      ]
      edgeles_nodes_df = pd.DataFrame({"neighbor": edgeless_nodes}).assign(
-         path_length=0, path_weight=0, net_polarity=None
+         **{
+             DISTANCES.PATH_LENGTH: 0,
+             DISTANCES.PATH_WEIGHTS: 0,
+             NET_POLARITY.NET_POLARITY: None,
+         }
      )

      # add edgeless entries as entries in the two outputs
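The aggregation above groups one row per path edge by the path's terminal neighbor, then sums edge weights into a path weight and counts rows into a path length. A self-contained toy of the same groupby/agg shape (made-up data; column names mirror the DISTANCES constants):

    import pandas as pd

    # one row per edge on a shortest path, keyed by the path's terminal neighbor
    path_edges = pd.DataFrame(
        {"neighbor": ["B", "C", "C"], "weights": [1.0, 0.5, 2.0]}
    )
    grouped = path_edges.groupby("neighbor")
    path_attrs = pd.concat(
        [
            grouped["weights"].agg("sum").rename("path_weights"),
            grouped.agg("size").rename("path_length"),
        ],
        axis=1,
    ).reset_index()
    # neighbor  path_weights  path_length
    #        B           1.0            1
    #        C           2.5            2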
@@ -1630,3 +1715,118 @@ def _calculate_path_attrs(
      )

      return path_attributes_df, neighborhood_path_entities
+
+
+ def _find_neighbors_paths(
+     neighborhood_graph: ig.Graph,
+     one_neighborhood_df: pd.DataFrame,
+     sc_id: str,
+     edges: pd.DataFrame,
+ ) -> tuple[pd.DataFrame, dict[Any, set], pd.DataFrame, dict[Any, set]]:
+     """
+     Find shortest paths between the focal node and its neighbors in both directions.
+
+     This function calculates shortest paths from the focal node to its descendants
+     (downstream) and ancestors (upstream) using igraph's shortest path algorithms.
+     It uses _calculate_path_attrs to compute path attributes including path weights,
+     lengths, and polarity information.
+
+     Parameters
+     ----------
+     neighborhood_graph: ig.Graph
+         The igraph Graph object representing the neighborhood network
+     one_neighborhood_df: pd.DataFrame
+         DataFrame containing neighborhood information with 'relationship' column
+         indicating 'descendants' or 'ancestors' for each node
+     sc_id: str
+         The compartmentalized species ID of the focal node
+     edges: pd.DataFrame
+         DataFrame containing edge information with columns for 'from', 'to',
+         weights, and link polarity
+
+     Returns
+     -------
+     downstream_path_attrs: pd.DataFrame
+         DataFrame containing path attributes for downstream paths from focal node
+         to descendants. Includes columns: neighbor, path_weight, path_length,
+         net_polarity, final_from, final_to, node_orientation
+     downstream_entity_dict: dict[Any, set]
+         Dictionary mapping each descendant neighbor to the set of entities
+         (nodes) connecting it to the focal node
+     upstream_path_attrs: pd.DataFrame
+         DataFrame containing path attributes for upstream paths from focal node
+         to ancestors. Includes columns: neighbor, path_weight, path_length,
+         net_polarity, final_from, final_to, node_orientation
+     upstream_entity_dict: dict[Any, set]
+         Dictionary mapping each ancestor neighbor to the set of entities
+         (nodes) connecting it to the focal node
+     """
+
+     one_descendants_df = one_neighborhood_df[
+         one_neighborhood_df["relationship"] == GRAPH_RELATIONSHIPS.DESCENDANTS
+     ]
+     descendants_list = list(
+         set(one_descendants_df[NAPISTU_GRAPH_VERTICES.NAME].tolist()).union({sc_id})
+     )
+
+     # hide warnings which are mostly just Dijkstra complaining about not finding neighbors
+     with warnings.catch_warnings():
+         # igraph throws warnings for each pair of unconnected species
+         warnings.simplefilter("ignore")
+
+         neighborhood_paths = neighborhood_graph.get_shortest_paths(
+             # focal node
+             v=sc_id,
+             to=descendants_list,
+             weights=NAPISTU_GRAPH_EDGES.WEIGHTS,
+             mode="out",
+             output="epath",
+         )
+
+     downstream_path_attrs, downstream_entity_dict = _calculate_path_attrs(
+         neighborhood_paths,
+         edges,
+         vertices=descendants_list,
+         weight_var=NAPISTU_GRAPH_EDGES.WEIGHTS,
+     )
+     downstream_path_attrs = downstream_path_attrs.assign(
+         node_orientation=NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM
+     )
+
+     # ancestors -> focal_node
+
+     one_ancestors_df = one_neighborhood_df[
+         one_neighborhood_df["relationship"] == GRAPH_RELATIONSHIPS.ANCESTORS
+     ]
+     ancestors_list = list(
+         set(one_ancestors_df[NAPISTU_GRAPH_VERTICES.NAME].tolist()).union({sc_id})
+     )
+
+     with warnings.catch_warnings():
+         # igraph throws warnings for each pair of unconnected species
+         warnings.simplefilter("ignore")
+
+         neighborhood_paths = neighborhood_graph.get_shortest_paths(
+             v=sc_id,
+             to=ancestors_list,
+             weights=NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
+             mode="in",
+             output="epath",
+         )
+
+     upstream_path_attrs, upstream_entity_dict = _calculate_path_attrs(
+         neighborhood_paths,
+         edges,
+         vertices=ancestors_list,
+         weight_var=NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
+     )
+     upstream_path_attrs = upstream_path_attrs.assign(
+         node_orientation=NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM
+     )
+
+     return (
+         downstream_path_attrs,
+         downstream_entity_dict,
+         upstream_path_attrs,
+         upstream_entity_dict,
+     )
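The helper's two searches differ only in edge weights and traversal direction: descendants are reached with mode="out" over the standard weights, ancestors with mode="in" over the upstream weights. A toy igraph illustration of the epath output that _calculate_path_attrs consumes (three-node graph, made up):

    import igraph as ig

    g = ig.Graph(directed=True)
    g.add_vertices(["focal", "a", "b"])
    g.add_edges([("focal", "a"), ("a", "b")])
    g.es["weights"] = [1.0, 2.0]

    epaths = g.get_shortest_paths(
        v="focal", to=["a", "b"], weights="weights", mode="out", output="epath"
    )
    print(epaths)  # [[0], [0, 1]] -- edge indices along each path, one list per target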