napistu 0.1.0__py3-none-any.whl → 0.2.4.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. napistu/__init__.py +1 -1
  2. napistu/consensus.py +1010 -513
  3. napistu/constants.py +24 -0
  4. napistu/gcs/constants.py +2 -2
  5. napistu/gcs/downloads.py +57 -25
  6. napistu/gcs/utils.py +21 -0
  7. napistu/identifiers.py +105 -6
  8. napistu/ingestion/constants.py +0 -1
  9. napistu/ingestion/obo.py +24 -8
  10. napistu/ingestion/psi_mi.py +20 -5
  11. napistu/ingestion/reactome.py +8 -32
  12. napistu/mcp/__init__.py +69 -0
  13. napistu/mcp/__main__.py +180 -0
  14. napistu/mcp/codebase.py +182 -0
  15. napistu/mcp/codebase_utils.py +298 -0
  16. napistu/mcp/constants.py +72 -0
  17. napistu/mcp/documentation.py +166 -0
  18. napistu/mcp/documentation_utils.py +235 -0
  19. napistu/mcp/execution.py +382 -0
  20. napistu/mcp/profiles.py +73 -0
  21. napistu/mcp/server.py +86 -0
  22. napistu/mcp/tutorials.py +124 -0
  23. napistu/mcp/tutorials_utils.py +230 -0
  24. napistu/mcp/utils.py +47 -0
  25. napistu/mechanism_matching.py +782 -26
  26. napistu/modify/constants.py +41 -0
  27. napistu/modify/curation.py +4 -1
  28. napistu/modify/gaps.py +243 -156
  29. napistu/modify/pathwayannot.py +26 -8
  30. napistu/network/neighborhoods.py +16 -7
  31. napistu/network/net_create.py +209 -54
  32. napistu/network/net_propagation.py +118 -0
  33. napistu/network/net_utils.py +1 -32
  34. napistu/rpy2/netcontextr.py +10 -7
  35. napistu/rpy2/rids.py +7 -5
  36. napistu/sbml_dfs_core.py +46 -29
  37. napistu/sbml_dfs_utils.py +37 -1
  38. napistu/source.py +8 -2
  39. napistu/utils.py +67 -8
  40. napistu-0.2.4.dev3.dist-info/METADATA +84 -0
  41. napistu-0.2.4.dev3.dist-info/RECORD +95 -0
  42. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/WHEEL +1 -1
  43. tests/conftest.py +11 -5
  44. tests/test_consensus.py +4 -1
  45. tests/test_gaps.py +127 -0
  46. tests/test_gcs.py +3 -2
  47. tests/test_igraph.py +14 -0
  48. tests/test_mcp_documentation_utils.py +13 -0
  49. tests/test_mechanism_matching.py +658 -0
  50. tests/test_net_propagation.py +89 -0
  51. tests/test_net_utils.py +83 -0
  52. tests/test_sbml.py +2 -0
  53. tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
  54. tests/test_utils.py +81 -0
  55. napistu-0.1.0.dist-info/METADATA +0 -56
  56. napistu-0.1.0.dist-info/RECORD +0 -77
  57. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/entry_points.txt +0 -0
  58. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/licenses/LICENSE +0 -0
  59. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/top_level.txt +0 -0
@@ -42,11 +42,12 @@ logger = logging.getLogger(__name__)
42
42
 
43
43
  def create_cpr_graph(
44
44
  sbml_dfs: sbml_dfs_core.SBML_dfs,
45
- reaction_graph_attrs: dict = dict(),
45
+ reaction_graph_attrs: Optional[dict] = None,
46
46
  directed: bool = True,
47
47
  edge_reversed: bool = False,
48
48
  graph_type: str = CPR_GRAPH_TYPES.BIPARTITE,
49
49
  verbose: bool = False,
50
+ custom_transformations: Optional[dict] = None,
50
51
  ) -> ig.Graph:
51
52
  """
52
53
  Create CPR Graph
@@ -73,12 +74,17 @@ def create_cpr_graph(
73
74
  not modified by a substrate per-se).
74
75
  verbose : bool
75
76
  Extra reporting
77
+ custom_transformations : dict, optional
78
+ Dictionary of custom transformation functions to use for attribute transformation.
76
79
 
77
80
  Returns:
78
81
  ----------
79
82
  An Igraph network
80
83
  """
81
84
 
85
+ if reaction_graph_attrs is None:
86
+ reaction_graph_attrs = {}
87
+
82
88
  if not isinstance(sbml_dfs, sbml_dfs_core.SBML_dfs):
83
89
  raise TypeError(
84
90
  f"sbml_dfs must be a sbml_dfs_core.SBML_dfs, but was {type(sbml_dfs)}"
@@ -168,7 +174,10 @@ def create_cpr_graph(
168
174
 
169
175
  logger.info("Adding reversibility and other meta-data from reactions_data")
170
176
  augmented_network_edges = _augment_network_edges(
171
- network_edges, working_sbml_dfs, reaction_graph_attrs
177
+ network_edges,
178
+ working_sbml_dfs,
179
+ reaction_graph_attrs,
180
+ custom_transformations=custom_transformations,
172
181
  )
173
182
 
174
183
  logger.info(
@@ -264,12 +273,13 @@ def create_cpr_graph(
264
273
 
265
274
  def process_cpr_graph(
266
275
  sbml_dfs: sbml_dfs_core.SBML_dfs,
267
- reaction_graph_attrs: dict = dict(),
276
+ reaction_graph_attrs: Optional[dict] = None,
268
277
  directed: bool = True,
269
278
  edge_reversed: bool = False,
270
279
  graph_type: str = CPR_GRAPH_TYPES.BIPARTITE,
271
280
  weighting_strategy: str = CPR_WEIGHTING_STRATEGIES.UNWEIGHTED,
272
281
  verbose: bool = False,
282
+ custom_transformations: dict = None,
273
283
  ) -> ig.Graph:
274
284
  """
275
285
  Process Consensus Graph
@@ -294,11 +304,16 @@ def process_cpr_graph(
294
304
  - calibrated: transform edges with a quantitative score based on reaction_attrs and combine them
295
305
  with topology scores to generate a consensus.
296
306
  verbose (bool): Extra reporting
307
+ custom_transformations (dict, optional):
308
+ Dictionary of custom transformation functions to use for attribute transformation.
297
309
 
298
310
  Returns:
299
311
  weighted_graph (ig.Graph): An Igraph network
300
312
  """
301
313
 
314
+ if reaction_graph_attrs is None:
315
+ reaction_graph_attrs = {}
316
+
302
317
  logging.info("Constructing network")
303
318
  cpr_graph = create_cpr_graph(
304
319
  sbml_dfs,
@@ -307,6 +322,7 @@ def process_cpr_graph(
307
322
  edge_reversed=edge_reversed,
308
323
  graph_type=graph_type,
309
324
  verbose=verbose,
325
+ custom_transformations=custom_transformations,
310
326
  )
311
327
 
312
328
  if "reactions" in reaction_graph_attrs.keys():
@@ -326,7 +342,10 @@ def process_cpr_graph(
326
342
 
327
343
 
328
344
  def pluck_entity_data(
329
- sbml_dfs: sbml_dfs_core.SBML_dfs, graph_attrs: dict[str, dict], data_type: str
345
+ sbml_dfs: sbml_dfs_core.SBML_dfs,
346
+ graph_attrs: dict[str, dict],
347
+ data_type: str,
348
+ custom_transformations: Optional[dict[str, callable]] = None,
330
349
  ) -> pd.DataFrame | None:
331
350
  """
332
351
  Pluck Entity Attributes
@@ -338,13 +357,21 @@ def pluck_entity_data(
338
357
  sbml_dfs: sbml_dfs_core.SBML_dfs
339
358
  A mechanistic model
340
359
  graph_attrs: dict
341
- A dictionary of species/reaction attributes to pull out
360
+ A dictionary of species/reaction attributes to pull out. If the requested
361
+ data_type ("species" or "reactions") is not present as a key, or if the value
362
+ is an empty dict, this function will return None (no error).
342
363
  data_type: str
343
364
  "species" or "reactions" to pull out species_data or reactions_data
365
+ custom_transformations: dict[str, callable], optional
366
+ A dictionary mapping transformation names to functions. If provided, these
367
+ will be checked before built-in transformations. Example:
368
+ custom_transformations = {"square": lambda x: x**2}
344
369
 
345
370
  Returns:
346
371
  A table where all extracted attributes are merged based on a common index or None
347
- if no attributes were extracted.
372
+ if no attributes were extracted. If the requested data_type is not present in
373
+ graph_attrs, or if the attribute dict is empty, returns None. This is intended
374
+ to allow optional annotation blocks.
348
375
 
349
376
  """
350
377
 
@@ -361,30 +388,47 @@ def pluck_entity_data(
361
388
 
362
389
  entity_attrs = graph_attrs[data_type]
363
390
  # validating dict
364
- _validate_entity_attrs(entity_attrs)
391
+ _validate_entity_attrs(entity_attrs, custom_transformations=custom_transformations)
392
+
393
+ if len(entity_attrs) == 0:
394
+ logger.info(
395
+ f'No attributes defined for "{data_type}" in graph_attrs; returning None'
396
+ )
397
+ return None
365
398
 
366
399
  data_type_attr = data_type + "_data"
367
400
  entity_data_tbls = getattr(sbml_dfs, data_type_attr)
368
401
 
369
402
  data_list = list()
370
403
  for k, v in entity_attrs.items():
371
- if v["table"] is not None:
372
- # does the data table exist?
373
- if v["table"] not in entity_data_tbls.keys():
374
- raise ValueError(
375
- f"{v['table']} was defined as a table in \"graph_attrs\" but "
376
- f'it is not present in the "{data_type_attr}" of the sbml_dfs'
377
- )
404
+ # v["table"] is always present if entity_attrs is non-empty and validated
405
+ if v["table"] not in entity_data_tbls.keys():
406
+ raise ValueError(
407
+ f"{v['table']} was defined as a table in \"graph_attrs\" but "
408
+ f'it is not present in the "{data_type_attr}" of the sbml_dfs'
409
+ )
378
410
 
379
- if v["variable"] not in entity_data_tbls[v["table"]].columns.tolist():
380
- raise ValueError(
381
- f"{v['variable']} was defined as a variable in \"graph_attrs\" but "
382
- f"it is not present in the {v['table']} of the \"{data_type_attr}\" of "
383
- "the sbml_dfs"
384
- )
411
+ if v["variable"] not in entity_data_tbls[v["table"]].columns.tolist():
412
+ raise ValueError(
413
+ f"{v['variable']} was defined as a variable in \"graph_attrs\" but "
414
+ f"it is not present in the {v['table']} of the \"{data_type_attr}\" of "
415
+ "the sbml_dfs"
416
+ )
385
417
 
386
- entity_series = entity_data_tbls[v["table"]][v["variable"]].rename(k)
387
- data_list.append(entity_series)
418
+ entity_series = entity_data_tbls[v["table"]][v["variable"]].rename(k)
419
+ trans_name = v.get("trans", DEFAULT_WT_TRANS)
420
+ # Look up transformation
421
+ if custom_transformations and trans_name in custom_transformations:
422
+ trans_fxn = custom_transformations[trans_name]
423
+ elif trans_name in DEFINED_WEIGHT_TRANSFORMATION:
424
+ trans_fxn = globals()[DEFINED_WEIGHT_TRANSFORMATION[trans_name]]
425
+ else:
426
+ # This should never be hit if _validate_entity_attrs is called correctly.
427
+ raise ValueError(
428
+ f"Transformation '{trans_name}' not found in custom_transformations or DEFINED_WEIGHT_TRANSFORMATION."
429
+ )
430
+ entity_series = entity_series.apply(trans_fxn)
431
+ data_list.append(entity_series)
388
432
 
389
433
  if len(data_list) == 0:
390
434
  return None
@@ -392,7 +436,9 @@ def pluck_entity_data(
392
436
  return pd.concat(data_list, axis=1)
393
437
 
394
438
 
395
- def apply_weight_transformations(edges_df: pd.DataFrame, reaction_attrs: dict):
439
+ def apply_weight_transformations(
440
+ edges_df: pd.DataFrame, reaction_attrs: dict, custom_transformations: dict = None
441
+ ):
396
442
  """
397
443
  Apply Weight Transformations
398
444
 
@@ -403,22 +449,37 @@ def apply_weight_transformations(edges_df: pd.DataFrame, reaction_attrs: dict):
403
449
  A dictionary of attributes identifying weighting attributes within
404
450
  an sbml_df's reaction_data, how they will be named in edges_df (the keys),
405
451
  and how they should be transformed (the "trans" aliases)
452
+ custom_transformations (dict, optional):
453
+ A dictionary mapping transformation names to functions. If provided, these
454
+ will be checked before built-in transformations.
406
455
 
407
456
  Returns:
408
457
  transformed_edges_df (pd.DataFrame): edges_df with weight variables transformed.
409
458
 
410
459
  """
411
460
 
412
- _validate_entity_attrs(reaction_attrs)
461
+ _validate_entity_attrs(
462
+ reaction_attrs, custom_transformations=custom_transformations
463
+ )
413
464
 
414
465
  transformed_edges_df = copy.deepcopy(edges_df)
415
466
  for k, v in reaction_attrs.items():
416
467
  if k not in transformed_edges_df.columns:
417
468
  raise ValueError(f"A weighting variable {k} was missing from edges_df")
418
469
 
419
- trans_fxn = DEFINED_WEIGHT_TRANSFORMATION[v["trans"]]
470
+ trans_name = v["trans"]
471
+ # Look up transformation
472
+ if custom_transformations and trans_name in custom_transformations:
473
+ trans_fxn = custom_transformations[trans_name]
474
+ elif trans_name in DEFINED_WEIGHT_TRANSFORMATION:
475
+ trans_fxn = globals()[DEFINED_WEIGHT_TRANSFORMATION[trans_name]]
476
+ else:
477
+ # This should never be hit if _validate_entity_attrs is called correctly.
478
+ raise ValueError(
479
+ f"Transformation '{trans_name}' not found in custom_transformations or DEFINED_WEIGHT_TRANSFORMATION."
480
+ )
420
481
 
421
- transformed_edges_df[k] = transformed_edges_df[k].apply(globals()[trans_fxn])
482
+ transformed_edges_df[k] = transformed_edges_df[k].apply(trans_fxn)
422
483
 
423
484
  return transformed_edges_df
424
485
 
@@ -582,12 +643,11 @@ def _create_cpr_graph_tiered(
582
643
  invalid_sbo_terms = sbml_dfs.reaction_species[
583
644
  ~sbml_dfs.reaction_species[SBML_DFS.SBO_TERM].isin(MINI_SBO_TO_NAME.keys())
584
645
  ]
585
- assert isinstance(invalid_sbo_terms, pd.DataFrame)
586
646
 
587
647
  if invalid_sbo_terms.shape[0] != 0:
588
648
  invalid_counts = invalid_sbo_terms.value_counts(SBML_DFS.SBO_TERM).to_frame("N")
589
- assert isinstance(invalid_counts, pd.DataFrame)
590
-
649
+ if not isinstance(invalid_counts, pd.DataFrame):
650
+ raise TypeError("invalid_counts must be a pandas DataFrame")
591
651
  logger.warning(utils.style_df(invalid_counts, headers="keys")) # type: ignore
592
652
  raise ValueError("Some reaction species have unusable SBO terms")
593
653
 
@@ -647,28 +707,33 @@ def _create_cpr_graph_tiered(
647
707
  n_children = (
648
708
  unique_edges[CPR_GRAPH_EDGES.FROM]
649
709
  .value_counts()
650
- .to_frame()
710
+ # rename values to the child name
711
+ .to_frame(name=CPR_GRAPH_EDGES.SC_CHILDREN)
651
712
  .reset_index()
652
713
  .rename(
653
714
  {
654
- "index": SBML_DFS.SC_ID,
655
- CPR_GRAPH_EDGES.FROM: CPR_GRAPH_EDGES.SC_CHILDREN,
715
+ CPR_GRAPH_EDGES.FROM: SBML_DFS.SC_ID,
656
716
  },
657
717
  axis=1,
658
718
  )
659
719
  )
720
+
660
721
  # parents
661
722
  n_parents = (
662
723
  unique_edges[CPR_GRAPH_EDGES.TO]
663
724
  .value_counts()
664
- .to_frame()
725
+ # rename values to the parent name
726
+ .to_frame(name=CPR_GRAPH_EDGES.SC_PARENTS)
665
727
  .reset_index()
666
728
  .rename(
667
- {"index": SBML_DFS.SC_ID, CPR_GRAPH_EDGES.TO: CPR_GRAPH_EDGES.SC_PARENTS},
729
+ {
730
+ CPR_GRAPH_EDGES.TO: SBML_DFS.SC_ID,
731
+ },
668
732
  axis=1,
669
733
  )
670
734
  )
671
- graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(0)
735
+
736
+ graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(int(0))
672
737
 
673
738
  graph_degree_by_edgelist[CPR_GRAPH_EDGES.SC_DEGREE] = (
674
739
  graph_degree_by_edgelist[CPR_GRAPH_EDGES.SC_CHILDREN]
@@ -692,7 +757,7 @@ def _create_cpr_graph_tiered(
692
757
  axis=1,
693
758
  )
694
759
  .join(graph_degree_by_edgelist)
695
- .fillna(0)
760
+ .fillna(int(0))
696
761
  )
697
762
 
698
763
  is_from_reaction = all_reaction_edges_df[CPR_GRAPH_EDGES.FROM].isin(
@@ -740,9 +805,14 @@ def _format_tiered_reaction_species(
740
805
  """
741
806
 
742
807
  rxn_species = sorted_reaction_species.loc[r_id]
743
- assert isinstance(rxn_species, pd.DataFrame)
744
- assert list(rxn_species.index.names) == [SBML_DFS.SBO_TERM]
745
- assert rxn_species.columns.tolist() == [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]
808
+ if not isinstance(rxn_species, pd.DataFrame):
809
+ raise TypeError("rxn_species must be a pandas DataFrame")
810
+ if list(rxn_species.index.names) != [SBML_DFS.SBO_TERM]:
811
+ raise ValueError("rxn_species index names must be [SBML_DFS.SBO_TERM]")
812
+ if rxn_species.columns.tolist() != [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]:
813
+ raise ValueError(
814
+ "rxn_species columns must be [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]"
815
+ )
746
816
 
747
817
  rxn_sbo_terms = set(rxn_species.index.unique())
748
818
  # map to common names
@@ -781,7 +851,8 @@ def _format_tiered_reaction_species(
781
851
  )
782
852
  ordered_tiers = entities_ordered_by_tier.index.get_level_values("tier").unique()
783
853
 
784
- assert len(ordered_tiers) > 1
854
+ if len(ordered_tiers) <= 1:
855
+ raise ValueError("ordered_tiers must have more than one element")
785
856
 
786
857
  # which tier is the reaction?
787
858
  reaction_tier = graph_hierarchy_df["tier"][
@@ -1275,9 +1346,34 @@ def _add_graph_species_attribute(
1275
1346
  cpr_graph: ig.Graph,
1276
1347
  sbml_dfs: sbml_dfs_core.SBML_dfs,
1277
1348
  species_graph_attrs: dict,
1349
+ custom_transformations: Optional[dict] = None,
1278
1350
  ) -> ig.Graph:
1279
- """Add meta-data from species_data to existing igraph's vertices."""
1351
+ """
1352
+ Add meta-data from species_data to existing igraph's vertices.
1353
+
1354
+ This function augments the vertices of an igraph network with additional attributes
1355
+ derived from the species-level data in the provided SBML_dfs object. The attributes
1356
+ to add are specified in the species_graph_attrs dictionary, and can be transformed
1357
+ using either built-in or user-supplied transformation functions.
1280
1358
 
1359
+ Parameters
1360
+ ----------
1361
+ cpr_graph : ig.Graph
1362
+ The igraph network to augment.
1363
+ sbml_dfs : sbml_dfs_core.SBML_dfs
1364
+ The SBML_dfs object containing species data.
1365
+ species_graph_attrs : dict
1366
+ Dictionary specifying which attributes to pull from species_data and how to transform them.
1367
+ The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
1368
+ custom_transformations : dict, optional
1369
+ Dictionary mapping transformation names to functions. If provided, these will be checked
1370
+ before built-in transformations. Example: {"square": lambda x: x**2}
1371
+
1372
+ Returns
1373
+ -------
1374
+ ig.Graph
1375
+ The input igraph network with additional vertex attributes added from species_data.
1376
+ """
1281
1377
  if not isinstance(species_graph_attrs, dict):
1282
1378
  raise TypeError(
1283
1379
  f"species_graph_attrs must be a dict, but was {type(species_graph_attrs)}"
@@ -1288,7 +1384,9 @@ def _add_graph_species_attribute(
1288
1384
  sp_graph_key_list = []
1289
1385
  sp_node_attr_list = []
1290
1386
  for k in species_graph_attrs.keys():
1291
- _validate_entity_attrs(species_graph_attrs[k])
1387
+ _validate_entity_attrs(
1388
+ species_graph_attrs[k], custom_transformations=custom_transformations
1389
+ )
1292
1390
 
1293
1391
  sp_graph_key_list.append(k)
1294
1392
  sp_node_attr_list.append(list(species_graph_attrs[k].keys()))
@@ -1305,6 +1403,7 @@ def _add_graph_species_attribute(
1305
1403
  curr_network_nodes_df,
1306
1404
  sbml_dfs,
1307
1405
  species_graph_attrs,
1406
+ custom_transformations=custom_transformations,
1308
1407
  )
1309
1408
 
1310
1409
  for vs_attr in flat_sp_node_attr_list:
@@ -1319,9 +1418,33 @@ def _augment_network_nodes(
1319
1418
  network_nodes: pd.DataFrame,
1320
1419
  sbml_dfs: sbml_dfs_core.SBML_dfs,
1321
1420
  species_graph_attrs: dict = dict(),
1421
+ custom_transformations: Optional[dict] = None,
1322
1422
  ) -> pd.DataFrame:
1323
- """Add species-level attributes, expand network_nodes with s_id and c_id and then map to species-level attributes by s_id."""
1423
+ """
1424
+ Add species-level attributes, expand network_nodes with s_id and c_id and then map to species-level attributes by s_id.
1425
+
1426
+ This function merges species-level attributes from sbml_dfs into the provided network_nodes DataFrame,
1427
+ using the mapping in species_graph_attrs. Optionally, custom transformation functions can be provided
1428
+ to transform the attributes as they are added.
1324
1429
 
1430
+ Parameters
1431
+ ----------
1432
+ network_nodes : pd.DataFrame
1433
+ DataFrame of network nodes. Must include columns 'name', 'node_name', and 'node_type'.
1434
+ sbml_dfs : sbml_dfs_core.SBML_dfs
1435
+ The SBML_dfs object containing species data.
1436
+ species_graph_attrs : dict
1437
+ Dictionary specifying which attributes to pull from species_data and how to transform them.
1438
+ The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
1439
+ custom_transformations : dict, optional
1440
+ Dictionary mapping transformation names to functions. If provided, these will be checked
1441
+ before built-in transformations. Example: {"square": lambda x: x**2}
1442
+
1443
+ Returns
1444
+ -------
1445
+ pd.DataFrame
1446
+ The input network_nodes DataFrame with additional columns for each extracted and transformed attribute.
1447
+ """
1325
1448
  REQUIRED_NETWORK_NODE_ATTRS = {
1326
1449
  "name",
1327
1450
  "node_name",
@@ -1349,18 +1472,26 @@ def _augment_network_nodes(
1349
1472
  )
1350
1473
 
1351
1474
  # assign species_data related attributes to s_id
1352
- species_graph_data = pluck_entity_data(sbml_dfs, species_graph_attrs, "species")
1475
+ species_graph_data = pluck_entity_data(
1476
+ sbml_dfs,
1477
+ species_graph_attrs,
1478
+ "species",
1479
+ custom_transformations=custom_transformations,
1480
+ )
1353
1481
 
1354
1482
  if species_graph_data is not None:
1355
1483
  # add species_graph_data to the network_nodes df, based on s_id
1356
1484
  network_nodes_wdata = network_nodes_sid.merge(
1357
1485
  species_graph_data, left_on="s_id", right_index=True, how="left"
1358
1486
  )
1487
+ else:
1488
+ network_nodes_wdata = network_nodes_sid
1359
1489
 
1360
1490
  # Note: multiple sc_ids with the same s_id will be assign with the same species_graph_data
1361
1491
 
1362
- network_nodes_wdata.fillna(0, inplace=True)
1363
- network_nodes_wdata.drop(columns=["s_id", "c_id"], inplace=True)
1492
+ network_nodes_wdata = network_nodes_wdata.fillna(int(0)).drop(
1493
+ columns=["s_id", "c_id"]
1494
+ )
1364
1495
 
1365
1496
  return network_nodes_wdata
1366
1497
 
@@ -1369,9 +1500,21 @@ def _augment_network_edges(
1369
1500
  network_edges: pd.DataFrame,
1370
1501
  sbml_dfs: sbml_dfs_core.SBML_dfs,
1371
1502
  reaction_graph_attrs: dict = dict(),
1503
+ custom_transformations: Optional[dict] = None,
1372
1504
  ) -> pd.DataFrame:
1373
- """Add reversibility and other metadata from reactions."""
1505
+ """Add reversibility and other metadata from reactions.
1374
1506
 
1507
+ Parameters
1508
+ ----------
1509
+ network_edges : pd.DataFrame
1510
+ DataFrame of network edges.
1511
+ sbml_dfs : sbml_dfs_core.SBML_dfs
1512
+ The SBML_dfs object containing reaction data.
1513
+ reaction_graph_attrs : dict
1514
+ Dictionary of reaction attributes to add.
1515
+ custom_transformations : dict, optional
1516
+ Dictionary of custom transformation functions to use for attribute transformation.
1517
+ """
1375
1518
  REQUIRED_NETWORK_EDGE_ATTRS = {
1376
1519
  "from",
1377
1520
  "to",
@@ -1406,7 +1549,10 @@ def _augment_network_edges(
1406
1549
 
1407
1550
  # add other attributes based on reactions data
1408
1551
  reaction_graph_data = pluck_entity_data(
1409
- sbml_dfs, reaction_graph_attrs, SBML_DFS.REACTIONS
1552
+ sbml_dfs,
1553
+ reaction_graph_attrs,
1554
+ SBML_DFS.REACTIONS,
1555
+ custom_transformations=custom_transformations,
1410
1556
  )
1411
1557
  if reaction_graph_data is not None:
1412
1558
  network_edges = network_edges.merge(
@@ -1491,7 +1637,10 @@ def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFram
1491
1637
  ]
1492
1638
  )
1493
1639
 
1494
- assert transformed_r_reaction_edges.shape[0] == r_reaction_edges.shape[0]
1640
+ if transformed_r_reaction_edges.shape[0] != r_reaction_edges.shape[0]:
1641
+ raise ValueError(
1642
+ "transformed_r_reaction_edges and r_reaction_edges must have the same number of rows"
1643
+ )
1495
1644
 
1496
1645
  return transformed_r_reaction_edges.assign(
1497
1646
  direction=CPR_GRAPH_EDGE_DIRECTIONS.REVERSE
@@ -1621,7 +1770,9 @@ def _create_topology_weights(
1621
1770
 
1622
1771
 
1623
1772
  def _validate_entity_attrs(
1624
- entity_attrs: dict, validate_transformations: bool = True
1773
+ entity_attrs: dict,
1774
+ validate_transformations: bool = True,
1775
+ custom_transformations: Optional[dict] = None,
1625
1776
  ) -> None:
1626
1777
  """Validate that graph attributes are a valid format."""
1627
1778
 
@@ -1631,11 +1782,15 @@ def _validate_entity_attrs(
1631
1782
  entity_attrs = _EntityAttrValidator(**v).model_dump()
1632
1783
 
1633
1784
  if validate_transformations:
1634
- if v["trans"] not in DEFINED_WEIGHT_TRANSFORMATION.keys():
1785
+ trans_name = v["trans"]
1786
+ valid_trans = set(DEFINED_WEIGHT_TRANSFORMATION.keys())
1787
+ if custom_transformations:
1788
+ valid_trans = valid_trans.union(set(custom_transformations.keys()))
1789
+ if trans_name not in valid_trans:
1635
1790
  raise ValueError(
1636
- f"transformation {v['trans']} was not defined as an alias in "
1637
- "DEFINED_WEIGHT_TRANSFORMATION. The defined transformations "
1638
- f"are {', '.join(DEFINED_WEIGHT_TRANSFORMATION.keys())}"
1791
+ f"transformation {trans_name} was not defined as an alias in "
1792
+ "DEFINED_WEIGHT_TRANSFORMATION or custom_transformations. The defined transformations "
1793
+ f"are {', '.join(valid_trans)}"
1639
1794
  )
1640
1795
 
1641
1796
  return None
@@ -0,0 +1,118 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import igraph as ig
4
+ import inspect
5
+
6
+ from typing import Optional
7
+
8
+
9
def personalized_pagerank_by_attribute(
    g: "ig.Graph",
    attribute: str,
    damping: float = 0.85,
    calculate_uniform_dist: bool = True,
    additional_propagation_args: Optional[dict] = None,
) -> pd.DataFrame:
    """
    Run personalized PageRank with reset probability proportional to a vertex attribute.

    Optionally also computes a "uniform" personalized PageRank in which the reset
    probability is spread evenly over the vertices with a nonzero attribute value,
    which can serve as a baseline for the attribute-weighted scores.

    Parameters
    ----------
    g : igraph.Graph
        The input graph.
    attribute : str
        The vertex attribute to use for personalization. Missing or None values
        are treated as 0; values must be non-negative and not all zero.
    damping : float, optional
        Damping factor (default 0.85).
    calculate_uniform_dist : bool, optional
        If True, also compute uniform PPR over nonzero attribute nodes.
    additional_propagation_args : dict, optional
        Additional keyword arguments forwarded to igraph's personalized_pagerank.
        Keys must match that method's signature and may not include 'reset' or
        'damping', which are set by this function.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns ['name', 'pagerank_by_attribute', attribute] and,
        when calculate_uniform_dist is True, 'pagerank_uniform'.

    Example
    -------
    >>> import igraph as ig
    >>> from napistu.network.net_propagation import personalized_pagerank_by_attribute
    >>> g = ig.Graph.Full(3)
    >>> g.vs['name'] = ['A', 'B', 'C']
    >>> g.vs['score'] = [1, 0, 2]
    >>> df = personalized_pagerank_by_attribute(g, 'score')
    >>> print(df)
    """
    # Validate and extract the attribute (missing/None treated as 0)
    attr = _ensure_nonnegative_vertex_attribute(g, attribute)

    # Validate additional_propagation_args against the method's signature so a
    # typo fails loudly here rather than deep inside igraph.
    if additional_propagation_args is None:
        additional_propagation_args = {}
    else:
        valid_args = set(inspect.signature(g.personalized_pagerank).parameters.keys())
        for k in additional_propagation_args:
            if k not in valid_args:
                raise ValueError(f"Invalid argument for personalized_pagerank: {k}")
            if k in ("reset", "damping"):
                # these are supplied explicitly below; forwarding them as well
                # would raise an opaque duplicate-keyword TypeError
                raise ValueError(
                    f"'{k}' is set by personalized_pagerank_by_attribute and "
                    "cannot be overridden via additional_propagation_args"
                )

    # Personalized PageRank (no manual normalization; igraph handles it)
    pr_attr = g.personalized_pagerank(
        reset=attr.tolist(), damping=damping, **additional_propagation_args
    )

    # Node names (fall back to integer indices for unnamed graphs)
    names = g.vs["name"] if "name" in g.vs.attributes() else list(range(g.vcount()))

    data = {"name": names, "pagerank_by_attribute": pr_attr, attribute: attr}

    # Uniform PPR over nodes with a nonzero attribute value
    if calculate_uniform_dist:
        used_in_uniform = attr > 0
        n_uniform = used_in_uniform.sum()
        if n_uniform == 0:
            raise ValueError("No nonzero attribute values for uniform PPR.")
        uniform_vec = np.zeros_like(attr, dtype=float)
        uniform_vec[used_in_uniform] = 1.0 / n_uniform
        pr_uniform = g.personalized_pagerank(
            reset=uniform_vec.tolist(), damping=damping, **additional_propagation_args
        )
        data["pagerank_uniform"] = pr_uniform

    return pd.DataFrame(data)
84
+
85
+
86
+ def _ensure_nonnegative_vertex_attribute(g: ig.Graph, attribute: str):
87
+ """
88
+ Utility to check that a vertex attribute is present, numeric, and non-negative.
89
+ Raises ValueError if checks fail.
90
+ Missing or None values are treated as 0.
91
+ Raises ValueError if attribute is missing for all vertices or all values are zero.
92
+ """
93
+
94
+ all_missing = all(
95
+ (attribute not in v.attributes() or v[attribute] is None) for v in g.vs
96
+ )
97
+ if all_missing:
98
+ raise ValueError(f"Vertex attribute '{attribute}' is missing for all vertices.")
99
+
100
+ values = [
101
+ (
102
+ v[attribute]
103
+ if (attribute in v.attributes() and v[attribute] is not None)
104
+ else 0.0
105
+ )
106
+ for v in g.vs
107
+ ]
108
+
109
+ arr = np.array(values, dtype=float)
110
+
111
+ if np.all(arr == 0):
112
+ raise ValueError(
113
+ f"Vertex attribute '{attribute}' is zero for all vertices; cannot use as reset vector."
114
+ )
115
+ if np.any(arr < 0):
116
+ raise ValueError(f"Attribute '{attribute}' contains negative values.")
117
+
118
+ return arr
@@ -18,6 +18,7 @@ from napistu.network import net_create
18
18
  from napistu.constants import SBML_DFS
19
19
  from napistu.constants import SOURCE_SPEC
20
20
 
21
+ from napistu.identifiers import _validate_assets_sbml_ids
21
22
  from napistu.network.constants import CPR_GRAPH_NODES
22
23
  from napistu.network.constants import CPR_GRAPH_TYPES
23
24
 
@@ -520,38 +521,6 @@ def _validate_assets_graph_dist(
520
521
  return None
521
522
 
522
523
 
523
- def _validate_assets_sbml_ids(
524
- sbml_dfs: sbml_dfs_core.SBML_dfs, identifiers_df: pd.DataFrame
525
- ) -> None:
526
- """Check an sbml_dfs file and identifiers table for inconsistencies."""
527
-
528
- joined_species_w_ids = sbml_dfs.species.merge(
529
- identifiers_df[["s_id", "s_name"]].drop_duplicates(),
530
- left_index=True,
531
- right_on="s_id",
532
- )
533
-
534
- inconsistent_names_df = joined_species_w_ids.query("s_name_x != s_name_y").dropna()
535
- inconsistent_names_list = [
536
- f"{x} != {y}"
537
- for x, y in zip(
538
- inconsistent_names_df["s_name_x"], inconsistent_names_df["s_name_y"]
539
- )
540
- ]
541
-
542
- if len(inconsistent_names_list):
543
- example_inconsistent_names = inconsistent_names_list[
544
- 0 : min(10, len(inconsistent_names_list))
545
- ]
546
-
547
- raise ValueError(
548
- f"{len(inconsistent_names_list)} species names do not match between "
549
- f"sbml_dfs and identifiers_df including: {', '.join(example_inconsistent_names)}"
550
- )
551
-
552
- return None
553
-
554
-
555
524
  def _get_top_n_idx(arr: Sequence, n: int, ascending: bool = False) -> Sequence[int]:
556
525
  """Returns the indices of the top n values in an array
557
526