napistu 0.3.6__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. napistu/__main__.py +28 -13
  2. napistu/consensus.py +19 -25
  3. napistu/constants.py +102 -83
  4. napistu/indices.py +3 -1
  5. napistu/ingestion/napistu_edgelist.py +4 -4
  6. napistu/ingestion/sbml.py +298 -295
  7. napistu/ingestion/string.py +14 -18
  8. napistu/ingestion/trrust.py +22 -27
  9. napistu/matching/interactions.py +41 -39
  10. napistu/matching/species.py +1 -1
  11. napistu/modify/gaps.py +2 -1
  12. napistu/network/constants.py +61 -45
  13. napistu/network/data_handling.py +1 -1
  14. napistu/network/neighborhoods.py +3 -3
  15. napistu/network/net_create.py +440 -616
  16. napistu/network/net_create_utils.py +734 -0
  17. napistu/network/net_propagation.py +1 -1
  18. napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
  19. napistu/network/ng_utils.py +28 -21
  20. napistu/network/paths.py +4 -4
  21. napistu/network/precompute.py +35 -74
  22. napistu/ontologies/genodexito.py +5 -1
  23. napistu/ontologies/renaming.py +4 -0
  24. napistu/sbml_dfs_core.py +127 -64
  25. napistu/sbml_dfs_utils.py +50 -0
  26. napistu/utils.py +132 -46
  27. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/METADATA +2 -2
  28. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/RECORD +47 -44
  29. tests/conftest.py +171 -13
  30. tests/test_consensus.py +74 -5
  31. tests/test_gaps.py +26 -15
  32. tests/test_network_data_handling.py +5 -2
  33. tests/test_network_net_create.py +93 -202
  34. tests/test_network_net_create_utils.py +538 -0
  35. tests/test_network_ng_core.py +19 -0
  36. tests/test_network_ng_utils.py +1 -1
  37. tests/test_network_precompute.py +5 -4
  38. tests/test_ontologies_renaming.py +28 -24
  39. tests/test_rpy2_callr.py +0 -1
  40. tests/test_rpy2_init.py +0 -1
  41. tests/test_sbml_dfs_core.py +165 -15
  42. tests/test_sbml_dfs_utils.py +45 -0
  43. tests/test_utils.py +45 -2
  44. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/WHEEL +0 -0
  45. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/entry_points.txt +0 -0
  46. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/licenses/LICENSE +0 -0
  47. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@ import pandas as pd
5
5
  import numpy as np
6
6
  import igraph as ig
7
7
 
8
- from napistu.network.napistu_graph_core import NapistuGraph
8
+ from napistu.network.ng_core import NapistuGraph
9
9
 
10
10
 
11
11
  def personalized_pagerank_by_attribute(
@@ -36,7 +36,7 @@ class NapistuGraph(ig.Graph):
36
36
  ----------
37
37
  is_reversed : bool
38
38
  Whether the graph edges have been reversed from their original direction
39
- graph_type : str or None
39
+ wiring_approach : str or None
40
40
  Type of graph (e.g., 'bipartite', 'regulatory', 'surrogate')
41
41
  weighting_strategy : str or None
42
42
  Strategy used for edge weighting (e.g., 'topology', 'mixed', 'calibrated')
@@ -101,7 +101,7 @@ class NapistuGraph(ig.Graph):
101
101
  # Initialize metadata
102
102
  self._metadata = {
103
103
  "is_reversed": False,
104
- "graph_type": None,
104
+ "wiring_approach": None,
105
105
  "weighting_strategy": None,
106
106
  "creation_params": {},
107
107
  }
@@ -147,6 +147,21 @@ class NapistuGraph(ig.Graph):
147
147
 
148
148
  return new_graph
149
149
 
150
+ @property
151
+ def is_reversed(self) -> bool:
152
+ """Check if the graph has been reversed."""
153
+ return self._metadata["is_reversed"]
154
+
155
+ @property
156
+ def wiring_approach(self) -> Optional[str]:
157
+ """Get the graph type (bipartite, regulatory, etc.)."""
158
+ return self._metadata["wiring_approach"]
159
+
160
+ @property
161
+ def weighting_strategy(self) -> Optional[str]:
162
+ """Get the weighting strategy used."""
163
+ return self._metadata["weighting_strategy"]
164
+
150
165
  def reverse_edges(self) -> None:
151
166
  """
152
167
  Reverse all edges in the graph.
@@ -181,20 +196,47 @@ class NapistuGraph(ig.Graph):
181
196
 
182
197
  return None
183
198
 
184
- @property
185
- def is_reversed(self) -> bool:
186
- """Check if the graph has been reversed."""
187
- return self._metadata["is_reversed"]
199
+ def remove_isolated_vertices(self):
200
+ """
201
+ Remove vertices that have no edges (degree 0) from the graph.
188
202
 
189
- @property
190
- def graph_type(self) -> Optional[str]:
191
- """Get the graph type (bipartite, regulatory, etc.)."""
192
- return self._metadata["graph_type"]
193
203
 
194
- @property
195
- def weighting_strategy(self) -> Optional[str]:
196
- """Get the weighting strategy used."""
197
- return self._metadata["weighting_strategy"]
204
+ Returns
205
+ -------
206
+ None
207
+ The graph is modified in-place.
208
+
209
+ """
210
+
211
+ # Find isolated vertices (degree 0)
212
+ isolated_vertices = self.vs.select(_degree=0)
213
+
214
+ if len(isolated_vertices) == 0:
215
+ logger.info("No isolated vertices found to remove")
216
+ return
217
+
218
+ # Get vertex names/indices for logging (up to 5 examples)
219
+ vertex_names = []
220
+ for v in isolated_vertices[:5]:
221
+ # Use vertex name if available, otherwise use index
222
+ name = (
223
+ v["name"]
224
+ if "name" in v.attributes() and v["name"] is not None
225
+ else str(v.index)
226
+ )
227
+ vertex_names.append(name)
228
+
229
+ # Create log message
230
+ examples_str = ", ".join(f"'{name}'" for name in vertex_names)
231
+ if len(isolated_vertices) > 5:
232
+ examples_str += f" (and {len(isolated_vertices) - 5} more)"
233
+
234
+ logger.info(
235
+ f"Removed {len(isolated_vertices)} isolated vertices: [{examples_str}]"
236
+ )
237
+
238
+ # Remove the isolated vertices
239
+ self.delete_vertices(isolated_vertices)
198
240
 
199
241
  def set_metadata(self, **kwargs) -> None:
200
242
  """
@@ -252,7 +294,7 @@ class NapistuGraph(ig.Graph):
252
294
  base_str = super().__str__()
253
295
  metadata_str = (
254
296
  f"Reversed: {self.is_reversed}, "
255
- f"Type: {self.graph_type}, "
297
+ f"Type: {self.wiring_approach}, "
256
298
  f"Weighting: {self.weighting_strategy}"
257
299
  )
258
300
  return f"{base_str}\nNapistuGraph metadata: {metadata_str}"
@@ -17,12 +17,12 @@ import pandas as pd
17
17
  from napistu import sbml_dfs_core
18
18
  from napistu import source
19
19
  from napistu.network import net_create
20
- from napistu.network.napistu_graph_core import NapistuGraph
20
+ from napistu.network.ng_core import NapistuGraph
21
21
 
22
22
  from napistu.constants import SBML_DFS
23
23
  from napistu.constants import SOURCE_SPEC
24
24
  from napistu.identifiers import _validate_assets_sbml_ids
25
- from napistu.network.constants import NAPISTU_GRAPH_TYPES
25
+ from napistu.network.constants import GRAPH_WIRING_APPROACHES
26
26
  from napistu.network.constants import NAPISTU_GRAPH_DIRECTEDNESS
27
27
 
28
28
  logger = logging.getLogger(__name__)
@@ -138,9 +138,9 @@ def export_networks(
138
138
  model_prefix: str,
139
139
  outdir: str,
140
140
  directeds: list[bool] = [True, False],
141
- graph_types: list[str] = [
142
- NAPISTU_GRAPH_TYPES.BIPARTITE,
143
- NAPISTU_GRAPH_TYPES.REGULATORY,
141
+ wiring_approaches: list[str] = [
142
+ GRAPH_WIRING_APPROACHES.BIPARTITE,
143
+ GRAPH_WIRING_APPROACHES.REGULATORY,
144
144
  ],
145
145
  ) -> None:
146
146
  """
@@ -158,10 +158,11 @@ def export_networks(
158
158
  Path to an existing directory where results should be saved
159
159
  directeds : [bool]
160
160
  List of directed types to export: a directed (True) or undirected graph be made (False)
161
- graph_types : [str]
161
+ wiring_approaches : [str]
162
162
  Types of graphs to construct, valid values are:
163
163
  - bipartite: substrates and modifiers point to the reaction they drive, this reaction points to products
164
164
  - regulatory: non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
165
+ - surrogate: regulatory approach but with substrates upstream of enzymes
165
166
 
166
167
  Returns:
167
168
  ----------
@@ -177,24 +178,26 @@ def export_networks(
177
178
  raise FileNotFoundError(f"{outdir} does not exist")
178
179
  if not isinstance(directeds, list):
179
180
  raise TypeError(f"directeds must be a list, but was {type(directeds)}")
180
- if not isinstance(graph_types, list):
181
- raise TypeError(f"graph_types must be a list but was a {type(graph_types)}")
181
+ if not isinstance(wiring_approaches, list):
182
+ raise TypeError(
183
+ f"wiring_approaches must be a list but was a {type(wiring_approaches)}"
184
+ )
182
185
 
183
- # iterate through provided graph_types and export each type
184
- for graph_type in graph_types:
186
+ # iterate through provided wiring_approaches and export each type
187
+ for wiring_approach in wiring_approaches:
185
188
  for directed in directeds:
186
189
  export_pkl_path = _create_network_save_string(
187
190
  model_prefix=model_prefix,
188
191
  outdir=outdir,
189
192
  directed=directed,
190
- graph_type=graph_type,
193
+ wiring_approach=wiring_approach,
191
194
  )
192
- print(f"Exporting {graph_type} network to {export_pkl_path}")
195
+ print(f"Exporting {wiring_approach} network to {export_pkl_path}")
193
196
 
194
197
  network_graph = net_create.process_napistu_graph(
195
198
  sbml_dfs=sbml_dfs,
196
199
  directed=directed,
197
- graph_type=graph_type,
200
+ wiring_approach=wiring_approach,
198
201
  verbose=True,
199
202
  )
200
203
 
@@ -206,7 +209,7 @@ def export_networks(
206
209
  def read_network_pkl(
207
210
  model_prefix: str,
208
211
  network_dir: str,
209
- graph_type: str,
212
+ wiring_approach: str,
210
213
  directed: bool = True,
211
214
  ) -> NapistuGraph:
212
215
  """
@@ -222,10 +225,11 @@ def read_network_pkl(
222
225
  Path to a directory containing all saved networks.
223
226
  directed : bool
224
227
  Should a directed (True) or undirected graph be loaded (False)
225
- graph_type : [str]
228
+ wiring_approach : [str]
226
229
  Type of graphs to read, valid values are:
227
230
  - bipartite: substrates and modifiers point to the reaction they drive, this reaction points to products
228
231
  - regulatory: non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
232
+ - surrogate: regulatory approach but with substrates upstream of enzymes
229
233
 
230
234
  Returns
231
235
  -------
@@ -239,15 +243,17 @@ def read_network_pkl(
239
243
  raise FileNotFoundError(f"{network_dir} does not exist")
240
244
  if not isinstance(directed, bool):
241
245
  raise TypeError(f"directed must be a bool, but was {type(directed)}")
242
- if not isinstance(graph_type, str):
243
- raise TypeError(f"graph_type must be a str but was a {type(graph_type)}")
246
+ if not isinstance(wiring_approach, str):
247
+ raise TypeError(
248
+ f"wiring_approach must be a str but was a {type(wiring_approach)}"
249
+ )
244
250
 
245
251
  import_pkl_path = _create_network_save_string(
246
- model_prefix, network_dir, directed, graph_type
252
+ model_prefix, network_dir, directed, wiring_approach
247
253
  )
248
254
  if not os.path.isfile(import_pkl_path):
249
255
  raise FileNotFoundError(f"{import_pkl_path} does not exist")
250
- print(f"Importing {graph_type} network from {import_pkl_path}")
256
+ print(f"Importing {wiring_approach} network from {import_pkl_path}")
251
257
 
252
258
  network_graph = ig.Graph.Read_Pickle(fname=import_pkl_path)
253
259
 
@@ -374,7 +380,7 @@ def read_graph_attrs_spec(graph_attrs_spec_uri: str) -> dict:
374
380
 
375
381
  # Internal utility functions
376
382
  def _create_network_save_string(
377
- model_prefix: str, outdir: str, directed: bool, graph_type: str
383
+ model_prefix: str, outdir: str, directed: bool, wiring_approach: str
378
384
  ) -> str:
379
385
  if directed:
380
386
  directed_str = NAPISTU_GRAPH_DIRECTEDNESS.DIRECTED
@@ -382,7 +388,8 @@ def _create_network_save_string(
382
388
  directed_str = NAPISTU_GRAPH_DIRECTEDNESS.UNDIRECTED
383
389
 
384
390
  export_pkl_path = os.path.join(
385
- outdir, model_prefix + "_network_" + graph_type + "_" + directed_str + ".pkl"
391
+ outdir,
392
+ model_prefix + "_network_" + wiring_approach + "_" + directed_str + ".pkl",
386
393
  )
387
394
 
388
395
  return export_pkl_path
napistu/network/paths.py CHANGED
@@ -9,9 +9,9 @@ import pandas as pd
9
9
 
10
10
  from napistu import sbml_dfs_core
11
11
  from napistu import utils
12
- from napistu.network.napistu_graph_core import NapistuGraph
12
+ from napistu.network.ng_core import NapistuGraph
13
13
  from napistu.network.ng_utils import get_minimal_sources_edges
14
- from napistu.constants import CPR_PATH_REQ_VARS
14
+ from napistu.constants import NAPISTU_PATH_REQ_VARS
15
15
  from napistu.constants import MINI_SBO_NAME_TO_POLARITY
16
16
  from napistu.constants import MINI_SBO_TO_NAME
17
17
  from napistu.constants import SBML_DFS
@@ -391,7 +391,7 @@ def _filter_paths_by_precomputed_distances(
391
391
  ) -> pd.DataFrame:
392
392
  """Filter source -> destination pairs based on precomputed distances if they were provided."""
393
393
 
394
- utils.match_pd_vars(all_species_pairs, CPR_PATH_REQ_VARS).assert_present()
394
+ utils.match_pd_vars(all_species_pairs, NAPISTU_PATH_REQ_VARS).assert_present()
395
395
 
396
396
  if precomputed_distances is None:
397
397
  logger.info(
@@ -402,7 +402,7 @@ def _filter_paths_by_precomputed_distances(
402
402
  if not isinstance(precomputed_distances, pd.DataFrame):
403
403
  raise TypeError('"precomputed_distances" must be a pd.DataFrame')
404
404
 
405
- utils.match_pd_vars(precomputed_distances, CPR_PATH_REQ_VARS).assert_present()
405
+ utils.match_pd_vars(precomputed_distances, NAPISTU_PATH_REQ_VARS).assert_present()
406
406
 
407
407
  # filter to pairs which are connected in the pre-computed distances table
408
408
  valid_all_species_pairs = all_species_pairs.merge(
@@ -2,17 +2,16 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import math
5
- from pathlib import Path
6
- from typing import Union
7
- import io
8
5
 
9
6
  import numpy as np
10
7
  import pandas as pd
11
- from fs.errors import ResourceNotFound
12
8
 
13
- from napistu.network.napistu_graph_core import NapistuGraph
9
+ from napistu.network.ng_core import NapistuGraph
14
10
  from napistu.network.ig_utils import validate_edge_attributes
15
- from napistu.utils import load_json, save_json
11
+ from napistu.constants import NAPISTU_EDGELIST, SBML_DFS
12
+ from napistu.network.constants import (
13
+ NAPISTU_GRAPH_EDGES,
14
+ )
16
15
 
17
16
  logger = logging.getLogger(__name__)
18
17
 
@@ -22,10 +21,13 @@ def precompute_distances(
22
21
  max_steps: int = -1,
23
22
  max_score_q: float = float(1),
24
23
  partition_size: int = int(5000),
25
- weights_vars: list[str] = ["weights", "upstream_weights"],
24
+ weights_vars: list[str] = [
25
+ NAPISTU_GRAPH_EDGES.WEIGHTS,
26
+ NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
27
+ ],
26
28
  ) -> pd.DataFrame:
27
29
  """
28
- Pre-Compute Distances
30
+ Precompute Distances between all pairs of species in a NapistuGraph network.
29
31
 
30
32
  Parameters
31
33
  ----------
@@ -80,6 +82,7 @@ def precompute_distances(
80
82
  # iterate through all partitions of "from" nodes and find their shortest and lowest weighted paths
81
83
  unique_partitions = vs_to_partition.index.unique().tolist()
82
84
 
85
+ logger.info(f"Calculating distances for {len(unique_partitions)} partitions")
83
86
  precomputed_distances = pd.concat(
84
87
  [
85
88
  _calculate_distances_subset(
@@ -93,6 +96,10 @@ def precompute_distances(
93
96
  ).query("sc_id_origin != sc_id_dest")
94
97
 
95
98
  # filter by path length and/or weight
99
+
100
+ logger.info(
101
+ f"Filtering distances by path length ({max_steps}) and score quantile ({max_score_q})"
102
+ )
96
103
  filtered_precomputed_distances = _filter_precomputed_distances(
97
104
  precomputed_distances=precomputed_distances,
98
105
  max_steps=max_steps,
@@ -103,65 +110,14 @@ def precompute_distances(
103
110
  return filtered_precomputed_distances
104
111
 
105
112
 
106
- def save_precomputed_distances(
107
- precomputed_distances: pd.DataFrame, uri: Union[str, Path]
108
- ) -> None:
109
- """
110
- Save a precomputed distances DataFrame to a JSON file.
111
-
112
- Parameters
113
- ----------
114
- precomputed_distances : pd.DataFrame
115
- The precomputed distances DataFrame to save
116
- uri : Union[str, Path]
117
- Path where to save the JSON file. Can be a local path or a GCS URI.
118
-
119
- Raises
120
- ------
121
- OSError
122
- If the file cannot be written to (permission issues, etc.)
123
- """
124
- save_json(str(uri), precomputed_distances.to_json())
125
-
126
-
127
- def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
128
- """
129
- Load a precomputed distances DataFrame from a JSON file.
130
-
131
- Parameters
132
- ----------
133
- uri : Union[str, Path]
134
- Path to the JSON file to load
135
-
136
- Returns
137
- -------
138
- pd.DataFrame
139
- The reconstructed precomputed distances DataFrame
140
-
141
- Raises
142
- ------
143
- FileNotFoundError
144
- If the specified file does not exist
145
- """
146
- try:
147
- json_string = load_json(str(uri))
148
- df = pd.read_json(io.StringIO(json_string))
149
-
150
- # Convert integer columns to float
151
- for col in df.columns:
152
- if df[col].dtype in ["int64", "int32", "int16", "int8"]:
153
- df[col] = df[col].astype(float)
154
-
155
- return df
156
- except ResourceNotFound as e:
157
- raise FileNotFoundError(f"File not found: {uri}") from e
158
-
159
-
160
113
  def _calculate_distances_subset(
161
114
  napistu_graph: NapistuGraph,
162
115
  vs_to_partition: pd.DataFrame,
163
116
  one_partition: pd.DataFrame,
164
- weights_vars: list[str] = ["weights", "upstream_weights"],
117
+ weights_vars: list[str] = [
118
+ NAPISTU_GRAPH_EDGES.WEIGHTS,
119
+ NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
120
+ ],
165
121
  ) -> pd.DataFrame:
166
122
  """Calculate distances from a subset of vertices to all vertices."""
167
123
 
@@ -169,14 +125,15 @@ def _calculate_distances_subset(
169
125
  pd.DataFrame(
170
126
  np.array(
171
127
  napistu_graph.distances(
172
- source=one_partition["sc_id"], target=vs_to_partition["sc_id"]
128
+ source=one_partition[SBML_DFS.SC_ID],
129
+ target=vs_to_partition[SBML_DFS.SC_ID],
173
130
  )
174
131
  ),
175
- index=one_partition["sc_id"].rename("sc_id_origin"),
176
- columns=vs_to_partition["sc_id"].rename("sc_id_dest"),
132
+ index=one_partition[SBML_DFS.SC_ID].rename(NAPISTU_EDGELIST.SC_ID_ORIGIN),
133
+ columns=vs_to_partition[SBML_DFS.SC_ID].rename(NAPISTU_EDGELIST.SC_ID_DEST),
177
134
  )
178
135
  .reset_index()
179
- .melt("sc_id_origin", value_name="path_length")
136
+ .melt(NAPISTU_EDGELIST.SC_ID_ORIGIN, value_name="path_length")
180
137
  .replace([np.inf, -np.inf], np.nan, inplace=False)
181
138
  .dropna()
182
139
  )
@@ -187,16 +144,20 @@ def _calculate_distances_subset(
187
144
  pd.DataFrame(
188
145
  np.array(
189
146
  napistu_graph.distances(
190
- source=one_partition["sc_id"],
191
- target=vs_to_partition["sc_id"],
147
+ source=one_partition[SBML_DFS.SC_ID],
148
+ target=vs_to_partition[SBML_DFS.SC_ID],
192
149
  weights=weight_type,
193
150
  )
194
151
  ),
195
- index=one_partition["sc_id"].rename("sc_id_origin"),
196
- columns=vs_to_partition["sc_id"].rename("sc_id_dest"),
152
+ index=one_partition[SBML_DFS.SC_ID].rename(
153
+ NAPISTU_EDGELIST.SC_ID_ORIGIN
154
+ ),
155
+ columns=vs_to_partition[SBML_DFS.SC_ID].rename(
156
+ NAPISTU_EDGELIST.SC_ID_DEST
157
+ ),
197
158
  )
198
159
  .reset_index()
199
- .melt("sc_id_origin", value_name=f"path_{weight_type}")
160
+ .melt(NAPISTU_EDGELIST.SC_ID_ORIGIN, value_name=f"path_{weight_type}")
200
161
  .replace([np.inf, -np.inf], np.nan, inplace=False)
201
162
  .dropna()
202
163
  )
@@ -211,8 +172,8 @@ def _calculate_distances_subset(
211
172
  # note: these may be different paths! e.g., a longer path may have a lower weight than a short one
212
173
  path_summaries = d_steps.merge(
213
174
  d_weights,
214
- left_on=["sc_id_origin", "sc_id_dest"],
215
- right_on=["sc_id_origin", "sc_id_dest"],
175
+ left_on=[NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST],
176
+ right_on=[NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST],
216
177
  )
217
178
 
218
179
  # return connected species
@@ -356,7 +356,7 @@ class Genodexito:
356
356
  )
357
357
  logger.debug(
358
358
  f"{ids.shape[0] - expanded_ids.shape[0]} "
359
- "ids are not included in expanded ids"
359
+ "ids are not included in expanded ids. These will be filled with empty Identifiers"
360
360
  )
361
361
  else:
362
362
  matched_expanded_ids = expanded_ids
@@ -364,6 +364,10 @@ class Genodexito:
364
364
  updated_ids = ids.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
365
365
  pd.DataFrame(matched_expanded_ids)
366
366
  )
367
+ # fill missing attributes with empty Identifiers
368
+ updated_ids[SBML_DFS.S_IDENTIFIERS] = updated_ids[
369
+ SBML_DFS.S_IDENTIFIERS
370
+ ].fillna(identifiers.Identifiers([]))
367
371
 
368
372
  setattr(sbml_dfs, "species", updated_ids)
369
373
 
@@ -72,6 +72,10 @@ def rename_species_ontologies(
72
72
  updated_species = sbml_dfs.species.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
73
73
  pd.DataFrame(species_identifiers)
74
74
  )
75
+ # fill missing attributes with empty Identifiers
76
+ updated_species[SBML_DFS.S_IDENTIFIERS] = updated_species[
77
+ SBML_DFS.S_IDENTIFIERS
78
+ ].fillna(identifiers.Identifiers([]))
75
79
 
76
80
  setattr(sbml_dfs, "species", updated_species)
77
81