PyPI - edsger - Versions diffs - 0.1.5__cp39-cp39-win32.whl → 0.1.6__cp39-cp39-win32.whl - Mend

edsger 0.1.5 (cp39-cp39-win32.whl) → 0.1.6 (cp39-cp39-win32.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28):

edsger/_version.py +1 -1
edsger/bellman_ford.cp39-win32.pyd +0 -0
edsger/bellman_ford.pyx +7 -0
edsger/bfs.cp39-win32.pyd +0 -0
edsger/bfs.pyx +243 -0
edsger/commons.cp39-win32.pyd +0 -0
edsger/commons.pyx +7 -0
edsger/dijkstra.cp39-win32.pyd +0 -0
edsger/dijkstra.pyx +7 -0
edsger/graph_importer.py +340 -0
edsger/networks.py +4 -2
edsger/path.py +676 -129
edsger/path_tracking.cp39-win32.pyd +0 -0
edsger/path_tracking.pyx +7 -0
edsger/pq_4ary_dec_0b.cp39-win32.pyd +0 -0
edsger/pq_4ary_dec_0b.pyx +7 -0
edsger/spiess_florian.cp39-win32.pyd +0 -0
edsger/spiess_florian.pyx +7 -0
edsger/star.cp39-win32.pyd +0 -0
edsger/star.pyx +7 -0
edsger/utils.py +9 -8
{edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/METADATA +124 -2
edsger-0.1.6.dist-info/RECORD +32 -0
edsger-0.1.5.dist-info/RECORD +0 -29
{edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/WHEEL +0 -0
{edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/licenses/AUTHORS.rst +0 -0
{edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/licenses/LICENSE +0 -0
{edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/top_level.txt +0 -0

edsger/path.py (changed)

@@ -2,11 +2,13 @@
 Path-related methods.
 """
+from typing import Optional, Union, List, Any
 import warnings
 import numpy as np
 import pandas as pd
+from edsger.graph_importer import standardize_graph_dataframe
 from edsger.commons import (
     A_VERY_SMALL_TIME_INTERVAL_PY,
     DTYPE_INF_PY,
@@ -40,6 +42,7 @@ from edsger.star import (
     convert_graph_to_csr_float64,
     convert_graph_to_csr_uint32,
 )
+from edsger.bfs import bfs_csr, bfs_csc  # pylint: disable=no-name-in-module
 class Dijkstra:
@@ -78,19 +81,20 @@ class Dijkstra:
     def __init__(
         self,
-        edges,
-        tail="tail",
-        head="head",
-        weight="weight",
-        orientation="out",
-        check_edges=False,
-        permute=False,
-        verbose=False,
-    ):
+        edges: pd.DataFrame,
+        tail: str = "tail",
+        head: str = "head",
+        weight: str = "weight",
+        orientation: str = "out",
+        check_edges: bool = False,
+        permute: bool = False,
+        verbose: bool = False,
+    ) -> None:
         # load the edges
         if check_edges:
             self._check_edges(edges, tail, head, weight)
-        self._edges = edges[[tail, head, weight]].copy(deep=True)
+        # Convert to standardized NumPy-backed pandas DataFrame
+        self._edges = standardize_graph_dataframe(edges, tail, head, weight)
         self._n_edges = len(self._edges)
         self._verbose = verbose
@@ -99,7 +103,12 @@ class Dijkstra:
         # reindex the vertices
         self._permute = permute
-        if self._permute:
+        if len(self._edges) == 0:
+            # Handle empty graphs
+            self._permutation = None
+            self._n_vertices = 0
+            self.__n_vertices_init = 0
+        elif self._permute:
             self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
             self._permutation = self._permute_graph(tail, head)
             self._n_vertices = len(self._permutation)
@@ -138,7 +147,7 @@ class Dijkstra:
         self._path_links = None
     @property
-    def edges(self):
+    def edges(self) -> Any:
         """
         Getter for the graph edge dataframe.
@@ -150,7 +159,7 @@ class Dijkstra:
         return self._edges
     @property
-    def n_edges(self):
+    def n_edges(self) -> int:
         """
         Getter for the number of graph edges.
@@ -162,7 +171,7 @@ class Dijkstra:
         return self._n_edges
     @property
-    def n_vertices(self):
+    def n_vertices(self) -> int:
         """
         Getter for the number of graph vertices.
@@ -174,7 +183,7 @@ class Dijkstra:
         return self._n_vertices
     @property
-    def orientation(self):
+    def orientation(self) -> str:
         """
         Getter of Dijkstra's algorithm orientation ("in" or "out").
@@ -186,7 +195,7 @@ class Dijkstra:
         return self._orientation
     @property
-    def permute(self):
+    def permute(self) -> bool:
         """
         Getter for the graph permutation/reindexing option.
@@ -198,7 +207,7 @@ class Dijkstra:
         return self._permute
     @property
-    def path_links(self):
+    def path_links(self) -> Optional[np.ndarray]:
         """
         Getter for the graph permutation/reindexing option.
@@ -239,7 +248,7 @@ class Dijkstra:
     def _check_edges(self, edges, tail, head, weight):
         """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
-        if not isinstance(edges, pd.core.frame.DataFrame):
+        if not isinstance(edges, pd.DataFrame):
             raise TypeError("edges should be a pandas DataFrame")
         if tail not in edges:
@@ -257,7 +266,7 @@ class Dijkstra:
                 f"edge weight column '{weight}' not found in graph edges dataframe"
             )
-        if edges[[tail, head, weight]].isna().any().any():
+        if edges[[tail, head, weight]].isnull().to_numpy().any():
             raise ValueError(
                 " ".join(
                     [
@@ -287,7 +296,7 @@ class Dijkstra:
         permutation = pd.DataFrame(
             data={
                 "vert_idx": np.union1d(
-                    self._edges[tail].values, self._edges[head].values
+                    np.asarray(self._edges[tail]), np.asarray(self._edges[head])
                 )
             }
         )
@@ -330,13 +339,13 @@ class Dijkstra:
     def run(
         self,
-        vertex_idx,
-        path_tracking=False,
-        return_inf=True,
-        return_series=False,
-        heap_length_ratio=1.0,
-        termination_nodes=None,
-    ):
+        vertex_idx: int,
+        path_tracking: bool = False,
+        return_inf: bool = True,
+        return_series: bool = False,
+        heap_length_ratio: float = 1.0,
+        termination_nodes: Optional[List[int]] = None,
+    ) -> Union[np.ndarray, pd.Series]:
         """
         Runs shortest path algorithm between a given vertex and all other vertices in the graph.
@@ -369,17 +378,10 @@ class Dijkstra:
             Pandas Series object with the same data and the vertex indices as index.
         """
-        # validate the input arguments
-        if not isinstance(vertex_idx, int):
-            try:
-                vertex_idx = int(vertex_idx)
-            except ValueError as exc:
-                raise TypeError(
-                    f"argument 'vertex_idx={vertex_idx}' must be an integer"
-                ) from exc
+        # validate the input arguments - type checking handled by static typing
         if vertex_idx < 0:
             raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be positive")
-        if self._permute:
+        if self._permute and self._permutation is not None:
             if vertex_idx not in self._permutation.vert_idx_old.values:
                 raise ValueError(f"vertex {vertex_idx} not found in graph")
             vertex_new = self._permutation.loc[
@@ -389,16 +391,7 @@ class Dijkstra:
             if vertex_idx >= self._n_vertices:
                 raise ValueError(f"vertex {vertex_idx} not found in graph")
             vertex_new = vertex_idx
-        if not isinstance(path_tracking, bool):
-            raise TypeError(
-                f"argument 'path_tracking=f{path_tracking}' must be of bool type"
-            )
-        if not isinstance(return_inf, bool):
-            raise TypeError(f"argument 'return_inf=f{return_inf}' must be of bool type")
-        if not isinstance(return_series, bool):
-            raise TypeError(
-                f"argument 'return_series=f{return_series}' must be of bool type"
-            )
+        # Type checking is now handled by static typing
         if not isinstance(heap_length_ratio, float):
             raise TypeError(
                 f"argument 'heap_length_ratio=f{heap_length_ratio}' must be of float type"
@@ -428,7 +421,7 @@ class Dijkstra:
                 raise ValueError("argument 'termination_nodes' must not be empty")
             # handle vertex permutation if needed
-            if self._permute:
+            if self._permute and self._permutation is not None:
                 termination_nodes_permuted = []
                 for termination_node in termination_nodes_array:
                     if termination_node not in self._permutation.vert_idx_old.values:
@@ -546,7 +539,7 @@ class Dijkstra:
                         heap_length,
                     )
-            if self._permute:
+            if self._permute and self._permutation is not None:
                 # permute back the path vertex indices
                 path_df = pd.DataFrame(
                     data={
@@ -592,11 +585,17 @@ class Dijkstra:
         # reorder path lengths
         if return_series:
-            if self._permute and termination_nodes_array is None:
+            if (
+                self._permute
+                and termination_nodes_array is None
+                and self._permutation is not None
+            ):
                 self._permutation["path_length"] = path_length_values
-                path_lengths_df = self._permutation[
-                    ["vert_idx_old", "path_length"]
-                ].sort_values(by="vert_idx_old")
+                path_lengths_df = (
+                    self._permutation[["vert_idx_old", "path_length"]]
+                    .copy()
+                    .sort_values("vert_idx_old")
+                )  # type: ignore
                 path_lengths_df.set_index("vert_idx_old", drop=True, inplace=True)
                 path_lengths_df.index.name = "vertex_idx"
                 path_lengths_series = path_lengths_df.path_length
@@ -604,7 +603,11 @@ class Dijkstra:
                 path_lengths_series = pd.Series(path_length_values)
                 path_lengths_series.index.name = "vertex_idx"
                 path_lengths_series.name = "path_length"
-                if self._permute and termination_nodes_array is not None:
+                if (
+                    self._permute
+                    and termination_nodes_array is not None
+                    and termination_nodes is not None
+                ):
                     # For early termination with permutation, use original termination node indices
                     path_lengths_series.index = termination_nodes
@@ -614,19 +617,20 @@ class Dijkstra:
         if termination_nodes_array is not None:
             return path_length_values
-        if self._permute:
+        if self._permute and self._permutation is not None:
             self._permutation["path_length"] = path_length_values
             if return_inf:
                 path_length_values = np.inf * np.ones(self.__n_vertices_init)
             else:
                 path_length_values = DTYPE_INF_PY * np.ones(self.__n_vertices_init)
+            assert self._permutation is not None  # guaranteed by condition above
             path_length_values[self._permutation.vert_idx_old.values] = (
                 self._permutation.path_length.values
             )
         return path_length_values
-    def get_vertices(self):
+    def get_vertices(self) -> Any:
         """
         Get the unique vertices from the graph.
@@ -638,11 +642,13 @@ class Dijkstra:
         vertices : ndarray
             A 1-D array containing the unique vertices.
         """
-        if self._permute:
-            return self._permutation.vert_idx_old.values
-        return np.union1d(self._edges["tail"], self._edges["head"])
+        if self._permute and self._permutation is not None:
+            return np.asarray(self._permutation.vert_idx_old)
+        return np.union1d(
+            np.asarray(self._edges["tail"]), np.asarray(self._edges["head"])
+        )
-    def get_path(self, vertex_idx):
+    def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
         """Compute path from predecessors or successors.
         Parameters:
@@ -715,19 +721,20 @@ class BellmanFord:
     def __init__(
         self,
-        edges,
-        tail="tail",
-        head="head",
-        weight="weight",
-        orientation="out",
-        check_edges=False,
-        permute=False,
-        verbose=False,
-    ):
+        edges: pd.DataFrame,
+        tail: str = "tail",
+        head: str = "head",
+        weight: str = "weight",
+        orientation: str = "out",
+        check_edges: bool = False,
+        permute: bool = False,
+        verbose: bool = False,
+    ) -> None:
         # load the edges
         if check_edges:
             self._check_edges(edges, tail, head, weight)
-        self._edges = edges[[tail, head, weight]].copy(deep=True)
+        # Convert to standardized NumPy-backed pandas DataFrame
+        self._edges = standardize_graph_dataframe(edges, tail, head, weight)
         self._n_edges = len(self._edges)
         self._verbose = verbose
@@ -736,7 +743,12 @@ class BellmanFord:
         # reindex the vertices
         self._permute = permute
-        if self._permute:
+        if len(self._edges) == 0:
+            # Handle empty graphs
+            self._permutation = None
+            self._n_vertices = 0
+            self.__n_vertices_init = 0
+        elif self._permute:
             self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
             self._permutation = self._permute_graph(tail, head)
             self._n_vertices = len(self._permutation)
@@ -770,7 +782,7 @@ class BellmanFord:
         self._has_negative_cycle = False
     @property
-    def edges(self):
+    def edges(self) -> Any:
         """
         Getter for the graph edge dataframe.
@@ -782,7 +794,7 @@ class BellmanFord:
         return self._edges
     @property
-    def n_edges(self):
+    def n_edges(self) -> int:
         """
         Getter for the number of graph edges.
@@ -794,7 +806,7 @@ class BellmanFord:
         return self._n_edges
     @property
-    def n_vertices(self):
+    def n_vertices(self) -> int:
         """
         Getter for the number of graph vertices.
@@ -806,7 +818,7 @@ class BellmanFord:
         return self._n_vertices
     @property
-    def orientation(self):
+    def orientation(self) -> str:
         """
         Getter of Bellman-Ford's algorithm orientation ("in" or "out").
@@ -818,7 +830,7 @@ class BellmanFord:
         return self._orientation
     @property
-    def permute(self):
+    def permute(self) -> bool:
         """
         Getter for the graph permutation/reindexing option.
@@ -830,7 +842,7 @@ class BellmanFord:
         return self._permute
     @property
-    def path_links(self):
+    def path_links(self) -> Optional[np.ndarray]:
         """
         Getter for the path links (predecessors or successors).
@@ -871,7 +883,7 @@ class BellmanFord:
     def _check_edges(self, edges, tail, head, weight):
         """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
-        if not isinstance(edges, pd.core.frame.DataFrame):
+        if not isinstance(edges, pd.DataFrame):
             raise TypeError("edges should be a pandas DataFrame")
         if tail not in edges:
@@ -889,7 +901,7 @@ class BellmanFord:
                 f"edge weight column '{weight}' not found in graph edges dataframe"
             )
-        if edges[[tail, head, weight]].isna().any().any():
+        if edges[[tail, head, weight]].isnull().to_numpy().any():
             raise ValueError(
                 " ".join(
                     [
@@ -917,7 +929,7 @@ class BellmanFord:
         permutation = pd.DataFrame(
             data={
                 "vert_idx": np.union1d(
-                    self._edges[tail].values, self._edges[head].values
+                    np.asarray(self._edges[tail]), np.asarray(self._edges[head])
                 )
             }
         )
@@ -960,12 +972,12 @@ class BellmanFord:
     def run(
         self,
-        vertex_idx,
-        path_tracking=False,
-        return_inf=True,
-        return_series=False,
-        detect_negative_cycles=True,
-    ):
+        vertex_idx: int,
+        path_tracking: bool = False,
+        return_inf: bool = True,
+        return_series: bool = False,
+        detect_negative_cycles: bool = True,
+    ) -> Union[np.ndarray, pd.Series]:
         """
         Runs Bellman-Ford shortest path algorithm between a given vertex and all other vertices
         in the graph.
@@ -1000,17 +1012,10 @@ class BellmanFord:
         ValueError
             If detect_negative_cycles is True and a negative cycle is detected in the graph.
         """
-        # validate the input arguments
-        if not isinstance(vertex_idx, int):
-            try:
-                vertex_idx = int(vertex_idx)
-            except ValueError as exc:
-                raise TypeError(
-                    f"argument 'vertex_idx={vertex_idx}' must be an integer"
-                ) from exc
+        # validate the input arguments - type checking handled by static typing
         if vertex_idx < 0:
             raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be positive")
-        if self._permute:
+        if self._permute and self._permutation is not None:
             if vertex_idx not in self._permutation.vert_idx_old.values:
                 raise ValueError(f"vertex {vertex_idx} not found in graph")
             vertex_new = self._permutation.loc[
@@ -1020,20 +1025,7 @@ class BellmanFord:
             if vertex_idx >= self._n_vertices:
                 raise ValueError(f"vertex {vertex_idx} not found in graph")
             vertex_new = vertex_idx
-        if not isinstance(path_tracking, bool):
-            raise TypeError(
-                f"argument 'path_tracking=f{path_tracking}' must be of bool type"
-            )
-        if not isinstance(return_inf, bool):
-            raise TypeError(f"argument 'return_inf=f{return_inf}' must be of bool type")
-        if not isinstance(return_series, bool):
-            raise TypeError(
-                f"argument 'return_series=f{return_series}' must be of bool type"
-            )
-        if not isinstance(detect_negative_cycles, bool):
-            raise TypeError(
-                f"argument 'detect_negative_cycles={detect_negative_cycles}' must be of bool type"
-            )
+        # Type checking is now handled by static typing
         # compute path length
         if not path_tracking:
@@ -1075,7 +1067,7 @@ class BellmanFord:
                     self._n_vertices,
                 )
-            if self._permute:
+            if self._permute and self._permutation is not None:
                 # permute back the path vertex indices
                 path_df = pd.DataFrame(
                     data={
@@ -1146,7 +1138,7 @@ class BellmanFord:
         # reorder path lengths
         if return_series:
-            if self._permute:
+            if self._permute and self._permutation is not None:
                 path_df = pd.DataFrame(
                     data={"path_length": path_length_values[: self._n_vertices]}
                 )
@@ -1170,7 +1162,7 @@ class BellmanFord:
             return path_lengths_series
         # No else needed - de-indent the code
-        if self._permute:
+        if self._permute and self._permutation is not None:
             path_df = pd.DataFrame(
                 data={"path_length": path_length_values[: self._n_vertices]}
             )
@@ -1191,7 +1183,7 @@ class BellmanFord:
                 )
         return path_length_values
-    def get_path(self, vertex_idx):
+    def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
         """Compute path from predecessors or successors.
         Parameters:
@@ -1301,18 +1293,21 @@ class HyperpathGenerating:
     def __init__(
         self,
-        edges,
-        tail="tail",
-        head="head",
-        trav_time="trav_time",
-        freq="freq",
-        check_edges=False,
-        orientation="in",
-    ):
+        edges: pd.DataFrame,
+        tail: str = "tail",
+        head: str = "head",
+        trav_time: str = "trav_time",
+        freq: str = "freq",
+        check_edges: bool = False,
+        orientation: str = "in",
+    ) -> None:
         # load the edges
         if check_edges:
             self._check_edges(edges, tail, head, trav_time, freq)
-        self._edges = edges[[tail, head, trav_time, freq]].copy(deep=True)
+        # Convert to standardized NumPy-backed pandas DataFrame
+        self._edges = standardize_graph_dataframe(
+            edges, tail, head, trav_time=trav_time, freq=freq
+        )
         self.edge_count = len(self._edges)
         # remove inf values if any, and values close to zero
@@ -1354,15 +1349,21 @@ class HyperpathGenerating:
             self._edge_idx = rs_data.astype(np.uint32)
         # edge attributes
-        self._trav_time = self._edges[trav_time].values.astype(DTYPE_PY)
-        self._freq = self._edges[freq].values.astype(DTYPE_PY)
-        self._tail = self._edges[tail].values.astype(np.uint32)
-        self._head = self._edges[head].values.astype(np.uint32)
+        self._trav_time = np.asarray(self._edges[trav_time]).astype(DTYPE_PY)
+        self._freq = np.asarray(self._edges[freq]).astype(DTYPE_PY)
+        self._tail = np.asarray(self._edges[tail]).astype(np.uint32)
+        self._head = np.asarray(self._edges[head]).astype(np.uint32)
         # node attribute
         self.u_i_vec = None
-    def run(self, origin, destination, volume, return_inf=False):
+    def run(
+        self,
+        origin: Union[int, List[int]],
+        destination: int,
+        volume: Union[float, List[float]],
+        return_inf: bool = False,
+    ) -> None:
         """
         Computes the hyperpath and updates edge volumes based on the input demand and configuration.
@@ -1439,7 +1440,7 @@ class HyperpathGenerating:
             self._head,
             demand_indices,  # source vertex indices
             demand_values,
-            self._edges["volume"].values,
+            np.asarray(self._edges["volume"]),
             u_i_vec,
             self.vertex_count,
             destination,
@@ -1456,7 +1457,7 @@ class HyperpathGenerating:
         assert v >= 0.0
     def _check_edges(self, edges, tail, head, trav_time, freq):
-        if not isinstance(edges, pd.core.frame.DataFrame):
+        if not isinstance(edges, pd.DataFrame):
             raise TypeError("edges should be a pandas DataFrame")
         for col in [tail, head, trav_time, freq]:
@@ -1465,7 +1466,7 @@ class HyperpathGenerating:
                     f"edge column '{col}' not found in graph edges dataframe"
                 )
-        if edges[[tail, head, trav_time, freq]].isna().any().any():
+        if edges[[tail, head, trav_time, freq]].isnull().to_numpy().any():
             raise ValueError(
                 " ".join(
                     [
@@ -1487,6 +1488,552 @@ class HyperpathGenerating:
                 raise ValueError(f"column '{col}' should be nonnegative")
+class BFS:
+    """
+    Breadth-First Search algorithm for finding shortest paths in directed graphs.
+    BFS ignores edge weights (treats all edges as having equal weight) and finds the shortest
+    path in terms of the minimum number of edges/hops between vertices. This implementation
+    works on directed graphs using CSR format for forward traversal and CSC format for
+    backward traversal.
+    Note: If parallel edges exist between the same pair of vertices, only one edge will be
+    kept automatically during initialization.
+    Parameters:
+    -----------
+    edges: pandas.DataFrame
+        DataFrame containing the edges of the graph. It should have two columns: 'tail' and 'head'.
+        The 'tail' column should contain the IDs of the starting nodes, and the 'head' column
+        should contain the IDs of the ending nodes. If a 'weight' column is present, it will be
+        ignored.
+    tail: str, optional (default='tail')
+        The name of the column in the DataFrame that contains the IDs of the edge starting nodes.
+    head: str, optional (default='head')
+        The name of the column in the DataFrame that contains the IDs of the edge ending nodes.
+    orientation: str, optional (default='out')
+        The orientation of BFS algorithm. It can be either 'out' for single source shortest
+        paths or 'in' for single target shortest path.
+    check_edges: bool, optional (default=False)
+        Whether to check if the edges DataFrame is well-formed. If set to True, the edges
+        DataFrame will be checked for missing values and invalid data types.
+    permute: bool, optional (default=False)
+        Whether to permute the IDs of the nodes. If set to True, the node IDs will be reindexed
+        to start from 0 and be contiguous.
+    verbose: bool, optional (default=False)
+        Whether to print messages about parallel edge removal.
+    sentinel: int, optional (default=-9999)
+        Sentinel value for unreachable nodes and the start vertex in the predecessor array.
+        Must be a negative integer that fits in int32 range.
+    """
+    def __init__(
+        self,
+        edges: pd.DataFrame,
+        tail: str = "tail",
+        head: str = "head",
+        orientation: str = "out",
+        check_edges: bool = False,
+        permute: bool = False,
+        verbose: bool = False,
+        sentinel: int = -9999,
+    ) -> None:
+        # Validate sentinel value
+        if not isinstance(sentinel, int):
+            raise TypeError(
+                f"sentinel must be an integer, got {type(sentinel).__name__}"
+            )
+        if sentinel >= 0:
+            raise ValueError(f"sentinel must be negative, got {sentinel}")
+        if sentinel < np.iinfo(np.int32).min or sentinel > np.iinfo(np.int32).max:
+            raise ValueError(
+                f"sentinel must fit in int32 range [{np.iinfo(np.int32).min}, "
+                f"{np.iinfo(np.int32).max}], got {sentinel}"
+            )
+        self._sentinel = sentinel
+        # load the edges
+        if check_edges:
+            self._check_edges(edges, tail, head)
+        # Convert to standardized NumPy-backed pandas DataFrame
+        # Note: BFS doesn't need weights, but standardize_graph_dataframe handles that
+        self._edges = standardize_graph_dataframe(edges, tail, head)
+        self._n_edges = len(self._edges)
+        self._verbose = verbose
+        # preprocess edges to handle parallel edges
+        self._preprocess_edges(tail, head)
+        # reindex the vertices
+        self._permute = permute
+        if len(self._edges) == 0:
+            # Handle empty graphs
+            self._permutation = None
+            self._n_vertices = 0
+            self.__n_vertices_init = 0
+        elif self._permute:
+            self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
+            self._permutation = self._permute_graph(tail, head)
+            self._n_vertices = len(self._permutation)
+        else:
+            self._permutation = None
+            self._n_vertices = self._edges[[tail, head]].max(axis=0).max() + 1
+            self.__n_vertices_init = self._n_vertices
+        # convert to CSR/CSC
+        self._check_orientation(orientation)
+        self._orientation = orientation
+        if self._orientation == "out":
+            # Use dummy weight column for conversion (BFS doesn't use weights)
+            self._edges["_bfs_dummy_weight"] = 1.0
+            fs_indptr, fs_indices, _ = convert_graph_to_csr_float64(
+                self._edges, tail, head, "_bfs_dummy_weight", self._n_vertices
+            )
+            self._edges.drop("_bfs_dummy_weight", axis=1, inplace=True)
+            self.__indices = fs_indices.astype(np.uint32)
+            self.__indptr = fs_indptr.astype(np.uint32)
+        else:
+            self._edges["_bfs_dummy_weight"] = 1.0
+            rs_indptr, rs_indices, _ = convert_graph_to_csc_float64(
+                self._edges, tail, head, "_bfs_dummy_weight", self._n_vertices
+            )
+            self._edges.drop("_bfs_dummy_weight", axis=1, inplace=True)
+            self.__indices = rs_indices.astype(np.uint32)
+            self.__indptr = rs_indptr.astype(np.uint32)
+        self._path_links = None
+    @property
+    def UNREACHABLE(self) -> int:
+        """
+        Getter for the sentinel value used for unreachable nodes.
+        Returns
+        -------
+        sentinel : int
+            The sentinel value for unreachable nodes and the start vertex.
+        """
+        return self._sentinel
+    @property
+    def edges(self) -> Any:
+        """
+        Getter for the graph edge dataframe.
+        Returns
+        -------
+        edges: pandas.DataFrame
+            DataFrame containing the edges of the graph.
+        """
+        return self._edges
+    @property
+    def n_edges(self) -> int:
+        """
+        Getter for the number of graph edges.
+        Returns
+        -------
+        n_edges: int
+            The number of edges in the graph.
+        """
+        return self._n_edges
+    @property
+    def n_vertices(self) -> int:
+        """
+        Getter for the number of graph vertices.
+        Returns
+        -------
+        n_vertices: int
+            The number of nodes in the graph (after permutation, if _permute is True).
+        """
+        return self._n_vertices
+    @property
+    def orientation(self) -> str:
+        """
+        Getter of BFS algorithm orientation ("in" or "out").
+        Returns
+        -------
+        orientation : str
+            The orientation of BFS algorithm.
+        """
+        return self._orientation
+    @property
+    def permute(self) -> bool:
+        """
+        Getter for the graph permutation/reindexing option.
+        Returns
+        -------
+        permute : bool
+            Whether to permute the IDs of the nodes.
+        """
+        return self._permute
+    @property
+    def path_links(self) -> Optional[np.ndarray]:
+        """
+        Getter for the path links (predecessors or successors).
+        Returns
+        -------
+        path_links: numpy.ndarray
+            predecessors or successors node index if the path tracking is activated.
+        """
+        return self._path_links
+    def _preprocess_edges(self, tail, head):
+        """
+        Preprocess edges to handle parallel edges by keeping only one edge
+        between any pair of vertices (BFS doesn't use weights).
+        Parameters
+        ----------
+        tail : str
+            The column name for tail vertices
+        head : str
+            The column name for head vertices
+        """
+        original_count = len(self._edges)
+        self._edges = self._edges.groupby([tail, head], as_index=False).first()
+        final_count = len(self._edges)
+        if original_count > final_count:
+            parallel_edges_removed = original_count - final_count
+            if self._verbose:
+                print(
+                    f"Automatically removed {parallel_edges_removed} parallel edge(s). "
+                    f"BFS treats all edges equally."
+                )
+        self._n_edges = len(self._edges)
+    def _check_edges(self, edges, tail, head):
+        """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
+        if not isinstance(edges, pd.DataFrame):
+            raise TypeError("edges should be a pandas DataFrame")
+        if tail not in edges:
+            raise KeyError(
+                f"edge tail column '{tail}' not found in graph edges dataframe"
+            )
+        if head not in edges:
+            raise KeyError(
+                f"edge head column '{head}' not found in graph edges dataframe"
+            )
+        if edges[[tail, head]].isnull().to_numpy().any():
+            raise ValueError(
+                " ".join(
+                    [
+                        f"edges[[{tail}, {head}]] ",
+                        "should not have any missing value",
+                    ]
+                )
+            )
+        for col in [tail, head]:
+            if not pd.api.types.is_integer_dtype(edges[col].dtype):
+                raise TypeError(f"edges['{col}'] should be of integer type")
+    def _permute_graph(self, tail, head):
+        """Permute the IDs of the nodes to start from 0 and be contiguous.
+        Returns a DataFrame with the permuted IDs."""
+        permutation = pd.DataFrame(
+            data={
+                "vert_idx": np.union1d(
+                    np.asarray(self._edges[tail]), np.asarray(self._edges[head])
+                )
+            }
+        )
+        permutation["vert_idx_new"] = permutation.index
+        permutation.index.name = "index"
+        self._edges = pd.merge(
+            self._edges,
+            permutation[["vert_idx", "vert_idx_new"]],
+            left_on=tail,
+            right_on="vert_idx",
+            how="left",
+        )
+        self._edges.drop([tail, "vert_idx"], axis=1, inplace=True)
+        self._edges.rename(columns={"vert_idx_new": tail}, inplace=True)
+        self._edges = pd.merge(
+            self._edges,
+            permutation[["vert_idx", "vert_idx_new"]],
+            left_on=head,
+            right_on="vert_idx",
+            how="left",
+        )
+        self._edges.drop([head, "vert_idx"], axis=1, inplace=True)
+        self._edges.rename(columns={"vert_idx_new": head}, inplace=True)
+        permutation.rename(columns={"vert_idx": "vert_idx_old"}, inplace=True)
+        permutation.reset_index(drop=True, inplace=True)
+        permutation.sort_values(by="vert_idx_new", inplace=True)
+        permutation.index.name = "index"
+        self._edges.index.name = "index"
+        return permutation
+    def _check_orientation(self, orientation):
+        """Checks the orientation attribute."""
+        if orientation not in ["in", "out"]:
+            raise ValueError("orientation should be either 'in' on 'out'")
+    def run(
+        self,
+        vertex_idx: int,
+        path_tracking: bool = False,
+        return_series: bool = False,
+    ) -> Union[np.ndarray, pd.Series]:
+        """
+        Runs BFS algorithm between a given vertex and all other vertices in the graph.
+        Parameters
+        ----------
+        vertex_idx : int
+            The index of the source/target vertex.
+        path_tracking : bool, optional (default=False)
+            Whether to track the shortest path(s) from the source vertex to all other vertices
+            in the graph. When True, predecessors are stored and can be retrieved with get_path().
+        return_series : bool, optional (default=False)
+            Whether to return a Pandas Series object indexed by vertex indices with predecessors
+            as values.
+        Returns
+        -------
+        predecessors : np.ndarray or pd.Series
+            If `return_series=False`, a 1D Numpy array of shape (n_vertices,) with the
+            predecessor of each vertex in the BFS tree (`orientation="out"`), or
+            the successor of each vertex (`orientation="in"`).
+            Unreachable vertices and the start vertex have the sentinel value (default: -9999).
+            If `return_series=True`, a Pandas Series object with the same data and the
+            vertex indices as index.
+        """
+        # validate the input arguments
+        if vertex_idx < 0:
+            raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be non-negative")
+        if self._permute and self._permutation is not None:
+            if vertex_idx not in self._permutation.vert_idx_old.values:
+                raise ValueError(f"vertex {vertex_idx} not found in graph")
+            vertex_new = self._permutation.loc[
+                self._permutation.vert_idx_old == vertex_idx, "vert_idx_new"
+            ].iloc[0]
+        else:
+            if vertex_idx >= self._n_vertices:
+                raise ValueError(f"vertex {vertex_idx} not found in graph")
+            vertex_new = vertex_idx
+        # compute BFS predecessors
+        if self._orientation == "out":
+            predecessors = bfs_csr(
+                self.__indptr,
+                self.__indices,
+                vertex_new,
+                self._n_vertices,
+                self._sentinel,
+            )
+        else:
+            predecessors = bfs_csc(
+                self.__indptr,
+                self.__indices,
+                vertex_new,
+                self._n_vertices,
+                self._sentinel,
+            )
+        # store path links if tracking is enabled
+        if path_tracking:
+            # Convert predecessors to path_links format (uint32)
+            # Replace sentinel value with vertex's own index (like Dijkstra does)
+            self._path_links = np.arange(self._n_vertices, dtype=np.uint32)
+            reachable_mask = predecessors != self._sentinel
+            self._path_links[reachable_mask] = predecessors[reachable_mask].astype(
+                np.uint32
+            )
+            if self._permute and self._permutation is not None:
+                # permute back the path vertex indices (same approach as Dijkstra)
+                path_df = pd.DataFrame(
+                    data={
+                        "vertex_idx": np.arange(self._n_vertices),
+                        "associated_idx": self._path_links,
+                    }
+                )
+                path_df = pd.merge(
+                    path_df,
+                    self._permutation,
+                    left_on="vertex_idx",
+                    right_on="vert_idx_new",
+                    how="left",
+                )
+                path_df.drop(["vertex_idx", "vert_idx_new"], axis=1, inplace=True)
+                path_df.rename(columns={"vert_idx_old": "vertex_idx"}, inplace=True)
+                path_df = pd.merge(
+                    path_df,
+                    self._permutation,
+                    left_on="associated_idx",
+                    right_on="vert_idx_new",
+                    how="left",
+                )
+                path_df.drop(["associated_idx", "vert_idx_new"], axis=1, inplace=True)
+                path_df.rename(columns={"vert_idx_old": "associated_idx"}, inplace=True)
+                if return_series:
+                    path_df.set_index("vertex_idx", inplace=True)
+                    self._path_links = path_df.associated_idx.astype(np.uint32)
+                else:
+                    self._path_links = np.arange(
+                        self.__n_vertices_init, dtype=np.uint32
+                    )
+                    self._path_links[path_df.vertex_idx.values] = (
+                        path_df.associated_idx.values
+                    )
+        else:
+            self._path_links = None
+        # reorder predecessors for permuted graphs
+        if return_series:
+            if self._permute and self._permutation is not None:
+                pred_df = pd.DataFrame(data={"predecessor": predecessors})
+                pred_df["vert_idx_new"] = pred_df.index
+                pred_df = pd.merge(
+                    pred_df,
+                    self._permutation,
+                    left_on="vert_idx_new",
+                    right_on="vert_idx_new",
+                    how="left",
+                )
+                # Map predecessor values back to original IDs
+                valid_mask = pred_df["predecessor"] != self._sentinel
+                if valid_mask.any():
+                    pred_df_valid = pred_df[valid_mask].copy()
+                    pred_df_valid = pd.merge(
+                        pred_df_valid,
+                        self._permutation,
+                        left_on="predecessor",
+                        right_on="vert_idx_new",
+                        how="left",
+                        suffixes=("", "_pred"),
+                    )
+                    pred_df.loc[valid_mask, "predecessor"] = pred_df_valid[
+                        "vert_idx_old_pred"
+                    ].values.astype(np.int32)
+                pred_df.set_index("vert_idx_old", inplace=True)
+                predecessors_series = pred_df.predecessor.astype(np.int32)
+                predecessors_series.index.name = "vertex_idx"
+                predecessors_series.name = "predecessor"
+            else:
+                predecessors_series = pd.Series(predecessors, dtype=np.int32)
+                predecessors_series.index.name = "vertex_idx"
+                predecessors_series.name = "predecessor"
+            return predecessors_series
+        # For array output with permutation
+        if self._permute and self._permutation is not None:
+            pred_df = pd.DataFrame(data={"predecessor": predecessors})
+            pred_df["vert_idx_new"] = pred_df.index
+            pred_df = pd.merge(
+                pred_df,
+                self._permutation,
+                left_on="vert_idx_new",
+                right_on="vert_idx_new",
+                how="left",
+            )
+            # Map predecessor values back to original IDs
+            valid_mask = pred_df["predecessor"] != self._sentinel
+            if valid_mask.any():
+                pred_df_valid = pred_df[valid_mask].copy()
+                pred_df_valid = pd.merge(
+                    pred_df_valid,
+                    self._permutation,
+                    left_on="predecessor",
+                    right_on="vert_idx_new",
+                    how="left",
+                    suffixes=("", "_pred"),
+                )
+                pred_df.loc[valid_mask, "predecessor"] = pred_df_valid[
+                    "vert_idx_old_pred"
+                ].values.astype(np.int32)
+            predecessors_array = np.full(
+                self.__n_vertices_init, self._sentinel, dtype=np.int32
+            )
+            predecessors_array[pred_df.vert_idx_old.values] = (
+                pred_df.predecessor.values.astype(np.int32)
+            )
+            return predecessors_array
+        return predecessors
+    def get_vertices(self) -> Any:
+        """
+        Get the unique vertices from the graph.
+        If the graph has been permuted, this method returns the vertices based on the original
+        indexing. Otherwise, it returns the union of tail and head vertices from the edges.
+        Returns
+        -------
+        vertices : ndarray
+            A 1-D array containing the unique vertices.
+        """
+        if self._permute and self._permutation is not None:
+            return np.asarray(self._permutation.vert_idx_old)
+        return np.union1d(
+            np.asarray(self._edges["tail"]), np.asarray(self._edges["head"])
+        )
+    def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
+        """Compute path from predecessors or successors.
+        Parameters:
+        -----------
+        vertex_idx : int
+            source or target vertex index.
+        Returns
+        -------
+        path_vertices : numpy.ndarray
+            Array of np.int32 type storing the path from or to the given vertex index. If we are
+            dealing with BFS from a source (orientation="out"), the input vertex is the target
+            vertex and the path to the source is given backward from the target to the source
+            using the predecessors. If we are dealing with BFS to a target (orientation="in"),
+            the input vertex is the source vertex and the path to the target is given backward
+            from the target to the source using the successors.
+        """
+        if self._path_links is None:
+            warnings.warn(
+                "Current BFS instance has no path attribute: "
+                "make sure path_tracking is set to True, and run the "
+                "BFS algorithm",
+                UserWarning,
+            )
+            return None
+        if isinstance(self._path_links, pd.Series):
+            path_vertices = compute_path(self._path_links.values, vertex_idx)
+        else:
+            path_vertices = compute_path(self._path_links, vertex_idx)
+        return path_vertices
 # author : Francois Pacull
 # copyright : Architecture & Performance
 # email: francois.pacull@architecture-performance.fr

edsger 0.1.5__cp39-cp39-win32.whl → 0.1.6__cp39-cp39-win32.whl

edsger 0.1.5__cp39-cp39-win32.whl → 0.1.6__cp39-cp39-win32.whl