edsger 0.1.4__cp312-cp312-win32.whl → 0.1.6__cp312-cp312-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
edsger/path.py CHANGED
@@ -2,11 +2,13 @@
  Path-related methods.
  """

+ from typing import Optional, Union, List, Any
  import warnings

  import numpy as np
  import pandas as pd

+ from edsger.graph_importer import standardize_graph_dataframe
  from edsger.commons import (
  A_VERY_SMALL_TIME_INTERVAL_PY,
  DTYPE_INF_PY,
@@ -14,6 +16,14 @@ from edsger.commons import (
  INF_FREQ_PY,
  MIN_FREQ_PY,
  )
+ from edsger.bellman_ford import (
+ compute_bf_sssp,
+ compute_bf_sssp_w_path,
+ compute_bf_stsp,
+ compute_bf_stsp_w_path,
+ detect_negative_cycle,
+ detect_negative_cycle_csc,
+ )
  from edsger.dijkstra import (
  compute_sssp,
  compute_sssp_w_path,
@@ -32,6 +42,7 @@ from edsger.star import (
  convert_graph_to_csr_float64,
  convert_graph_to_csr_uint32,
  )
+ from edsger.bfs import bfs_csr, bfs_csc # pylint: disable=no-name-in-module


  class Dijkstra:
@@ -39,6 +50,9 @@ class Dijkstra:
  Dijkstra's algorithm for finding the shortest paths between nodes in directed graphs with
  positive edge weights.

+ Note: If parallel edges exist between the same pair of vertices, only the edge with the minimum
+ weight will be kept automatically during initialization.
+
  Parameters:
  -----------
  edges: pandas.DataFrame
@@ -61,27 +75,40 @@ class Dijkstra:
  permute: bool, optional (default=False)
  Whether to permute the IDs of the nodes. If set to True, the node IDs will be reindexed to
  start from 0 and be contiguous.
+ verbose: bool, optional (default=False)
+ Whether to print messages about parallel edge removal.
  """

  def __init__(
  self,
- edges,
- tail="tail",
- head="head",
- weight="weight",
- orientation="out",
- check_edges=False,
- permute=False,
- ):
+ edges: pd.DataFrame,
+ tail: str = "tail",
+ head: str = "head",
+ weight: str = "weight",
+ orientation: str = "out",
+ check_edges: bool = False,
+ permute: bool = False,
+ verbose: bool = False,
+ ) -> None:
  # load the edges
  if check_edges:
  self._check_edges(edges, tail, head, weight)
- self._edges = edges[[tail, head, weight]].copy(deep=True)
+ # Convert to standardized NumPy-backed pandas DataFrame
+ self._edges = standardize_graph_dataframe(edges, tail, head, weight)
  self._n_edges = len(self._edges)
+ self._verbose = verbose
+
+ # preprocess edges to handle parallel edges
+ self._preprocess_edges(tail, head, weight)

  # reindex the vertices
  self._permute = permute
- if self._permute:
+ if len(self._edges) == 0:
+ # Handle empty graphs
+ self._permutation = None
+ self._n_vertices = 0
+ self.__n_vertices_init = 0
+ elif self._permute:
  self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
  self._permutation = self._permute_graph(tail, head)
  self._n_vertices = len(self._permutation)
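A minimal usage sketch of the 0.1.6 constructor shown above (illustrative, not part of the package diff); only the signature and the documented parallel-edge handling are assumed. The duplicate 0 -> 1 edge is expected to be collapsed to its minimum weight.

import pandas as pd
from edsger.path import Dijkstra

edges = pd.DataFrame(
    {
        "tail": [0, 0, 0, 1, 2],
        "head": [1, 1, 2, 2, 3],
        "weight": [2.0, 1.0, 4.0, 1.0, 3.0],  # two parallel 0 -> 1 edges
    }
)
# The duplicate 0 -> 1 edge should be dropped, keeping the minimum weight (1.0).
sp = Dijkstra(edges, orientation="out", check_edges=True, verbose=True)
print(sp.n_edges)  # expected: 4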
@@ -120,7 +147,7 @@ class Dijkstra:
  self._path_links = None

  @property
- def edges(self):
+ def edges(self) -> Any:
  """
  Getter for the graph edge dataframe.

@@ -132,7 +159,7 @@ class Dijkstra:
  return self._edges

  @property
- def n_edges(self):
+ def n_edges(self) -> int:
  """
  Getter for the number of graph edges.

@@ -144,7 +171,7 @@ class Dijkstra:
  return self._n_edges

  @property
- def n_vertices(self):
+ def n_vertices(self) -> int:
  """
  Getter for the number of graph vertices.

@@ -156,7 +183,7 @@ class Dijkstra:
  return self._n_vertices

  @property
- def orientation(self):
+ def orientation(self) -> str:
  """
  Getter of Dijkstra's algorithm orientation ("in" or "out").

@@ -168,7 +195,7 @@ class Dijkstra:
  return self._orientation

  @property
- def permute(self):
+ def permute(self) -> bool:
  """
  Getter for the graph permutation/reindexing option.

@@ -180,7 +207,7 @@ class Dijkstra:
  return self._permute

  @property
- def path_links(self):
+ def path_links(self) -> Optional[np.ndarray]:
  """
  Getter for the graph permutation/reindexing option.

@@ -191,9 +218,37 @@ class Dijkstra:
  """
  return self._path_links

+ def _preprocess_edges(self, tail, head, weight):
+ """
+ Preprocess edges to handle parallel edges by keeping only the minimum weight edge
+ between any pair of vertices.
+
+ Parameters
+ ----------
+ tail : str
+ The column name for tail vertices
+ head : str
+ The column name for head vertices
+ weight : str
+ The column name for edge weights
+ """
+ original_count = len(self._edges)
+ self._edges = self._edges.groupby([tail, head], as_index=False)[weight].min()
+ final_count = len(self._edges)
+
+ if original_count > final_count:
+ parallel_edges_removed = original_count - final_count
+ if self._verbose:
+ print(
+ f"Automatically removed {parallel_edges_removed} parallel edge(s). "
+ f"For each pair of vertices, kept the edge with minimum weight."
+ )
+
+ self._n_edges = len(self._edges)
+
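The _preprocess_edges helper added above is a plain pandas groupby; its deduplication can be reproduced in isolation with the sketch below (plain pandas, no edsger import needed).

import pandas as pd

edges = pd.DataFrame(
    {"tail": [0, 0, 1], "head": [1, 1, 2], "weight": [5.0, 2.0, 1.0]}
)
# Same idiom as _preprocess_edges: one row per (tail, head) pair, minimum weight kept.
deduped = edges.groupby(["tail", "head"], as_index=False)["weight"].min()
print(len(edges), "->", len(deduped))  # 3 -> 2; the 0 -> 1 pair keeps weight 2.0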
  def _check_edges(self, edges, tail, head, weight):
  """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
- if not isinstance(edges, pd.core.frame.DataFrame):
+ if not isinstance(edges, pd.DataFrame):
  raise TypeError("edges should be a pandas DataFrame")

  if tail not in edges:
@@ -211,7 +266,7 @@ class Dijkstra:
  f"edge weight column '{weight}' not found in graph edges dataframe"
  )

- if edges[[tail, head, weight]].isna().any().any():
+ if edges[[tail, head, weight]].isnull().to_numpy().any():
  raise ValueError(
  " ".join(
  [
@@ -241,7 +296,7 @@ class Dijkstra:
  permutation = pd.DataFrame(
  data={
  "vert_idx": np.union1d(
- self._edges[tail].values, self._edges[head].values
+ np.asarray(self._edges[tail]), np.asarray(self._edges[head])
  )
  }
  )
@@ -284,13 +339,13 @@ class Dijkstra:

  def run(
  self,
- vertex_idx,
- path_tracking=False,
- return_inf=True,
- return_series=False,
- heap_length_ratio=1.0,
- termination_nodes=None,
- ):
+ vertex_idx: int,
+ path_tracking: bool = False,
+ return_inf: bool = True,
+ return_series: bool = False,
+ heap_length_ratio: float = 1.0,
+ termination_nodes: Optional[List[int]] = None,
+ ) -> Union[np.ndarray, pd.Series]:
  """
  Runs shortest path algorithm between a given vertex and all other vertices in the graph.

@@ -323,17 +378,10 @@ class Dijkstra:
  Pandas Series object with the same data and the vertex indices as index.

  """
- # validate the input arguments
- if not isinstance(vertex_idx, int):
- try:
- vertex_idx = int(vertex_idx)
- except ValueError as exc:
- raise TypeError(
- f"argument 'vertex_idx={vertex_idx}' must be an integer"
- ) from exc
+ # validate the input arguments - type checking handled by static typing
  if vertex_idx < 0:
  raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be positive")
- if self._permute:
+ if self._permute and self._permutation is not None:
  if vertex_idx not in self._permutation.vert_idx_old.values:
  raise ValueError(f"vertex {vertex_idx} not found in graph")
  vertex_new = self._permutation.loc[
@@ -343,16 +391,7 @@ class Dijkstra:
  if vertex_idx >= self._n_vertices:
  raise ValueError(f"vertex {vertex_idx} not found in graph")
  vertex_new = vertex_idx
- if not isinstance(path_tracking, bool):
- raise TypeError(
- f"argument 'path_tracking=f{path_tracking}' must be of bool type"
- )
- if not isinstance(return_inf, bool):
- raise TypeError(f"argument 'return_inf=f{return_inf}' must be of bool type")
- if not isinstance(return_series, bool):
- raise TypeError(
- f"argument 'return_series=f{return_series}' must be of bool type"
- )
+ # Type checking is now handled by static typing
  if not isinstance(heap_length_ratio, float):
  raise TypeError(
  f"argument 'heap_length_ratio=f{heap_length_ratio}' must be of float type"
@@ -370,10 +409,10 @@ class Dijkstra:
  if termination_nodes is not None:
  try:
  termination_nodes_array = np.array(termination_nodes, dtype=np.uint32)
- except (ValueError, TypeError):
+ except (ValueError, TypeError) as exc:
  raise TypeError(
  "argument 'termination_nodes' must be array-like of integers"
- )
+ ) from exc

  if termination_nodes_array.ndim != 1:
  raise ValueError("argument 'termination_nodes' must be 1-dimensional")
@@ -382,7 +421,7 @@ class Dijkstra:
  raise ValueError("argument 'termination_nodes' must not be empty")

  # handle vertex permutation if needed
- if self._permute:
+ if self._permute and self._permutation is not None:
  termination_nodes_permuted = []
  for termination_node in termination_nodes_array:
  if termination_node not in self._permutation.vert_idx_old.values:
@@ -500,7 +539,7 @@ class Dijkstra:
  heap_length,
  )

- if self._permute:
+ if self._permute and self._permutation is not None:
  # permute back the path vertex indices
  path_df = pd.DataFrame(
  data={
@@ -546,11 +585,17 @@ class Dijkstra:

  # reorder path lengths
  if return_series:
- if self._permute and termination_nodes_array is None:
+ if (
+ self._permute
+ and termination_nodes_array is None
+ and self._permutation is not None
+ ):
  self._permutation["path_length"] = path_length_values
- path_lengths_df = self._permutation[
- ["vert_idx_old", "path_length"]
- ].sort_values(by="vert_idx_old")
+ path_lengths_df = (
+ self._permutation[["vert_idx_old", "path_length"]]
+ .copy()
+ .sort_values("vert_idx_old")
+ ) # type: ignore
  path_lengths_df.set_index("vert_idx_old", drop=True, inplace=True)
  path_lengths_df.index.name = "vertex_idx"
  path_lengths_series = path_lengths_df.path_length
@@ -558,7 +603,11 @@ class Dijkstra:
  path_lengths_series = pd.Series(path_length_values)
  path_lengths_series.index.name = "vertex_idx"
  path_lengths_series.name = "path_length"
- if self._permute and termination_nodes_array is not None:
+ if (
+ self._permute
+ and termination_nodes_array is not None
+ and termination_nodes is not None
+ ):
  # For early termination with permutation, use original termination node indices
  path_lengths_series.index = termination_nodes

@@ -568,19 +617,20 @@ class Dijkstra:
  if termination_nodes_array is not None:
  return path_length_values

- if self._permute:
+ if self._permute and self._permutation is not None:
  self._permutation["path_length"] = path_length_values
  if return_inf:
  path_length_values = np.inf * np.ones(self.__n_vertices_init)
  else:
  path_length_values = DTYPE_INF_PY * np.ones(self.__n_vertices_init)
+ assert self._permutation is not None # guaranteed by condition above
  path_length_values[self._permutation.vert_idx_old.values] = (
  self._permutation.path_length.values
  )

  return path_length_values

- def get_vertices(self):
+ def get_vertices(self) -> Any:
  """
  Get the unique vertices from the graph.

@@ -592,11 +642,13 @@ class Dijkstra:
  vertices : ndarray
  A 1-D array containing the unique vertices.
  """
- if self._permute:
- return self._permutation.vert_idx_old.values
- return np.union1d(self._edges["tail"], self._edges["head"])
+ if self._permute and self._permutation is not None:
+ return np.asarray(self._permutation.vert_idx_old)
+ return np.union1d(
+ np.asarray(self._edges["tail"]), np.asarray(self._edges["head"])
+ )

- def get_path(self, vertex_idx):
+ def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
  """Compute path from predecessors or successors.

  Parameters:
@@ -632,245 +684,1055 @@ class Dijkstra:
632
684
  return path_vertices
633
685
 
634
686
 
635
- class HyperpathGenerating:
687
+ class BellmanFord:
636
688
  """
637
- A class for constructing and managing hyperpath-based routing and analysis in transportation
638
- or graph-based systems.
639
-
640
- Parameters
641
- ----------
642
- edges : pandas.DataFrame
643
- A DataFrame containing graph edge information with columns specified by `tail`, `head`,
644
- `trav_time`, and `freq`. Must not contain missing values.
645
- tail : str, optional
646
- Name of the column in `edges` representing the tail nodes (source nodes), by default "tail".
647
- head : str, optional
648
- Name of the column in `edges` representing the head nodes (target nodes), by default "head".
649
- trav_time : str, optional
650
- Name of the column in `edges` representing travel times for edges, by default "trav_time".
651
- freq : str, optional
652
- Name of the column in `edges` representing frequencies of edges, by default "freq".
653
- check_edges : bool, optional
654
- Whether to validate the structure and data types of `edges`, by default False.
655
- orientation : {"in", "out"}, optional
656
- Determines the orientation of the graph structure for traversal.
657
- - "in": Graph traversal is from destination to origin.
658
- - "out": Graph traversal is from origin to destination.
659
- By default "in".
689
+ Bellman-Ford algorithm for finding the shortest paths between nodes in directed graphs.
690
+ Supports negative edge weights and detects negative cycles.
660
691
 
661
- Attributes
662
- ----------
663
- edge_count : int
664
- The number of edges in the graph.
665
- vertex_count : int
666
- The total number of vertices in the graph.
667
- u_i_vec : numpy.ndarray
668
- An array storing the least travel time for each vertex after running the algorithm.
669
- _edges : pandas.DataFrame
670
- Internal DataFrame containing the edges with additional metadata.
671
- _trav_time : numpy.ndarray
672
- Array of travel times for edges.
673
- _freq : numpy.ndarray
674
- Array of frequencies for edges.
675
- _tail : numpy.ndarray
676
- Array of tail nodes (source nodes) for edges.
677
- _head : numpy.ndarray
678
- Array of head nodes (target nodes) for edges.
679
- __indptr : numpy.ndarray
680
- Array for compressed row (or column) pointers in the CSR/CSC representation.
681
- _edge_idx : numpy.ndarray
682
- Array of edge indices in the CSR/CSC representation.
692
+ Note: If parallel edges exist between the same pair of vertices, only the edge with the minimum
693
+ weight will be kept automatically during initialization.
683
694
 
684
- Methods
685
- -------
686
- run(origin, destination, volume, return_inf=False)
687
- Computes the hyperpath and updates edge volumes based on the input demand and configuration.
688
- _check_vertex_idx(idx)
689
- Validates a vertex index to ensure it is within the graph's bounds.
690
- _check_volume(v)
691
- Validates a volume value to ensure it is a non-negative float.
692
- _check_edges(edges, tail, head, trav_time, freq)
693
- Validates the structure and data types of the input edges DataFrame.
695
+ Parameters:
696
+ -----------
697
+ edges: pandas.DataFrame
698
+ DataFrame containing the edges of the graph. It should have three columns: 'tail', 'head',
699
+ and 'weight'. The 'tail' column should contain the IDs of the starting nodes, the 'head'
700
+ column should contain the IDs of the ending nodes, and the 'weight' column should contain
701
+ the weights of the edges (can be negative).
702
+ tail: str, optional (default='tail')
703
+ The name of the column in the DataFrame that contains the IDs of the edge starting nodes.
704
+ head: str, optional (default='head')
705
+ The name of the column in the DataFrame that contains the IDs of the edge ending nodes.
706
+ weight: str, optional (default='weight')
707
+ The name of the column in the DataFrame that contains the weights of the edges.
708
+ orientation: str, optional (default='out')
709
+ The orientation of Bellman-Ford's algorithm. It can be either 'out' for single source
710
+ shortest paths or 'in' for single target shortest path.
711
+ check_edges: bool, optional (default=False)
712
+ Whether to check if the edges DataFrame is well-formed. If set to True, the edges
713
+ DataFrame will be checked for missing values and invalid data types. Note: negative
714
+ weights are allowed.
715
+ permute: bool, optional (default=False)
716
+ Whether to permute the IDs of the nodes. If set to True, the node IDs will be reindexed to
717
+ start from 0 and be contiguous.
718
+ verbose: bool, optional (default=False)
719
+ Whether to print messages about parallel edge removal.
694
720
  """
695
721
 
696
722
  def __init__(
697
723
  self,
698
- edges,
699
- tail="tail",
700
- head="head",
701
- trav_time="trav_time",
702
- freq="freq",
703
- check_edges=False,
704
- orientation="in",
705
- ):
724
+ edges: pd.DataFrame,
725
+ tail: str = "tail",
726
+ head: str = "head",
727
+ weight: str = "weight",
728
+ orientation: str = "out",
729
+ check_edges: bool = False,
730
+ permute: bool = False,
731
+ verbose: bool = False,
732
+ ) -> None:
706
733
  # load the edges
707
734
  if check_edges:
708
- self._check_edges(edges, tail, head, trav_time, freq)
709
- self._edges = edges[[tail, head, trav_time, freq]].copy(deep=True)
710
- self.edge_count = len(self._edges)
735
+ self._check_edges(edges, tail, head, weight)
736
+ # Convert to standardized NumPy-backed pandas DataFrame
737
+ self._edges = standardize_graph_dataframe(edges, tail, head, weight)
738
+ self._n_edges = len(self._edges)
739
+ self._verbose = verbose
711
740
 
712
- # remove inf values if any, and values close to zero
713
- self._edges[trav_time] = np.where(
714
- self._edges[trav_time] > DTYPE_INF_PY, DTYPE_INF_PY, self._edges[trav_time]
715
- )
716
- self._edges[trav_time] = np.where(
717
- self._edges[trav_time] < A_VERY_SMALL_TIME_INTERVAL_PY,
718
- A_VERY_SMALL_TIME_INTERVAL_PY,
719
- self._edges[trav_time],
720
- )
721
- self._edges[freq] = np.where(
722
- self._edges[freq] > INF_FREQ_PY, INF_FREQ_PY, self._edges[freq]
723
- )
724
- self._edges[freq] = np.where(
725
- self._edges[freq] < MIN_FREQ_PY, MIN_FREQ_PY, self._edges[freq]
726
- )
741
+ # preprocess edges to handle parallel edges
742
+ self._preprocess_edges(tail, head, weight)
727
743
 
728
- # create an edge index column
729
- self._edges = self._edges.reset_index(drop=True)
730
- data_col = "edge_idx"
731
- self._edges[data_col] = self._edges.index
744
+ # reindex the vertices
745
+ self._permute = permute
746
+ if len(self._edges) == 0:
747
+ # Handle empty graphs
748
+ self._permutation = None
749
+ self._n_vertices = 0
750
+ self.__n_vertices_init = 0
751
+ elif self._permute:
752
+ self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
753
+ self._permutation = self._permute_graph(tail, head)
754
+ self._n_vertices = len(self._permutation)
755
+ else:
756
+ self._permutation = None
757
+ self._n_vertices = self._edges[[tail, head]].max(axis=0).max() + 1
758
+ self.__n_vertices_init = self._n_vertices
732
759
 
733
- # convert to CSR/CSC format
734
- self.vertex_count = self._edges[[tail, head]].max().max() + 1
735
- assert orientation in ["out", "in"]
760
+ # convert to CSR/CSC:
761
+ self._check_orientation(orientation)
736
762
  self._orientation = orientation
737
763
  if self._orientation == "out":
738
- fs_indptr, _, fs_data = convert_graph_to_csr_uint32(
739
- self._edges, tail, head, data_col, self.vertex_count
764
+ fs_indptr, fs_indices, fs_data = convert_graph_to_csr_float64(
765
+ self._edges, tail, head, weight, self._n_vertices
740
766
  )
767
+ self.__indices = fs_indices.astype(np.uint32)
741
768
  self.__indptr = fs_indptr.astype(np.uint32)
742
- self._edge_idx = fs_data.astype(np.uint32)
769
+ self.__edge_weights = fs_data.astype(DTYPE_PY)
743
770
  else:
744
- rs_indptr, _, rs_data = convert_graph_to_csc_uint32(
745
- self._edges, tail, head, data_col, self.vertex_count
771
+ rs_indptr, rs_indices, rs_data = convert_graph_to_csc_float64(
772
+ self._edges, tail, head, weight, self._n_vertices
746
773
  )
774
+ self.__indices = rs_indices.astype(np.uint32)
747
775
  self.__indptr = rs_indptr.astype(np.uint32)
748
- self._edge_idx = rs_data.astype(np.uint32)
776
+ self.__edge_weights = rs_data.astype(DTYPE_PY)
749
777
 
750
- # edge attributes
751
- self._trav_time = self._edges[trav_time].values.astype(DTYPE_PY)
752
- self._freq = self._edges[freq].values.astype(DTYPE_PY)
753
- self._tail = self._edges[tail].values.astype(np.uint32)
754
- self._head = self._edges[head].values.astype(np.uint32)
778
+ # Check if graph has any negative weights (for optimization)
779
+ self._has_negative_weights = np.any(self.__edge_weights < 0)
755
780
 
756
- # node attribute
757
- self.u_i_vec = None
781
+ self._path_links = None
782
+ self._has_negative_cycle = False
758
783
 
759
- def run(self, origin, destination, volume, return_inf=False):
784
+ @property
785
+ def edges(self) -> Any:
760
786
  """
761
- Computes the hyperpath and updates edge volumes based on the input demand and configuration.
762
-
763
- Parameters
764
- ----------
765
- origin : int or list of int
766
- The starting vertex or vertices of the demand. If `self._orientation` is "in",
767
- this can be a list of origins corresponding to the demand volumes.
768
- destination : int or list of int
769
- The target vertex or vertices of the demand. If `self._orientation` is "out",
770
- this can be a list of destinations corresponding to the demand volumes.
771
- volume : float or list of float
772
- The demand volume associated with each origin or destination. Must be non-negative.
773
- If a single float is provided, it is applied to a single origin-destination pair.
774
- return_inf : bool, optional
775
- If True, returns additional information from the computation (not yet implemented).
776
- Default is False.
787
+ Getter for the graph edge dataframe.
777
788
 
778
- Raises
779
- ------
780
- NotImplementedError
781
- If `self._orientation` is "out", as the one-to-many algorithm is not yet implemented.
782
- AssertionError
783
- If the lengths of `origin` or `destination` and `volume` do not match.
784
- If any vertex index or volume is invalid.
785
- TypeError
786
- If `volume` is not a float or list of floats.
787
- ValueError
788
- If any volume value is negative.
789
+ Returns
790
+ -------
791
+ edges: pandas.DataFrame
792
+ DataFrame containing the edges of the graph.
793
+ """
794
+ return self._edges
789
795
 
790
- Notes
791
- -----
792
- - The method modifies the `self._edges` DataFrame by adding a "volume" column representing
793
- edge volumes based on the computed hyperpath.
794
- - The `self.u_i_vec` array is updated to store the least travel time for each vertex.
795
- - Only "in" orientation is currently supported.
796
+ @property
797
+ def n_edges(self) -> int:
796
798
  """
797
- # column storing the resulting edge volumes
798
- self._edges["volume"] = 0.0
799
- self.u_i_vec = None
799
+ Getter for the number of graph edges.
800
800
 
801
- # vertex least travel time
802
- u_i_vec = DTYPE_INF_PY * np.ones(self.vertex_count, dtype=DTYPE_PY)
801
+ Returns
802
+ -------
803
+ n_edges: int
804
+ The number of edges in the graph.
805
+ """
806
+ return self._n_edges
803
807
 
804
- # input check
805
- if not isinstance(volume, list):
806
- volume = [volume]
807
- if self._orientation == "out":
808
- self._check_vertex_idx(origin)
809
- if not isinstance(destination, list):
810
- destination = [destination]
811
- assert len(destination) == len(volume)
812
- for i, item in enumerate(destination):
813
- self._check_vertex_idx(item)
814
- self._check_volume(volume[i])
815
- demand_indices = np.array(destination, dtype=np.uint32)
816
- elif self._orientation == "in":
817
- if not isinstance(origin, list):
818
- origin = [origin]
819
- assert len(origin) == len(volume)
820
- for i, item in enumerate(origin):
821
- self._check_vertex_idx(item)
822
- self._check_volume(volume[i])
823
- self._check_vertex_idx(destination)
824
- demand_indices = np.array(origin, dtype=np.uint32)
825
- assert isinstance(return_inf, bool)
808
+ @property
809
+ def n_vertices(self) -> int:
810
+ """
811
+ Getter for the number of graph vertices.
826
812
 
827
- demand_values = np.array(volume, dtype=DTYPE_PY)
813
+ Returns
814
+ -------
815
+ n_vertices: int
816
+ The number of nodes in the graph (after permutation, if _permute is True).
817
+ """
818
+ return self._n_vertices
828
819
 
829
- if self._orientation == "out":
830
- raise NotImplementedError(
831
- "one-to-many Spiess & Florian's algorithm not implemented yet"
832
- )
820
+ @property
821
+ def orientation(self) -> str:
822
+ """
823
+ Getter of Bellman-Ford's algorithm orientation ("in" or "out").
833
824
 
834
- compute_SF_in(
835
- self.__indptr,
836
- self._edge_idx,
837
- self._trav_time,
838
- self._freq,
839
- self._tail,
840
- self._head,
841
- demand_indices, # source vertex indices
842
- demand_values,
843
- self._edges["volume"].values,
844
- u_i_vec,
845
- self.vertex_count,
846
- destination,
847
- )
848
- self.u_i_vec = u_i_vec
825
+ Returns
826
+ -------
827
+ orientation : str
828
+ The orientation of Bellman-Ford's algorithm.
829
+ """
830
+ return self._orientation
849
831
 
850
- def _check_vertex_idx(self, idx):
851
- assert isinstance(idx, int)
852
- assert idx >= 0
853
- assert idx < self.vertex_count
832
+ @property
833
+ def permute(self) -> bool:
834
+ """
835
+ Getter for the graph permutation/reindexing option.
854
836
 
855
- def _check_volume(self, v):
856
- assert isinstance(v, float)
837
+ Returns
838
+ -------
839
+ permute : bool
840
+ Whether to permute the IDs of the nodes.
841
+ """
842
+ return self._permute
843
+
844
+ @property
845
+ def path_links(self) -> Optional[np.ndarray]:
846
+ """
847
+ Getter for the path links (predecessors or successors).
848
+
849
+ Returns
850
+ -------
851
+ path_links: numpy.ndarray
852
+ predecessors or successors node index if the path tracking is activated.
853
+ """
854
+ return self._path_links
855
+
856
+ def _preprocess_edges(self, tail, head, weight):
857
+ """
858
+ Preprocess edges to handle parallel edges by keeping only the minimum weight edge
859
+ between any pair of vertices.
860
+
861
+ Parameters
862
+ ----------
863
+ tail : str
864
+ The column name for tail vertices
865
+ head : str
866
+ The column name for head vertices
867
+ weight : str
868
+ The column name for edge weights
869
+ """
870
+ original_count = len(self._edges)
871
+ self._edges = self._edges.groupby([tail, head], as_index=False)[weight].min()
872
+ final_count = len(self._edges)
873
+
874
+ if original_count > final_count:
875
+ parallel_edges_removed = original_count - final_count
876
+ if self._verbose:
877
+ print(
878
+ f"Automatically removed {parallel_edges_removed} parallel edge(s). "
879
+ f"For each pair of vertices, kept the edge with minimum weight."
880
+ )
881
+
882
+ self._n_edges = len(self._edges)
883
+
884
+ def _check_edges(self, edges, tail, head, weight):
885
+ """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
886
+ if not isinstance(edges, pd.DataFrame):
887
+ raise TypeError("edges should be a pandas DataFrame")
888
+
889
+ if tail not in edges:
890
+ raise KeyError(
891
+ f"edge tail column '{tail}' not found in graph edges dataframe"
892
+ )
893
+
894
+ if head not in edges:
895
+ raise KeyError(
896
+ f"edge head column '{head}' not found in graph edges dataframe"
897
+ )
898
+
899
+ if weight not in edges:
900
+ raise KeyError(
901
+ f"edge weight column '{weight}' not found in graph edges dataframe"
902
+ )
903
+
904
+ if edges[[tail, head, weight]].isnull().to_numpy().any():
905
+ raise ValueError(
906
+ " ".join(
907
+ [
908
+ f"edges[[{tail}, {head}, {weight}]] ",
909
+ "should not have any missing value",
910
+ ]
911
+ )
912
+ )
913
+
914
+ for col in [tail, head]:
915
+ if not pd.api.types.is_integer_dtype(edges[col].dtype):
916
+ raise TypeError(f"edges['{col}'] should be of integer type")
917
+
918
+ if not pd.api.types.is_numeric_dtype(edges[weight].dtype):
919
+ raise TypeError(f"edges['{weight}'] should be of numeric type")
920
+
921
+ # Note: Unlike Dijkstra, we allow negative weights for Bellman-Ford
922
+ if not np.isfinite(edges[weight]).all():
923
+ raise ValueError(f"edges['{weight}'] should be finite")
924
+
925
+ def _permute_graph(self, tail, head):
926
+ """Permute the IDs of the nodes to start from 0 and be contiguous.
927
+ Returns a DataFrame with the permuted IDs."""
928
+
929
+ permutation = pd.DataFrame(
930
+ data={
931
+ "vert_idx": np.union1d(
932
+ np.asarray(self._edges[tail]), np.asarray(self._edges[head])
933
+ )
934
+ }
935
+ )
936
+ permutation["vert_idx_new"] = permutation.index
937
+ permutation.index.name = "index"
938
+
939
+ self._edges = pd.merge(
940
+ self._edges,
941
+ permutation[["vert_idx", "vert_idx_new"]],
942
+ left_on=tail,
943
+ right_on="vert_idx",
944
+ how="left",
945
+ )
946
+ self._edges.drop([tail, "vert_idx"], axis=1, inplace=True)
947
+ self._edges.rename(columns={"vert_idx_new": tail}, inplace=True)
948
+
949
+ self._edges = pd.merge(
950
+ self._edges,
951
+ permutation[["vert_idx", "vert_idx_new"]],
952
+ left_on=head,
953
+ right_on="vert_idx",
954
+ how="left",
955
+ )
956
+ self._edges.drop([head, "vert_idx"], axis=1, inplace=True)
957
+ self._edges.rename(columns={"vert_idx_new": head}, inplace=True)
958
+
959
+ permutation.rename(columns={"vert_idx": "vert_idx_old"}, inplace=True)
960
+ permutation.reset_index(drop=True, inplace=True)
961
+ permutation.sort_values(by="vert_idx_new", inplace=True)
962
+
963
+ permutation.index.name = "index"
964
+ self._edges.index.name = "index"
965
+
966
+ return permutation
967
+
968
+ def _check_orientation(self, orientation):
969
+ """Checks the orientation attribute."""
970
+ if orientation not in ["in", "out"]:
971
+ raise ValueError("orientation should be either 'in' on 'out'")
972
+
973
+ def run(
974
+ self,
975
+ vertex_idx: int,
976
+ path_tracking: bool = False,
977
+ return_inf: bool = True,
978
+ return_series: bool = False,
979
+ detect_negative_cycles: bool = True,
980
+ ) -> Union[np.ndarray, pd.Series]:
981
+ """
982
+ Runs Bellman-Ford shortest path algorithm between a given vertex and all other vertices
983
+ in the graph.
984
+
985
+ Parameters
986
+ ----------
987
+ vertex_idx : int
988
+ The index of the source/target vertex.
989
+ path_tracking : bool, optional (default=False)
990
+ Whether to track the shortest path(s) from the source vertex to all other vertices in
991
+ the graph.
992
+ return_inf : bool, optional (default=True)
993
+ Whether to return path length(s) as infinity (np.inf) when no path exists.
994
+ return_series : bool, optional (default=False)
995
+ Whether to return a Pandas Series object indexed by vertex indices with path length(s)
996
+ as values.
997
+ detect_negative_cycles : bool, optional (default=True)
998
+ Whether to detect negative cycles in the graph. If True and a negative cycle is
999
+ detected,
1000
+ raises a ValueError.
1001
+
1002
+ Returns
1003
+ -------
1004
+ path_length_values or path_lengths_series : array_like or Pandas Series
1005
+ If `return_series=False`, a 1D Numpy array of shape (n_vertices,) with the shortest
1006
+ path length from the source vertex to each vertex in the graph (`orientation="out"`), or
1007
+ from each vertex to the target vertex (`orientation="in"`). If `return_series=True`, a
1008
+ Pandas Series object with the same data and the vertex indices as index.
1009
+
1010
+ Raises
1011
+ ------
1012
+ ValueError
1013
+ If detect_negative_cycles is True and a negative cycle is detected in the graph.
1014
+ """
1015
+ # validate the input arguments - type checking handled by static typing
1016
+ if vertex_idx < 0:
1017
+ raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be positive")
1018
+ if self._permute and self._permutation is not None:
1019
+ if vertex_idx not in self._permutation.vert_idx_old.values:
1020
+ raise ValueError(f"vertex {vertex_idx} not found in graph")
1021
+ vertex_new = self._permutation.loc[
1022
+ self._permutation.vert_idx_old == vertex_idx, "vert_idx_new"
1023
+ ].iloc[0]
1024
+ else:
1025
+ if vertex_idx >= self._n_vertices:
1026
+ raise ValueError(f"vertex {vertex_idx} not found in graph")
1027
+ vertex_new = vertex_idx
1028
+ # Type checking is now handled by static typing
1029
+
1030
+ # compute path length
1031
+ if not path_tracking:
1032
+ self._path_links = None
1033
+ if self._orientation == "in":
1034
+ path_length_values = compute_bf_stsp(
1035
+ self.__indptr,
1036
+ self.__indices,
1037
+ self.__edge_weights,
1038
+ vertex_new,
1039
+ self._n_vertices,
1040
+ )
1041
+ else:
1042
+ path_length_values = compute_bf_sssp(
1043
+ self.__indptr,
1044
+ self.__indices,
1045
+ self.__edge_weights,
1046
+ vertex_new,
1047
+ self._n_vertices,
1048
+ )
1049
+ else:
1050
+ self._path_links = np.arange(0, self._n_vertices, dtype=np.uint32)
1051
+ if self._orientation == "in":
1052
+ path_length_values = compute_bf_stsp_w_path(
1053
+ self.__indptr,
1054
+ self.__indices,
1055
+ self.__edge_weights,
1056
+ self._path_links,
1057
+ vertex_new,
1058
+ self._n_vertices,
1059
+ )
1060
+ else:
1061
+ path_length_values = compute_bf_sssp_w_path(
1062
+ self.__indptr,
1063
+ self.__indices,
1064
+ self.__edge_weights,
1065
+ self._path_links,
1066
+ vertex_new,
1067
+ self._n_vertices,
1068
+ )
1069
+
1070
+ if self._permute and self._permutation is not None:
1071
+ # permute back the path vertex indices
1072
+ path_df = pd.DataFrame(
1073
+ data={
1074
+ "vertex_idx": np.arange(self._n_vertices),
1075
+ "associated_idx": self._path_links,
1076
+ }
1077
+ )
1078
+ path_df = pd.merge(
1079
+ path_df,
1080
+ self._permutation,
1081
+ left_on="vertex_idx",
1082
+ right_on="vert_idx_new",
1083
+ how="left",
1084
+ )
1085
+ path_df.drop(["vertex_idx", "vert_idx_new"], axis=1, inplace=True)
1086
+ path_df.rename(columns={"vert_idx_old": "vertex_idx"}, inplace=True)
1087
+ path_df = pd.merge(
1088
+ path_df,
1089
+ self._permutation,
1090
+ left_on="associated_idx",
1091
+ right_on="vert_idx_new",
1092
+ how="left",
1093
+ )
1094
+ path_df.drop(["associated_idx", "vert_idx_new"], axis=1, inplace=True)
1095
+ path_df.rename(columns={"vert_idx_old": "associated_idx"}, inplace=True)
1096
+
1097
+ if return_series:
1098
+ path_df.set_index("vertex_idx", inplace=True)
1099
+ self._path_links = path_df.associated_idx.astype(np.uint32)
1100
+ else:
1101
+ self._path_links = np.arange(
1102
+ self.__n_vertices_init, dtype=np.uint32
1103
+ )
1104
+ self._path_links[path_df.vertex_idx.values] = (
1105
+ path_df.associated_idx.values
1106
+ )
1107
+
1108
+ # detect negative cycles if requested (only if negative weights exist)
1109
+ if detect_negative_cycles and self._has_negative_weights:
1110
+ if self._orientation == "out":
1111
+ # CSR format - can use detect_negative_cycle directly
1112
+ self._has_negative_cycle = detect_negative_cycle(
1113
+ self.__indptr,
1114
+ self.__indices,
1115
+ self.__edge_weights,
1116
+ path_length_values,
1117
+ self._n_vertices,
1118
+ )
1119
+ else:
1120
+ # CSC format - use CSC-specific negative cycle detection
1121
+ # Much more efficient than converting CSC→CSR
1122
+ self._has_negative_cycle = detect_negative_cycle_csc(
1123
+ self.__indptr,
1124
+ self.__indices,
1125
+ self.__edge_weights,
1126
+ path_length_values,
1127
+ self._n_vertices,
1128
+ )
1129
+
1130
+ if self._has_negative_cycle:
1131
+ raise ValueError("Negative cycle detected in the graph")
1132
+
1133
+ # deal with infinity
1134
+ if return_inf:
1135
+ path_length_values = np.where(
1136
+ path_length_values == DTYPE_INF_PY, np.inf, path_length_values
1137
+ )
1138
+
1139
+ # reorder path lengths
1140
+ if return_series:
1141
+ if self._permute and self._permutation is not None:
1142
+ path_df = pd.DataFrame(
1143
+ data={"path_length": path_length_values[: self._n_vertices]}
1144
+ )
1145
+ path_df["vert_idx_new"] = path_df.index
1146
+ path_df = pd.merge(
1147
+ path_df,
1148
+ self._permutation,
1149
+ left_on="vert_idx_new",
1150
+ right_on="vert_idx_new",
1151
+ how="left",
1152
+ )
1153
+ path_df.drop(["vert_idx_new"], axis=1, inplace=True)
1154
+ path_df.set_index("vert_idx_old", inplace=True)
1155
+ path_lengths_series = path_df.path_length.astype(DTYPE_PY)
1156
+ else:
1157
+ path_lengths_series = pd.Series(
1158
+ data=path_length_values[: self._n_vertices], dtype=DTYPE_PY
1159
+ )
1160
+ path_lengths_series.index = np.arange(self._n_vertices)
1161
+ path_lengths_series.index.name = None
1162
+ return path_lengths_series
1163
+
1164
+ # No else needed - de-indent the code
1165
+ if self._permute and self._permutation is not None:
1166
+ path_df = pd.DataFrame(
1167
+ data={"path_length": path_length_values[: self._n_vertices]}
1168
+ )
1169
+ path_df["vert_idx_new"] = path_df.index
1170
+ path_df = pd.merge(
1171
+ path_df,
1172
+ self._permutation,
1173
+ left_on="vert_idx_new",
1174
+ right_on="vert_idx_new",
1175
+ how="left",
1176
+ )
1177
+ path_df.drop(["vert_idx_new"], axis=1, inplace=True)
1178
+ path_length_values = np.full(self.__n_vertices_init, DTYPE_INF_PY)
1179
+ path_length_values[path_df.vert_idx_old.values] = path_df.path_length.values
1180
+ if return_inf:
1181
+ path_length_values = np.where(
1182
+ path_length_values == DTYPE_INF_PY, np.inf, path_length_values
1183
+ )
1184
+ return path_length_values
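The BellmanFord.run() method above mirrors Dijkstra.run() but accepts negative edge weights. A minimal sketch (illustrative, not part of the diff; the expected values in the comment follow from the documented semantics):

import pandas as pd
from edsger.path import BellmanFord

edges = pd.DataFrame(
    {"tail": [0, 0, 1, 2], "head": [1, 2, 2, 3], "weight": [4.0, 2.0, -3.0, 1.0]}
)
bf = BellmanFord(edges, orientation="out", check_edges=True)
print(bf.run(0, return_series=True))
# expected lengths from vertex 0: 0.0, 4.0, 1.0 (via 0 -> 1 -> 2), 2.0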
1185
+
1186
+ def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
1187
+ """Compute path from predecessors or successors.
1188
+
1189
+ Parameters:
1190
+ -----------
1191
+
1192
+ vertex_idx : int
1193
+ source or target vertex index.
1194
+
1195
+ Returns
1196
+ -------
1197
+
1198
+ path_vertices : numpy.ndarray
1199
+ Array of np.uint32 type storing the path from or to the given vertex index. If we are
1200
+ dealing with the sssp algorithm, the input vertex is the target vertex and the path to
1201
+ the source is given backward from the target to the source using the predecessors. If
1202
+ we are dealing with the stsp algorithm, the input vertex is the source vertex and the
1203
+ path to the target is given backward from the target to the source using the
1204
+ successors.
1205
+
1206
+ """
1207
+ if self._path_links is None:
1208
+ warnings.warn(
1209
+ "Current BellmanFord instance has not path attribute : \
1210
+ make sure path_tracking is set to True, and run the \
1211
+ shortest path algorithm",
1212
+ UserWarning,
1213
+ )
1214
+ return None
1215
+ if isinstance(self._path_links, pd.Series):
1216
+ path_vertices = compute_path(self._path_links.values, vertex_idx)
1217
+ else:
1218
+ path_vertices = compute_path(self._path_links, vertex_idx)
1219
+ return path_vertices
1220
+
1221
+ def has_negative_cycle(self):
1222
+ """
1223
+ Check if the last run detected a negative cycle.
1224
+
1225
+ Returns
1226
+ -------
1227
+ has_negative_cycle : bool
1228
+ True if a negative cycle was detected in the last run, False otherwise.
1229
+ """
1230
+ return self._has_negative_cycle
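has_negative_cycle() complements the ValueError raised inside run(); a sketch of both behaviours on a two-vertex negative loop (illustrative, not from the package):

import pandas as pd
from edsger.path import BellmanFord

# 0 -> 1 -> 0 with total weight -1.0 forms a negative cycle.
edges = pd.DataFrame({"tail": [0, 1], "head": [1, 0], "weight": [2.0, -3.0]})
bf = BellmanFord(edges, orientation="out")
try:
    bf.run(0)  # detect_negative_cycles defaults to True
except ValueError as err:
    print(err)                  # "Negative cycle detected in the graph"
print(bf.has_negative_cycle())  # True: the flag is set before the exception is raised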
1231
+
1232
+
1233
+ class HyperpathGenerating:
1234
+ """
1235
+ A class for constructing and managing hyperpath-based routing and analysis in transportation
1236
+ or graph-based systems.
1237
+
1238
+ Parameters
1239
+ ----------
1240
+ edges : pandas.DataFrame
1241
+ A DataFrame containing graph edge information with columns specified by `tail`, `head`,
1242
+ `trav_time`, and `freq`. Must not contain missing values.
1243
+ tail : str, optional
1244
+ Name of the column in `edges` representing the tail nodes (source nodes), by default "tail".
1245
+ head : str, optional
1246
+ Name of the column in `edges` representing the head nodes (target nodes), by default "head".
1247
+ trav_time : str, optional
1248
+ Name of the column in `edges` representing travel times for edges, by default "trav_time".
1249
+ freq : str, optional
1250
+ Name of the column in `edges` representing frequencies of edges, by default "freq".
1251
+ check_edges : bool, optional
1252
+ Whether to validate the structure and data types of `edges`, by default False.
1253
+ orientation : {"in", "out"}, optional
1254
+ Determines the orientation of the graph structure for traversal.
1255
+ - "in": Graph traversal is from destination to origin.
1256
+ - "out": Graph traversal is from origin to destination.
1257
+ By default "in".
1258
+
1259
+ Attributes
1260
+ ----------
1261
+ edge_count : int
1262
+ The number of edges in the graph.
1263
+ vertex_count : int
1264
+ The total number of vertices in the graph.
1265
+ u_i_vec : numpy.ndarray
1266
+ An array storing the least travel time for each vertex after running the algorithm.
1267
+ _edges : pandas.DataFrame
1268
+ Internal DataFrame containing the edges with additional metadata.
1269
+ _trav_time : numpy.ndarray
1270
+ Array of travel times for edges.
1271
+ _freq : numpy.ndarray
1272
+ Array of frequencies for edges.
1273
+ _tail : numpy.ndarray
1274
+ Array of tail nodes (source nodes) for edges.
1275
+ _head : numpy.ndarray
1276
+ Array of head nodes (target nodes) for edges.
1277
+ __indptr : numpy.ndarray
1278
+ Array for compressed row (or column) pointers in the CSR/CSC representation.
1279
+ _edge_idx : numpy.ndarray
1280
+ Array of edge indices in the CSR/CSC representation.
1281
+
1282
+ Methods
1283
+ -------
1284
+ run(origin, destination, volume, return_inf=False)
1285
+ Computes the hyperpath and updates edge volumes based on the input demand and configuration.
1286
+ _check_vertex_idx(idx)
1287
+ Validates a vertex index to ensure it is within the graph's bounds.
1288
+ _check_volume(v)
1289
+ Validates a volume value to ensure it is a non-negative float.
1290
+ _check_edges(edges, tail, head, trav_time, freq)
1291
+ Validates the structure and data types of the input edges DataFrame.
1292
+ """
1293
+
1294
+ def __init__(
1295
+ self,
1296
+ edges: pd.DataFrame,
1297
+ tail: str = "tail",
1298
+ head: str = "head",
1299
+ trav_time: str = "trav_time",
1300
+ freq: str = "freq",
1301
+ check_edges: bool = False,
1302
+ orientation: str = "in",
1303
+ ) -> None:
1304
+ # load the edges
1305
+ if check_edges:
1306
+ self._check_edges(edges, tail, head, trav_time, freq)
1307
+ # Convert to standardized NumPy-backed pandas DataFrame
1308
+ self._edges = standardize_graph_dataframe(
1309
+ edges, tail, head, trav_time=trav_time, freq=freq
1310
+ )
1311
+ self.edge_count = len(self._edges)
1312
+
1313
+ # remove inf values if any, and values close to zero
1314
+ self._edges[trav_time] = np.where(
1315
+ self._edges[trav_time] > DTYPE_INF_PY, DTYPE_INF_PY, self._edges[trav_time]
1316
+ )
1317
+ self._edges[trav_time] = np.where(
1318
+ self._edges[trav_time] < A_VERY_SMALL_TIME_INTERVAL_PY,
1319
+ A_VERY_SMALL_TIME_INTERVAL_PY,
1320
+ self._edges[trav_time],
1321
+ )
1322
+ self._edges[freq] = np.where(
1323
+ self._edges[freq] > INF_FREQ_PY, INF_FREQ_PY, self._edges[freq]
1324
+ )
1325
+ self._edges[freq] = np.where(
1326
+ self._edges[freq] < MIN_FREQ_PY, MIN_FREQ_PY, self._edges[freq]
1327
+ )
1328
+
1329
+ # create an edge index column
1330
+ self._edges = self._edges.reset_index(drop=True)
1331
+ data_col = "edge_idx"
1332
+ self._edges[data_col] = self._edges.index
1333
+
1334
+ # convert to CSR/CSC format
1335
+ self.vertex_count = self._edges[[tail, head]].max().max() + 1
1336
+ assert orientation in ["out", "in"]
1337
+ self._orientation = orientation
1338
+ if self._orientation == "out":
1339
+ fs_indptr, _, fs_data = convert_graph_to_csr_uint32(
1340
+ self._edges, tail, head, data_col, self.vertex_count
1341
+ )
1342
+ self.__indptr = fs_indptr.astype(np.uint32)
1343
+ self._edge_idx = fs_data.astype(np.uint32)
1344
+ else:
1345
+ rs_indptr, _, rs_data = convert_graph_to_csc_uint32(
1346
+ self._edges, tail, head, data_col, self.vertex_count
1347
+ )
1348
+ self.__indptr = rs_indptr.astype(np.uint32)
1349
+ self._edge_idx = rs_data.astype(np.uint32)
1350
+
1351
+ # edge attributes
1352
+ self._trav_time = np.asarray(self._edges[trav_time]).astype(DTYPE_PY)
1353
+ self._freq = np.asarray(self._edges[freq]).astype(DTYPE_PY)
1354
+ self._tail = np.asarray(self._edges[tail]).astype(np.uint32)
1355
+ self._head = np.asarray(self._edges[head]).astype(np.uint32)
1356
+
1357
+ # node attribute
1358
+ self.u_i_vec = None
1359
+
1360
+ def run(
1361
+ self,
1362
+ origin: Union[int, List[int]],
1363
+ destination: int,
1364
+ volume: Union[float, List[float]],
1365
+ return_inf: bool = False,
1366
+ ) -> None:
1367
+ """
1368
+ Computes the hyperpath and updates edge volumes based on the input demand and configuration.
1369
+
1370
+ Parameters
1371
+ ----------
1372
+ origin : int or list of int
1373
+ The starting vertex or vertices of the demand. If `self._orientation` is "in",
1374
+ this can be a list of origins corresponding to the demand volumes.
1375
+ destination : int or list of int
1376
+ The target vertex or vertices of the demand. If `self._orientation` is "out",
1377
+ this can be a list of destinations corresponding to the demand volumes.
1378
+ volume : float or list of float
1379
+ The demand volume associated with each origin or destination. Must be non-negative.
1380
+ If a single float is provided, it is applied to a single origin-destination pair.
1381
+ return_inf : bool, optional
1382
+ If True, returns additional information from the computation (not yet implemented).
1383
+ Default is False.
1384
+
1385
+ Raises
1386
+ ------
1387
+ NotImplementedError
1388
+ If `self._orientation` is "out", as the one-to-many algorithm is not yet implemented.
1389
+ AssertionError
1390
+ If the lengths of `origin` or `destination` and `volume` do not match.
1391
+ If any vertex index or volume is invalid.
1392
+ TypeError
1393
+ If `volume` is not a float or list of floats.
1394
+ ValueError
1395
+ If any volume value is negative.
1396
+
1397
+ Notes
1398
+ -----
1399
+ - The method modifies the `self._edges` DataFrame by adding a "volume" column representing
1400
+ edge volumes based on the computed hyperpath.
1401
+ - The `self.u_i_vec` array is updated to store the least travel time for each vertex.
1402
+ - Only "in" orientation is currently supported.
1403
+ """
1404
+ # column storing the resulting edge volumes
1405
+ self._edges["volume"] = 0.0
1406
+ self.u_i_vec = None
1407
+
1408
+ # vertex least travel time
1409
+ u_i_vec = DTYPE_INF_PY * np.ones(self.vertex_count, dtype=DTYPE_PY)
1410
+
1411
+ # input check
1412
+ if not isinstance(volume, list):
1413
+ volume = [volume]
1414
+
1415
+ if self._orientation == "out":
1416
+ raise NotImplementedError(
1417
+ "one-to-many Spiess & Florian's algorithm not implemented yet"
1418
+ )
1419
+
1420
+ # Only "in" orientation is supported currently
1421
+ if not isinstance(origin, list):
1422
+ origin = [origin]
1423
+ assert len(origin) == len(volume)
1424
+ for i, item in enumerate(origin):
1425
+ self._check_vertex_idx(item)
1426
+ self._check_volume(volume[i])
1427
+ self._check_vertex_idx(destination)
1428
+ demand_indices = np.array(origin, dtype=np.uint32)
1429
+
1430
+ assert isinstance(return_inf, bool)
1431
+
1432
+ demand_values = np.array(volume, dtype=DTYPE_PY)
1433
+
1434
+ compute_SF_in(
1435
+ self.__indptr,
1436
+ self._edge_idx,
1437
+ self._trav_time,
1438
+ self._freq,
1439
+ self._tail,
1440
+ self._head,
1441
+ demand_indices, # source vertex indices
1442
+ demand_values,
1443
+ np.asarray(self._edges["volume"]),
1444
+ u_i_vec,
1445
+ self.vertex_count,
1446
+ destination,
1447
+ )
1448
+ self.u_i_vec = u_i_vec
1449
+
1450
+ def _check_vertex_idx(self, idx):
1451
+ assert isinstance(idx, int)
1452
+ assert idx >= 0
1453
+ assert idx < self.vertex_count
1454
+
1455
+ def _check_volume(self, v):
1456
+ assert isinstance(v, float)
857
1457
  assert v >= 0.0
858
1458
 
859
- def _check_edges(self, edges, tail, head, trav_time, freq):
860
- if not isinstance(edges, pd.core.frame.DataFrame):
861
- raise TypeError("edges should be a pandas DataFrame")
1459
+ def _check_edges(self, edges, tail, head, trav_time, freq):
1460
+ if not isinstance(edges, pd.DataFrame):
1461
+ raise TypeError("edges should be a pandas DataFrame")
1462
+
1463
+ for col in [tail, head, trav_time, freq]:
1464
+ if col not in edges:
1465
+ raise KeyError(
1466
+ f"edge column '{col}' not found in graph edges dataframe"
1467
+ )
1468
+
1469
+ if edges[[tail, head, trav_time, freq]].isnull().to_numpy().any():
1470
+ raise ValueError(
1471
+ " ".join(
1472
+ [
1473
+ f"edges[[{tail}, {head}, {trav_time}, {freq}]] ",
1474
+ "should not have any missing value",
1475
+ ]
1476
+ )
1477
+ )
1478
+
1479
+ for col in [tail, head]:
1480
+ if not pd.api.types.is_integer_dtype(edges[col].dtype):
1481
+ raise TypeError(f"column '{col}' should be of integer type")
1482
+
1483
+ for col in [trav_time, freq]:
1484
+ if not pd.api.types.is_numeric_dtype(edges[col].dtype):
1485
+ raise TypeError(f"column '{col}' should be of numeric type")
1486
+
1487
+ if edges[col].min() < 0.0:
1488
+ raise ValueError(f"column '{col}' should be nonnegative")
1489
+
1490
+
1491
+ class BFS:
1492
+ """
1493
+ Breadth-First Search algorithm for finding shortest paths in directed graphs.
1494
+
1495
+ BFS ignores edge weights (treats all edges as having equal weight) and finds the shortest
1496
+ path in terms of the minimum number of edges/hops between vertices. This implementation
1497
+ works on directed graphs using CSR format for forward traversal and CSC format for
1498
+ backward traversal.
1499
+
1500
+ Note: If parallel edges exist between the same pair of vertices, only one edge will be
1501
+ kept automatically during initialization.
1502
+
1503
+ Parameters:
1504
+ -----------
1505
+ edges: pandas.DataFrame
1506
+ DataFrame containing the edges of the graph. It should have two columns: 'tail' and 'head'.
1507
+ The 'tail' column should contain the IDs of the starting nodes, and the 'head' column
1508
+ should contain the IDs of the ending nodes. If a 'weight' column is present, it will be
1509
+ ignored.
1510
+ tail: str, optional (default='tail')
1511
+ The name of the column in the DataFrame that contains the IDs of the edge starting nodes.
1512
+ head: str, optional (default='head')
1513
+ The name of the column in the DataFrame that contains the IDs of the edge ending nodes.
1514
+ orientation: str, optional (default='out')
1515
+ The orientation of BFS algorithm. It can be either 'out' for single source shortest
1516
+ paths or 'in' for single target shortest path.
1517
+ check_edges: bool, optional (default=False)
1518
+ Whether to check if the edges DataFrame is well-formed. If set to True, the edges
1519
+ DataFrame will be checked for missing values and invalid data types.
1520
+ permute: bool, optional (default=False)
1521
+ Whether to permute the IDs of the nodes. If set to True, the node IDs will be reindexed
1522
+ to start from 0 and be contiguous.
1523
+ verbose: bool, optional (default=False)
1524
+ Whether to print messages about parallel edge removal.
1525
+ sentinel: int, optional (default=-9999)
1526
+ Sentinel value for unreachable nodes and the start vertex in the predecessor array.
1527
+ Must be a negative integer that fits in int32 range.
1528
+ """
1529
+
1530
+ def __init__(
1531
+ self,
1532
+ edges: pd.DataFrame,
1533
+ tail: str = "tail",
1534
+ head: str = "head",
1535
+ orientation: str = "out",
1536
+ check_edges: bool = False,
1537
+ permute: bool = False,
1538
+ verbose: bool = False,
1539
+ sentinel: int = -9999,
1540
+ ) -> None:
1541
+ # Validate sentinel value
1542
+ if not isinstance(sentinel, int):
1543
+ raise TypeError(
1544
+ f"sentinel must be an integer, got {type(sentinel).__name__}"
1545
+ )
1546
+ if sentinel >= 0:
1547
+ raise ValueError(f"sentinel must be negative, got {sentinel}")
1548
+ if sentinel < np.iinfo(np.int32).min or sentinel > np.iinfo(np.int32).max:
1549
+ raise ValueError(
1550
+ f"sentinel must fit in int32 range [{np.iinfo(np.int32).min}, "
1551
+ f"{np.iinfo(np.int32).max}], got {sentinel}"
1552
+ )
1553
+ self._sentinel = sentinel
1554
+
1555
+ # load the edges
1556
+ if check_edges:
1557
+ self._check_edges(edges, tail, head)
1558
+ # Convert to standardized NumPy-backed pandas DataFrame
1559
+ # Note: BFS doesn't need weights, but standardize_graph_dataframe handles that
1560
+ self._edges = standardize_graph_dataframe(edges, tail, head)
1561
+ self._n_edges = len(self._edges)
1562
+ self._verbose = verbose
1563
+
1564
+ # preprocess edges to handle parallel edges
1565
+ self._preprocess_edges(tail, head)
1566
+
1567
+ # reindex the vertices
1568
+ self._permute = permute
1569
+ if len(self._edges) == 0:
1570
+ # Handle empty graphs
1571
+ self._permutation = None
1572
+ self._n_vertices = 0
1573
+ self.__n_vertices_init = 0
1574
+ elif self._permute:
1575
+ self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
1576
+ self._permutation = self._permute_graph(tail, head)
1577
+ self._n_vertices = len(self._permutation)
1578
+ else:
1579
+ self._permutation = None
1580
+ self._n_vertices = self._edges[[tail, head]].max(axis=0).max() + 1
1581
+ self.__n_vertices_init = self._n_vertices
1582
+
1583
+ # convert to CSR/CSC
1584
+ self._check_orientation(orientation)
1585
+ self._orientation = orientation
1586
+ if self._orientation == "out":
1587
+ # Use dummy weight column for conversion (BFS doesn't use weights)
1588
+ self._edges["_bfs_dummy_weight"] = 1.0
1589
+ fs_indptr, fs_indices, _ = convert_graph_to_csr_float64(
1590
+ self._edges, tail, head, "_bfs_dummy_weight", self._n_vertices
1591
+ )
1592
+ self._edges.drop("_bfs_dummy_weight", axis=1, inplace=True)
1593
+ self.__indices = fs_indices.astype(np.uint32)
1594
+ self.__indptr = fs_indptr.astype(np.uint32)
1595
+ else:
1596
+ self._edges["_bfs_dummy_weight"] = 1.0
1597
+ rs_indptr, rs_indices, _ = convert_graph_to_csc_float64(
1598
+ self._edges, tail, head, "_bfs_dummy_weight", self._n_vertices
1599
+ )
1600
+ self._edges.drop("_bfs_dummy_weight", axis=1, inplace=True)
1601
+ self.__indices = rs_indices.astype(np.uint32)
1602
+ self.__indptr = rs_indptr.astype(np.uint32)
1603
+
1604
+ self._path_links = None
1605
+
1606
+ @property
1607
+ def UNREACHABLE(self) -> int:
1608
+ """
1609
+ Getter for the sentinel value used for unreachable nodes.
1610
+
1611
+ Returns
1612
+ -------
1613
+ sentinel : int
1614
+ The sentinel value for unreachable nodes and the start vertex.
1615
+ """
1616
+ return self._sentinel
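A construction sketch for the BFS class above (illustrative, not from the diff); only the constructor arguments and the UNREACHABLE property shown here are assumed.

import pandas as pd
from edsger.path import BFS

edges = pd.DataFrame({"tail": [0, 0, 1], "head": [1, 2, 3]})
bfs = BFS(edges, orientation="out", check_edges=True, sentinel=-1)
print(bfs.n_vertices)   # 4
print(bfs.UNREACHABLE)  # -1, the sentinel used for unreachable vertices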
1617
+
1618
+ @property
1619
+ def edges(self) -> Any:
1620
+ """
1621
+ Getter for the graph edge dataframe.
1622
+
1623
+ Returns
1624
+ -------
1625
+ edges: pandas.DataFrame
1626
+ DataFrame containing the edges of the graph.
1627
+ """
1628
+ return self._edges
1629
+
1630
+ @property
1631
+ def n_edges(self) -> int:
1632
+ """
1633
+ Getter for the number of graph edges.
1634
+
1635
+ Returns
1636
+ -------
1637
+ n_edges: int
1638
+ The number of edges in the graph.
1639
+ """
1640
+ return self._n_edges
862
1641
 
863
- for col in [tail, head, trav_time, freq]:
864
- if col not in edges:
865
- raise KeyError(
866
- f"edge column '{col}' not found in graph edges dataframe"
1642
+ @property
1643
+ def n_vertices(self) -> int:
1644
+ """
1645
+ Getter for the number of graph vertices.
1646
+
1647
+ Returns
1648
+ -------
1649
+ n_vertices: int
1650
+ The number of nodes in the graph (after permutation, if _permute is True).
1651
+ """
1652
+ return self._n_vertices
1653
+
1654
+ @property
1655
+ def orientation(self) -> str:
1656
+ """
1657
+ Getter for the BFS algorithm orientation ("in" or "out").
1658
+
1659
+ Returns
1660
+ -------
1661
+ orientation : str
1662
+ The orientation of the BFS algorithm.
1663
+ """
1664
+ return self._orientation
1665
+
1666
+ @property
1667
+ def permute(self) -> bool:
1668
+ """
1669
+ Getter for the graph permutation/reindexing option.
1670
+
1671
+ Returns
1672
+ -------
1673
+ permute : bool
1674
+ Whether to permute the IDs of the nodes.
1675
+ """
1676
+ return self._permute
1677
+
1678
+ @property
1679
+ def path_links(self) -> Optional[np.ndarray]:
1680
+ """
1681
+ Getter for the path links (predecessors or successors).
1682
+
1683
+ Returns
1684
+ -------
1685
+ path_links: numpy.ndarray
1686
+ Predecessor or successor node indices if path tracking is activated.
1687
+ """
1688
+ return self._path_links
1689
+
1690
+ def _preprocess_edges(self, tail, head):
1691
+ """
1692
+ Preprocess edges to handle parallel edges by keeping only one edge
1693
+ between any pair of vertices (BFS doesn't use weights).
1694
+
1695
+ Parameters
1696
+ ----------
1697
+ tail : str
1698
+ The column name for tail vertices
1699
+ head : str
1700
+ The column name for head vertices
1701
+ """
1702
+ original_count = len(self._edges)
1703
+ self._edges = self._edges.groupby([tail, head], as_index=False).first()
1704
+ final_count = len(self._edges)
1705
+
1706
+ if original_count > final_count:
1707
+ parallel_edges_removed = original_count - final_count
1708
+ if self._verbose:
1709
+ print(
1710
+ f"Automatically removed {parallel_edges_removed} parallel edge(s). "
1711
+ f"BFS treats all edges equally."
867
1712
  )
868
1713
 
869
- if edges[[tail, head, trav_time, freq]].isna().any().any():
1714
+ self._n_edges = len(self._edges)
1715
+
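The parallel-edge handling above reduces to a single pandas groupby on the (tail, head) pair; a minimal standalone sketch of the same idea:

    import pandas as pd

    # two parallel 0 -> 1 edges: only the first survives the groupby
    edges = pd.DataFrame({"tail": [0, 0, 1], "head": [1, 1, 2]})
    deduped = edges.groupby(["tail", "head"], as_index=False).first()
    assert len(deduped) == 2  # one 0 -> 1 edge and one 1 -> 2 edge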
1716
+ def _check_edges(self, edges, tail, head):
1717
+ """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
1718
+ if not isinstance(edges, pd.DataFrame):
1719
+ raise TypeError("edges should be a pandas DataFrame")
1720
+
1721
+ if tail not in edges:
1722
+ raise KeyError(
1723
+ f"edge tail column '{tail}' not found in graph edges dataframe"
1724
+ )
1725
+
1726
+ if head not in edges:
1727
+ raise KeyError(
1728
+ f"edge head column '{head}' not found in graph edges dataframe"
1729
+ )
1730
+
1731
+ if edges[[tail, head]].isnull().to_numpy().any():
870
1732
  raise ValueError(
871
1733
  " ".join(
872
1734
  [
873
- f"edges[[{tail}, {head}, {trav_time}, {freq}]] ",
1735
+ f"edges[[{tail}, {head}]] ",
874
1736
  "should not have any missing value",
875
1737
  ]
876
1738
  )
@@ -878,14 +1740,298 @@ class HyperpathGenerating:
878
1740
 
879
1741
  for col in [tail, head]:
880
1742
  if not pd.api.types.is_integer_dtype(edges[col].dtype):
881
- raise TypeError(f"column '{col}' should be of integer type")
1743
+ raise TypeError(f"edges['{col}'] should be of integer type")
882
1744
 
883
- for col in [trav_time, freq]:
884
- if not pd.api.types.is_numeric_dtype(edges[col].dtype):
885
- raise TypeError(f"column '{col}' should be of numeric type")
1745
+ def _permute_graph(self, tail, head):
1746
+ """Permute the IDs of the nodes to start from 0 and be contiguous.
1747
+ Returns a DataFrame with the permuted IDs."""
886
1748
 
887
- if edges[col].min() < 0.0:
888
- raise ValueError(f"column '{col}' should be nonnegative")
1749
+ permutation = pd.DataFrame(
1750
+ data={
1751
+ "vert_idx": np.union1d(
1752
+ np.asarray(self._edges[tail]), np.asarray(self._edges[head])
1753
+ )
1754
+ }
1755
+ )
1756
+ permutation["vert_idx_new"] = permutation.index
1757
+ permutation.index.name = "index"
1758
+
1759
+ self._edges = pd.merge(
1760
+ self._edges,
1761
+ permutation[["vert_idx", "vert_idx_new"]],
1762
+ left_on=tail,
1763
+ right_on="vert_idx",
1764
+ how="left",
1765
+ )
1766
+ self._edges.drop([tail, "vert_idx"], axis=1, inplace=True)
1767
+ self._edges.rename(columns={"vert_idx_new": tail}, inplace=True)
1768
+
1769
+ self._edges = pd.merge(
1770
+ self._edges,
1771
+ permutation[["vert_idx", "vert_idx_new"]],
1772
+ left_on=head,
1773
+ right_on="vert_idx",
1774
+ how="left",
1775
+ )
1776
+ self._edges.drop([head, "vert_idx"], axis=1, inplace=True)
1777
+ self._edges.rename(columns={"vert_idx_new": head}, inplace=True)
1778
+
1779
+ permutation.rename(columns={"vert_idx": "vert_idx_old"}, inplace=True)
1780
+ permutation.reset_index(drop=True, inplace=True)
1781
+ permutation.sort_values(by="vert_idx_new", inplace=True)
1782
+
1783
+ permutation.index.name = "index"
1784
+ self._edges.index.name = "index"
1785
+
1786
+ return permutation
1787
+
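The reindexing performed by `_permute_graph` can be reproduced outside the class with `np.union1d` and a plain dictionary mapping; a simplified sketch with assumed "tail"/"head" column names:

    import numpy as np
    import pandas as pd

    edges = pd.DataFrame({"tail": [10, 40], "head": [40, 70]})

    # contiguous new IDs 0..n-1 for the vertex IDs actually present in the graph
    old_ids = np.union1d(edges["tail"].to_numpy(), edges["head"].to_numpy())
    mapping = {old: new for new, old in enumerate(old_ids)}

    edges_new = edges.copy()
    edges_new["tail"] = edges_new["tail"].map(mapping)
    edges_new["head"] = edges_new["head"].map(mapping)
    # edges_new now uses IDs 0, 1, 2 in place of 10, 40, 70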
1788
+ def _check_orientation(self, orientation):
1789
+ """Checks the orientation attribute."""
1790
+ if orientation not in ["in", "out"]:
1791
+ raise ValueError("orientation should be either 'in' on 'out'")
1792
+
1793
+ def run(
1794
+ self,
1795
+ vertex_idx: int,
1796
+ path_tracking: bool = False,
1797
+ return_series: bool = False,
1798
+ ) -> Union[np.ndarray, pd.Series]:
1799
+ """
1800
+ Runs the BFS algorithm between a given vertex and all other vertices in the graph.
1801
+
1802
+ Parameters
1803
+ ----------
1804
+ vertex_idx : int
1805
+ The index of the source/target vertex.
1806
+ path_tracking : bool, optional (default=False)
1807
+ Whether to track the shortest path(s) from the source vertex to all other vertices
1808
+ in the graph. When True, predecessors are stored and can be retrieved with get_path().
1809
+ return_series : bool, optional (default=False)
1810
+ Whether to return a Pandas Series object indexed by vertex indices with predecessors
1811
+ as values.
1812
+
1813
+ Returns
1814
+ -------
1815
+ predecessors : np.ndarray or pd.Series
1816
+ If `return_series=False`, a 1D Numpy array of shape (n_vertices,) with the
1817
+ predecessor of each vertex in the BFS tree (`orientation="out"`), or
1818
+ the successor of each vertex (`orientation="in"`).
1819
+ Unreachable vertices and the start vertex have the sentinel value (default: -9999).
1820
+ If `return_series=True`, a Pandas Series object with the same data and the
1821
+ vertex indices as index.
1822
+ """
1823
+ # validate the input arguments
1824
+ if vertex_idx < 0:
1825
+ raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be non-negative")
1826
+ if self._permute and self._permutation is not None:
1827
+ if vertex_idx not in self._permutation.vert_idx_old.values:
1828
+ raise ValueError(f"vertex {vertex_idx} not found in graph")
1829
+ vertex_new = self._permutation.loc[
1830
+ self._permutation.vert_idx_old == vertex_idx, "vert_idx_new"
1831
+ ].iloc[0]
1832
+ else:
1833
+ if vertex_idx >= self._n_vertices:
1834
+ raise ValueError(f"vertex {vertex_idx} not found in graph")
1835
+ vertex_new = vertex_idx
1836
+
1837
+ # compute BFS predecessors
1838
+ if self._orientation == "out":
1839
+ predecessors = bfs_csr(
1840
+ self.__indptr,
1841
+ self.__indices,
1842
+ vertex_new,
1843
+ self._n_vertices,
1844
+ self._sentinel,
1845
+ )
1846
+ else:
1847
+ predecessors = bfs_csc(
1848
+ self.__indptr,
1849
+ self.__indices,
1850
+ vertex_new,
1851
+ self._n_vertices,
1852
+ self._sentinel,
1853
+ )
1854
+
1855
+ # store path links if tracking is enabled
1856
+ if path_tracking:
1857
+ # Convert predecessors to path_links format (uint32)
1858
+ # Replace sentinel value with vertex's own index (like Dijkstra does)
1859
+ self._path_links = np.arange(self._n_vertices, dtype=np.uint32)
1860
+ reachable_mask = predecessors != self._sentinel
1861
+ self._path_links[reachable_mask] = predecessors[reachable_mask].astype(
1862
+ np.uint32
1863
+ )
1864
+
1865
+ if self._permute and self._permutation is not None:
1866
+ # permute back the path vertex indices (same approach as Dijkstra)
1867
+ path_df = pd.DataFrame(
1868
+ data={
1869
+ "vertex_idx": np.arange(self._n_vertices),
1870
+ "associated_idx": self._path_links,
1871
+ }
1872
+ )
1873
+ path_df = pd.merge(
1874
+ path_df,
1875
+ self._permutation,
1876
+ left_on="vertex_idx",
1877
+ right_on="vert_idx_new",
1878
+ how="left",
1879
+ )
1880
+ path_df.drop(["vertex_idx", "vert_idx_new"], axis=1, inplace=True)
1881
+ path_df.rename(columns={"vert_idx_old": "vertex_idx"}, inplace=True)
1882
+ path_df = pd.merge(
1883
+ path_df,
1884
+ self._permutation,
1885
+ left_on="associated_idx",
1886
+ right_on="vert_idx_new",
1887
+ how="left",
1888
+ )
1889
+ path_df.drop(["associated_idx", "vert_idx_new"], axis=1, inplace=True)
1890
+ path_df.rename(columns={"vert_idx_old": "associated_idx"}, inplace=True)
1891
+
1892
+ if return_series:
1893
+ path_df.set_index("vertex_idx", inplace=True)
1894
+ self._path_links = path_df.associated_idx.astype(np.uint32)
1895
+ else:
1896
+ self._path_links = np.arange(
1897
+ self.__n_vertices_init, dtype=np.uint32
1898
+ )
1899
+ self._path_links[path_df.vertex_idx.values] = (
1900
+ path_df.associated_idx.values
1901
+ )
1902
+ else:
1903
+ self._path_links = None
1904
+
1905
+ # reorder predecessors for permuted graphs
1906
+ if return_series:
1907
+ if self._permute and self._permutation is not None:
1908
+ pred_df = pd.DataFrame(data={"predecessor": predecessors})
1909
+ pred_df["vert_idx_new"] = pred_df.index
1910
+ pred_df = pd.merge(
1911
+ pred_df,
1912
+ self._permutation,
1913
+ left_on="vert_idx_new",
1914
+ right_on="vert_idx_new",
1915
+ how="left",
1916
+ )
1917
+
1918
+ # Map predecessor values back to original IDs
1919
+ valid_mask = pred_df["predecessor"] != self._sentinel
1920
+ if valid_mask.any():
1921
+ pred_df_valid = pred_df[valid_mask].copy()
1922
+ pred_df_valid = pd.merge(
1923
+ pred_df_valid,
1924
+ self._permutation,
1925
+ left_on="predecessor",
1926
+ right_on="vert_idx_new",
1927
+ how="left",
1928
+ suffixes=("", "_pred"),
1929
+ )
1930
+ pred_df.loc[valid_mask, "predecessor"] = pred_df_valid[
1931
+ "vert_idx_old_pred"
1932
+ ].values.astype(np.int32)
1933
+
1934
+ pred_df.set_index("vert_idx_old", inplace=True)
1935
+ predecessors_series = pred_df.predecessor.astype(np.int32)
1936
+ predecessors_series.index.name = "vertex_idx"
1937
+ predecessors_series.name = "predecessor"
1938
+ else:
1939
+ predecessors_series = pd.Series(predecessors, dtype=np.int32)
1940
+ predecessors_series.index.name = "vertex_idx"
1941
+ predecessors_series.name = "predecessor"
1942
+
1943
+ return predecessors_series
1944
+
1945
+ # For array output with permutation
1946
+ if self._permute and self._permutation is not None:
1947
+ pred_df = pd.DataFrame(data={"predecessor": predecessors})
1948
+ pred_df["vert_idx_new"] = pred_df.index
1949
+ pred_df = pd.merge(
1950
+ pred_df,
1951
+ self._permutation,
1952
+ left_on="vert_idx_new",
1953
+ right_on="vert_idx_new",
1954
+ how="left",
1955
+ )
1956
+
1957
+ # Map predecessor values back to original IDs
1958
+ valid_mask = pred_df["predecessor"] != self._sentinel
1959
+ if valid_mask.any():
1960
+ pred_df_valid = pred_df[valid_mask].copy()
1961
+ pred_df_valid = pd.merge(
1962
+ pred_df_valid,
1963
+ self._permutation,
1964
+ left_on="predecessor",
1965
+ right_on="vert_idx_new",
1966
+ how="left",
1967
+ suffixes=("", "_pred"),
1968
+ )
1969
+ pred_df.loc[valid_mask, "predecessor"] = pred_df_valid[
1970
+ "vert_idx_old_pred"
1971
+ ].values.astype(np.int32)
1972
+
1973
+ predecessors_array = np.full(
1974
+ self.__n_vertices_init, self._sentinel, dtype=np.int32
1975
+ )
1976
+ predecessors_array[pred_df.vert_idx_old.values] = (
1977
+ pred_df.predecessor.values.astype(np.int32)
1978
+ )
1979
+ return predecessors_array
1980
+
1981
+ return predecessors
1982
+
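As a usage sketch for `run` (class name `BFS` assumed, as above), comparing the returned array against the sentinel gives a reachability mask:

    import pandas as pd
    from edsger.path import BFS  # assumed export

    edges = pd.DataFrame({"tail": [0, 0, 1], "head": [1, 2, 3]})
    bfs = BFS(edges, orientation="out")

    preds = bfs.run(vertex_idx=0, path_tracking=True)
    print(preds)                     # e.g. [-9999 0 0 1] with the default sentinel
    print(preds != bfs.UNREACHABLE)  # note: the start vertex also carries the sentinel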
1983
+ def get_vertices(self) -> Any:
1984
+ """
1985
+ Get the unique vertices from the graph.
1986
+
1987
+ If the graph has been permuted, this method returns the vertices based on the original
1988
+ indexing. Otherwise, it returns the union of tail and head vertices from the edges.
1989
+
1990
+ Returns
1991
+ -------
1992
+ vertices : ndarray
1993
+ A 1-D array containing the unique vertices.
1994
+ """
1995
+ if self._permute and self._permutation is not None:
1996
+ return np.asarray(self._permutation.vert_idx_old)
1997
+ return np.union1d(
1998
+ np.asarray(self._edges["tail"]), np.asarray(self._edges["head"])
1999
+ )
2000
+
2001
+ def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
2002
+ """Compute path from predecessors or successors.
2003
+
2004
+ Parameters:
2005
+ -----------
2006
+
2007
+ vertex_idx : int
2008
+ source or target vertex index.
2009
+
2010
+ Returns
2011
+ -------
2012
+
2013
+ path_vertices : numpy.ndarray
2014
+ Array of np.int32 type storing the path from or to the given vertex index. For BFS
2015
+ from a source (orientation="out"), the input vertex is the target vertex and the
2016
+ path to the source is given backward, from the target to the source, using the
2017
+ predecessors. For BFS toward a target (orientation="in"), the input vertex is the
2018
+ source vertex and the path to the target is given backward, from the target to the
2019
+ source, using the successors.
2020
+
2021
+ """
2022
+ if self._path_links is None:
2023
+ warnings.warn(
2024
+ "Current BFS instance has no path attribute: "
2025
+ "make sure path_tracking is set to True, and run the "
2026
+ "BFS algorithm",
2027
+ UserWarning,
2028
+ )
2029
+ return None
2030
+ if isinstance(self._path_links, pd.Series):
2031
+ path_vertices = compute_path(self._path_links.values, vertex_idx)
2032
+ else:
2033
+ path_vertices = compute_path(self._path_links, vertex_idx)
2034
+ return path_vertices
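Continuing the sketch above, the tracked links can be turned into an explicit path; with orientation="out" the docstring states the path is returned backward, from the target to the source:

    import pandas as pd
    from edsger.path import BFS  # assumed export

    edges = pd.DataFrame({"tail": [0, 0, 1], "head": [1, 2, 3]})
    bfs = BFS(edges, orientation="out")
    bfs.run(vertex_idx=0, path_tracking=True)

    print(bfs.get_path(3))  # e.g. [3 1 0]: target 3 back to source 0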
889
2035
 
890
2036
 
891
2037
  # author : Francois Pacull